From 23c832b10ca9ab2685d7d3e0990800ffc846fc92 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 12 Oct 2016 19:23:50 -0400 Subject: remove spd_release_page() no users left Signed-off-by: Al Viro --- include/linux/splice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index 00a21166e268..647243bdd9d7 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -82,7 +82,6 @@ extern ssize_t splice_direct_to_actor(struct file *, struct splice_desc *, */ extern int splice_grow_spd(const struct pipe_inode_info *, struct splice_pipe_desc *); extern void splice_shrink_spd(struct splice_pipe_desc *); -extern void spd_release_page(struct splice_pipe_desc *, unsigned int); extern const struct pipe_buf_operations page_cache_pipe_buf_ops; extern const struct pipe_buf_operations default_pipe_buf_ops; -- cgit v1.2.3 From f81dc7d7d5a2528f98f26a0b9406e822d0b35011 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 31 Oct 2016 16:47:15 -0400 Subject: splice_pipe_desc: kill ->flags no users left Signed-off-by: Al Viro --- include/linux/splice.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index 647243bdd9d7..3c98dad93bf3 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -55,7 +55,6 @@ struct splice_pipe_desc { struct partial_page *partial; /* pages[] may not be contig */ int nr_pages; /* number of populated pages in map */ unsigned int nr_pages_max; /* pages[] & partial[] arrays size */ - unsigned int flags; /* splice flags */ const struct pipe_buf_operations *ops;/* ops associated with output pipe */ void (*spd_release)(struct splice_pipe_desc *, unsigned int); }; -- cgit v1.2.3 From 3d6ea290f337cc64cf44290482e36306fc8aaa31 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 10 Dec 2016 13:17:32 -0500 Subject: splice/tee/vmsplice: validate flags Long overdue... 
Signed-off-by: Al Viro --- include/linux/splice.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/splice.h b/include/linux/splice.h index 3c98dad93bf3..db42746bdfea 100644 --- a/include/linux/splice.h +++ b/include/linux/splice.h @@ -20,6 +20,8 @@ #define SPLICE_F_MORE (0x04) /* expect more data */ #define SPLICE_F_GIFT (0x08) /* pages passed in are a gift */ +#define SPLICE_F_ALL (SPLICE_F_MOVE|SPLICE_F_NONBLOCK|SPLICE_F_MORE|SPLICE_F_GIFT) + /* * Passed to the actors */ -- cgit v1.2.3 From 2955b73def6712b693fc7ad82b34b3831faaa146 Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Tue, 21 Feb 2017 09:30:00 +0000 Subject: dma-buf/reservation: Wrap ww_mutex_trylock In a similar fashion to reservation_object_lock() and reservation_object_unlock(), ww_mutex_trylock is also useful and so is worth wrapping for consistency. Signed-off-by: Chris Wilson Cc: Sumit Semwal Cc: Joonas Lahtinen Cc: Daniel Vetter [danvet: Add __must_check Joonas wants.] Reviewed-by: Joonas Lahtinen Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/20170221093000.22802-1-chris@chris-wilson.co.uk --- include/linux/reservation.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reservation.h b/include/linux/reservation.h index 2b5a4679daea..156cfd330b66 100644 --- a/include/linux/reservation.h +++ b/include/linux/reservation.h @@ -166,6 +166,26 @@ reservation_object_lock(struct reservation_object *obj, return ww_mutex_lock(&obj->lock, ctx); } +/** + * reservation_object_trylock - trylock the reservation object + * @obj: the reservation object + * + * Tries to lock the reservation object for exclusive access and modification. + * Note, that the lock is only against other writers, readers will run + * concurrently with a writer under RCU. The seqlock is used to notify readers + * if they overlap with a writer. 
+ * + * Also note that since no context is provided, no deadlock protection is + * possible. + * + * Returns true if the lock was acquired, false otherwise. + */ +static inline bool __must_check +reservation_object_trylock(struct reservation_object *obj) +{ + return ww_mutex_trylock(&obj->lock); +} + /** * reservation_object_unlock - unlock the reservation object * @obj: the reservation object -- cgit v1.2.3 From 38b0b219fbe89d824213beabf03bfb00b5d2c8fa Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Fri, 24 Feb 2017 17:14:33 +0100 Subject: of: add devm_ functions for populate and depopulate Lots of calls to of_platform_populate() are not balanced by a call to of_platform_depopulate(). This creates issues while drivers are bound/unbound. A way to solve those issues is to add devm_of_platform_populate() which will call of_platform_depopulate() when the device is unbound from the bus. Signed-off-by: Benjamin Gaignard Acked-by: Rob Herring Signed-off-by: Daniel Vetter Link: http://patchwork.freedesktop.org/patch/msgid/1487952874-23635-2-git-send-email-benjamin.gaignard@linaro.org --- include/linux/of_platform.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index 956a1006aefc..dc8224ae28d5 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -76,6 +76,10 @@ extern int of_platform_default_populate(struct device_node *root, const struct of_dev_auxdata *lookup, struct device *parent); extern void of_platform_depopulate(struct device *parent); + +extern int devm_of_platform_populate(struct device *dev); + +extern void devm_of_platform_depopulate(struct device *dev); #else static inline int of_platform_populate(struct device_node *root, const struct of_device_id *matches, @@ -91,6 +95,13 @@ static inline int of_platform_default_populate(struct device_node *root, return -ENODEV; } static inline void of_platform_depopulate(struct device
*parent) { } + +static inline int devm_of_platform_populate(struct device *dev) +{ + return -ENODEV; +} + +static inline void devm_of_platform_depopulate(struct device *dev) { } #endif #if defined(CONFIG_OF_DYNAMIC) && defined(CONFIG_OF_ADDRESS) -- cgit v1.2.3 From 90ec5e89e393c76e19afc845d8f88a5dc8315919 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 22 Feb 2017 19:23:37 +0530 Subject: kretprobes: Ensure probe location is at function entry kretprobes can be registered by specifying an absolute address or by specifying offset to a symbol. However, we need to ensure this falls at function entry so as to be able to determine the return address. Validate the same during kretprobe registration. By default, there should not be any offset from a function entry, as determined through a kallsyms_lookup(). Introduce arch_function_offset_within_entry() as a way for architectures to override this. Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/f1583bc4839a3862cfc2acefcc56f9c8837fa2ba.1487770934.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c328e4f7dcad..177bdf6c6aeb 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -267,6 +267,7 @@ extern int arch_init_kprobes(void); extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); +extern bool arch_function_offset_within_entry(unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); -- cgit v1.2.3 From 59d0f2da35693bfbcf6ffb014213cb8e225c8928 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Fri, 3 Mar 2017 21:44:32 +0800 Subject: iio: 
hid: Add temperature sensor support Environmental temperature sensor is a hid defined sensor, it measures temperature. More information can be found in: http://www.usb.org/developers/hidpage/HUTRR39b.pdf According to IIO ABI definition, IIO_TEMP data output unit is milli degrees Celsius. Add the unit conversion from degree to milli degree. Signed-off-by: Song Hongyan Acked-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 30c7dc45e45f..46dd1f27d2f2 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -45,6 +45,10 @@ #define HID_USAGE_SENSOR_DATA_ATMOSPHERIC_PRESSURE 0x200430 #define HID_USAGE_SENSOR_ATMOSPHERIC_PRESSURE 0x200431 +/* Temperature (200033) */ +#define HID_USAGE_SENSOR_TEMPERATURE 0x200033 +#define HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE 0x200434 + /* Gyro 3D: (200076) */ #define HID_USAGE_SENSOR_GYRO_3D 0x200076 #define HID_USAGE_SENSOR_DATA_ANGL_VELOCITY 0x200456 -- cgit v1.2.3 From 791ec491c372f49cea3ea7a7143454a9023ac9d4 Mon Sep 17 00:00:00 2001 From: Stephen Smalley Date: Fri, 17 Feb 2017 07:57:00 -0500 Subject: prlimit,security,selinux: add a security hook for prlimit When SELinux was first added to the kernel, a process could only get and set its own resource limits via getrlimit(2) and setrlimit(2), so no MAC checks were required for those operations, and thus no security hooks were defined for them. Later, SELinux introduced a hook for setrlimit(2) with a check if the hard limit was being changed in order to be able to rely on the hard limit value as a safe reset point upon context transitions. Later on, when prlimit(2) was added to the kernel with the ability to get or set resource limits (hard or soft) of another process, LSM/SELinux was not updated other than to pass the target process to the setrlimit hook.
This resulted in incomplete control over both getting and setting the resource limits of another process. Add a new security_task_prlimit() hook to the check_prlimit_permission() function to provide complete mediation. The hook is only called when acting on another task, and only if the existing DAC/capability checks would allow access. Pass flags down to the hook to indicate whether the prlimit(2) call will read, write, or both read and write the resource limits of the target process. The existing security_task_setrlimit() hook is left alone; it continues to serve a purpose in supporting the ability to make decisions based on the old and/or new resource limit values when setting limits. This is consistent with the DAC/capability logic, where check_prlimit_permission() performs generic DAC/capability checks for acting on another task, while do_prlimit() performs a capability check based on a comparison of the old and new resource limits. Fix the inline documentation for the hook to match the code. Implement the new hook for SELinux. For setting resource limits, we reuse the existing setrlimit permission. Note that this does overload the setrlimit permission to mean the ability to set the resource limit (soft or hard) of another process or the ability to change one's own hard limit. For getting resource limits, a new getrlimit permission is defined. This was not originally defined since getrlimit(2) could only be used to obtain a process' own limits. Signed-off-by: Stephen Smalley Signed-off-by: James Morris --- include/linux/lsm_hooks.h | 18 +++++++++++++++--- include/linux/security.h | 13 +++++++++++++ 2 files changed, 28 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index e29d4c62a3c8..ba3049f05aea 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -630,10 +630,19 @@ * Check permission before getting the ioprio value of @p. 
* @p contains the task_struct of process. * Return 0 if permission is granted. + * @task_prlimit: + * Check permission before getting and/or setting the resource limits of + * another task. + * @cred points to the cred structure for the current task. + * @tcred points to the cred structure for the target task. + * @flags contains the LSM_PRLIMIT_* flag bits indicating whether the + * resource limits are being read, modified, or both. + * Return 0 if permission is granted. * @task_setrlimit: - * Check permission before setting the resource limits of the current - * process for @resource to @new_rlim. The old resource limit values can - * be examined by dereferencing (current->signal->rlim + resource). + * Check permission before setting the resource limits of process @p + * for @resource to @new_rlim. The old resource limit values can + * be examined by dereferencing (p->signal->rlim + resource). + * @p points to the task_struct for the target task's group leader. * @resource contains the resource whose limit is being set. * @new_rlim contains the new limits for @resource. * Return 0 if permission is granted. 
@@ -1494,6 +1503,8 @@ union security_list_options { int (*task_setnice)(struct task_struct *p, int nice); int (*task_setioprio)(struct task_struct *p, int ioprio); int (*task_getioprio)(struct task_struct *p); + int (*task_prlimit)(const struct cred *cred, const struct cred *tcred, + unsigned int flags); int (*task_setrlimit)(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); int (*task_setscheduler)(struct task_struct *p); @@ -1755,6 +1766,7 @@ struct security_hook_heads { struct list_head task_setnice; struct list_head task_setioprio; struct list_head task_getioprio; + struct list_head task_prlimit; struct list_head task_setrlimit; struct list_head task_setscheduler; struct list_head task_getscheduler; diff --git a/include/linux/security.h b/include/linux/security.h index 96899fad7016..97df7bac5b48 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -133,6 +133,10 @@ extern unsigned long dac_mmap_min_addr; /* setfsuid or setfsgid, id0 == fsuid or fsgid */ #define LSM_SETID_FS 8 +/* Flags for security_task_prlimit(). 
*/ +#define LSM_PRLIMIT_READ 1 +#define LSM_PRLIMIT_WRITE 2 + /* forward declares to avoid warnings */ struct sched_param; struct request_sock; @@ -324,6 +328,8 @@ void security_task_getsecid(struct task_struct *p, u32 *secid); int security_task_setnice(struct task_struct *p, int nice); int security_task_setioprio(struct task_struct *p, int ioprio); int security_task_getioprio(struct task_struct *p); +int security_task_prlimit(const struct cred *cred, const struct cred *tcred, + unsigned int flags); int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim); int security_task_setscheduler(struct task_struct *p); @@ -949,6 +955,13 @@ static inline int security_task_getioprio(struct task_struct *p) return 0; } +static inline int security_task_prlimit(const struct cred *cred, + const struct cred *tcred, + unsigned int flags) +{ + return 0; +} + static inline int security_task_setrlimit(struct task_struct *p, unsigned int resource, struct rlimit *new_rlim) -- cgit v1.2.3 From dd0859dccbe291cf8179a96390f5c0e45cb9af1d Mon Sep 17 00:00:00 2001 From: James Morris Date: Wed, 15 Feb 2017 00:17:24 +1100 Subject: security: introduce CONFIG_SECURITY_WRITABLE_HOOKS Subsequent patches will add RO hardening to LSM hooks, however, SELinux still needs to be able to perform runtime disablement after init to handle architectures where init-time disablement via boot parameters is not feasible. Introduce a new kernel configuration parameter CONFIG_SECURITY_WRITABLE_HOOKS, and a helper macro __lsm_ro_after_init, to handle this case. 
Signed-off-by: James Morris Acked-by: Stephen Smalley Acked-by: Casey Schaufler Acked-by: Kees Cook --- include/linux/lsm_hooks.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index ba3049f05aea..1aa63335de9e 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1920,6 +1920,13 @@ static inline void security_delete_hooks(struct security_hook_list *hooks, } #endif /* CONFIG_SECURITY_SELINUX_DISABLE */ +/* Currently required to handle SELinux runtime hook disable. */ +#ifdef CONFIG_SECURITY_WRITABLE_HOOKS +#define __lsm_ro_after_init +#else +#define __lsm_ro_after_init __ro_after_init +#endif /* CONFIG_SECURITY_WRITABLE_HOOKS */ + extern int __init security_module_enable(const char *module); extern void __init capability_add_hooks(void); #ifdef CONFIG_SECURITY_YAMA -- cgit v1.2.3 From 5e6039d8a307d8411422c154f3d446b44fa32b6d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 27 Dec 2016 18:00:15 -0500 Subject: uaccess: move VERIFY_{READ,WRITE} definitions to linux/uaccess.h Signed-off-by: Al Viro --- include/linux/uaccess.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index f30c187ed785..b660f37beaf5 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,6 +2,10 @@ #define __LINUX_UACCESS_H__ #include + +#define VERIFY_READ 0 +#define VERIFY_WRITE 1 + #include static __always_inline void pagefault_disabled_inc(void) -- cgit v1.2.3 From af1d5b37d6211c814fac0d5d0b71ec695618054a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 27 Dec 2016 18:14:09 -0500 Subject: uaccess: drop duplicate includes from asm/uaccess.h Signed-off-by: Al Viro --- include/linux/uaccess.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index b660f37beaf5..b786ca2419b4 100644 --- 
a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -2,6 +2,7 @@ #define __LINUX_UACCESS_H__ #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 -- cgit v1.2.3 From f22775ede2eb58ed84b55e30768d041f607a2199 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Mon, 13 Feb 2017 14:37:41 +0200 Subject: ieee80211: add FT-PSK AKM suite selector Signed-off-by: Avraham Stern Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0dd9498c694f..6ea381c98aae 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2347,6 +2347,7 @@ enum ieee80211_sa_query_action { /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) #define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) +#define WLAN_AKM_SUITE_FT_PSK SUITE(0x000FAC, 4) #define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) #define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) -- cgit v1.2.3 From 2fb51c35815dc08638a7d9b1a497a9d7cb4109b8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Feb 2017 15:02:06 +0100 Subject: ieee80211: rename CCFS1/CCFS2 to CCFS0/CCFS1 This matches the spec, and otherwise things are really confusing with the next patch adding CCFS2. 
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 6ea381c98aae..e167a262d3b0 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1525,14 +1525,14 @@ enum ieee80211_vht_chanwidth { * This structure is the "VHT operation element" as * described in 802.11ac D3.0 8.4.2.161 * @chan_width: Operating channel width + * @center_freq_seg0_idx: center freq segment 0 index * @center_freq_seg1_idx: center freq segment 1 index - * @center_freq_seg2_idx: center freq segment 2 index * @basic_mcs_set: VHT Basic MCS rate set */ struct ieee80211_vht_operation { u8 chan_width; + u8 center_freq_seg0_idx; u8 center_freq_seg1_idx; - u8 center_freq_seg2_idx; __le16 basic_mcs_set; } __packed; -- cgit v1.2.3 From 75b99bc300463e65f87c90425704c2688489f963 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 15 Feb 2017 15:02:10 +0100 Subject: ieee80211: define HT operation CCFS2 field The Channel Center Frequency Segment 2 field is used in 802.11-2016 for encoding the actual channel position of the 80+80/160 MHz channel, if the max NSS is restricted. This is used for backwards compatibility. 
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index e167a262d3b0..22bf0676d928 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1411,6 +1411,8 @@ struct ieee80211_ht_operation { #define IEEE80211_HT_OP_MODE_PROTECTION_NONHT_MIXED 3 #define IEEE80211_HT_OP_MODE_NON_GF_STA_PRSNT 0x0004 #define IEEE80211_HT_OP_MODE_NON_HT_STA_PRSNT 0x0010 +#define IEEE80211_HT_OP_MODE_CCFS2_SHIFT 5 +#define IEEE80211_HT_OP_MODE_CCFS2_MASK 0x1fe0 /* for stbc_param */ #define IEEE80211_HT_STBC_PARAM_DUAL_BEACON 0x0040 -- cgit v1.2.3 From 826cf175ed705f70a49d04aca832c1cc9ff048d8 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 28 Feb 2017 14:25:18 -0800 Subject: spi: allow attaching device properties to SPI board info Generic device properties support statically defined property sets. For them to be usable, we need to attach these property sets before devices are registered and probed. Allowing to attach property list to spi_board_info structure will allow non-ACPI non-DT boards switch to using generic properties and get rid of custom platform data. Signed-off-by: Dmitry Torokhov Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 75c6bd0ac605..5a8c4b24f2dc 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -23,6 +23,7 @@ #include struct dma_chan; +struct property_entry; struct spi_master; struct spi_transfer; struct spi_flash_read_message; @@ -1209,6 +1210,7 @@ int spi_flash_read(struct spi_device *spi, * @modalias: Initializes spi_device.modalias; identifies the driver. * @platform_data: Initializes spi_device.platform_data; the particular * data stored there is driver-specific. + * @properties: Additional device properties for the device. 
* @controller_data: Initializes spi_device.controller_data; some * controllers need hints about hardware setup, e.g. for DMA. * @irq: Initializes spi_device.irq; depends on how the board is wired. @@ -1241,10 +1243,12 @@ struct spi_board_info { * * platform_data goes to spi_device.dev.platform_data, * controller_data goes to spi_device.controller_data, + * device properties are copied and attached to spi_device, * irq is copied too */ char modalias[SPI_NAME_SIZE]; const void *platform_data; + const struct property_entry *properties; void *controller_data; int irq; -- cgit v1.2.3 From 572d3c6444979a6a49c6b464110563f578e8dece Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 9 Feb 2017 18:03:57 -0800 Subject: HID: i2c-hid: support regulator power on/off On some boards, we need to enable a regulator before using the HID, and it's also nice to save power in suspend by disabling it. Support an optional "vdd-supply" and a companion initialization delay. Signed-off-by: Brian Norris Signed-off-by: Caesar Wang Acked-by: Benjamin Tissoires Reviewed-by: Dmitry Torokhov Cc: Jiri Kosina Cc: linux-input@vger.kernel.org Signed-off-by: Jiri Kosina --- include/linux/i2c/i2c-hid.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c/i2c-hid.h b/include/linux/i2c/i2c-hid.h index 7aa901d92058..1fb088239d12 100644 --- a/include/linux/i2c/i2c-hid.h +++ b/include/linux/i2c/i2c-hid.h @@ -14,9 +14,13 @@ #include +struct regulator; + /** * struct i2chid_platform_data - used by hid over i2c implementation. * @hid_descriptor_address: i2c register where the HID descriptor is stored. + * @supply: regulator for powering on the device. + * @post_power_delay_ms: delay after powering on before device is usable. 
* * Note that it is the responsibility of the platform driver (or the acpi 5.0 * driver, or the flattened device tree) to setup the irq related to the gpio in @@ -31,6 +35,8 @@ */ struct i2c_hid_platform_data { u16 hid_descriptor_address; + struct regulator *supply; + int post_power_delay_ms; }; #endif /* __LINUX_I2C_HID_H */ -- cgit v1.2.3 From 387ad9674b0013c8756ad20d854ff005b0c313ad Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Mon, 20 Feb 2017 12:19:00 +0200 Subject: kernel: convert cgroup_namespace.count from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f6b43fbb141c..44129793c7b8 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -640,7 +641,7 @@ static inline void cgroup_sk_free(struct sock_cgroup_data *skcd) {} #endif /* CONFIG_CGROUP_DATA */ struct cgroup_namespace { - atomic_t count; + refcount_t count; struct ns_common ns; struct user_namespace *user_ns; struct ucounts *ucounts; @@ -675,12 +676,12 @@ copy_cgroup_ns(unsigned long flags, struct user_namespace *user_ns, static inline void get_cgroup_ns(struct cgroup_namespace *ns) { if (ns) - atomic_inc(&ns->count); + refcount_inc(&ns->count); } static inline void put_cgroup_ns(struct cgroup_namespace *ns) { - if (ns && atomic_dec_and_test(&ns->count)) + if (ns && refcount_dec_and_test(&ns->count)) free_cgroup_ns(ns); } -- cgit v1.2.3 From fa5923cea8da3b5d4eb943651922b327b1df673c Mon Sep 17 00:00:00 2001 
From: =?UTF-8?q?Micha=C5=82=20K=C4=99pie=C5=84?= Date: Fri, 17 Feb 2017 08:57:48 +0100 Subject: ALSA: hda - use dell_micmute_led_set() instead of dell_app_wmi_led_set() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The dell_app_wmi_led_set() method introduced in commit db6d8cc00773 ("dell-led: add mic mute led interface") was implemented as an easily extensible entry point for other modules to set the state of various LEDs. However, almost three years later it is still only used to control the mic mute LED, so it will be replaced with direct calls to dell_micmute_led_set(). Signed-off-by: Michał Kępień Tested-by: Alex Hung Reviewed-by: Pali Rohár Acked-by: Takashi Iwai Signed-off-by: Jacek Anaszewski --- include/linux/dell-led.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dell-led.h b/include/linux/dell-led.h index 7009b8bec77b..3f033c48071e 100644 --- a/include/linux/dell-led.h +++ b/include/linux/dell-led.h @@ -1,10 +1,6 @@ #ifndef __DELL_LED_H__ #define __DELL_LED_H__ -enum { - DELL_LED_MICMUTE, -}; - -int dell_app_wmi_led_set(int whichled, int on); +int dell_micmute_led_set(int on); #endif -- cgit v1.2.3 From df789fe752065f2ce761ba434125e335b514899f Mon Sep 17 00:00:00 2001 From: David Forster Date: Thu, 23 Feb 2017 16:27:18 +0000 Subject: ipv6: Provide ipv6 version of "disable_policy" sysctl This provides equivalent functionality to the existing ipv4 "disable_policy" sysctl, i.e. it allows IPsec processing to be skipped on terminating packets on a per-interface basis. Signed-off-by: David Forster Signed-off-by: David S.
Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 71be5b330d21..f0d79bd054ca 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -70,6 +70,7 @@ struct ipv6_devconf { #endif __u32 enhanced_dad; __u32 addr_gen_mode; + __s32 disable_policy; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From f3dd3f4797652c311df9c074436d420f1ad3566e Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 27 Feb 2017 10:26:48 -0800 Subject: vmbus: introduce in-place packet iterator This is mostly just a refactoring of previous functions (get_pkt_next_raw, put_pkt_raw and commit_rd_index) to make it easier to use for other drivers and NAPI. Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/linux/hyperv.h | 96 ++++++++++++++++---------------------------------- 1 file changed, 30 insertions(+), 66 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62bbf3c1aa4a..36162485d663 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1504,14 +1504,6 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) return; } -static inline void -init_cached_read_index(struct vmbus_channel *channel) -{ - struct hv_ring_buffer_info *rbi = &channel->inbound; - - rbi->cached_read_index = rbi->ring_buffer->read_index; -} - /* * Mask off host interrupt callback notifications */ @@ -1545,76 +1537,48 @@ static inline u32 hv_end_read(struct hv_ring_buffer_info *rbi) /* * An API to support in-place processing of incoming VMBUS packets. 
*/ -#define VMBUS_PKT_TRAILER 8 -static inline struct vmpacket_descriptor * -get_next_pkt_raw(struct vmbus_channel *channel) +/* Get data payload associated with descriptor */ +static inline void *hv_pkt_data(const struct vmpacket_descriptor *desc) { - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 priv_read_loc = ring_info->priv_read_index; - void *ring_buffer = hv_get_ring_buffer(ring_info); - u32 dsize = ring_info->ring_datasize; - /* - * delta is the difference between what is available to read and - * what was already consumed in place. We commit read index after - * the whole batch is processed. - */ - u32 delta = priv_read_loc >= ring_info->ring_buffer->read_index ? - priv_read_loc - ring_info->ring_buffer->read_index : - (dsize - ring_info->ring_buffer->read_index) + priv_read_loc; - u32 bytes_avail_toread = (hv_get_bytes_to_read(ring_info) - delta); - - if (bytes_avail_toread < sizeof(struct vmpacket_descriptor)) - return NULL; - - return ring_buffer + priv_read_loc; + return (void *)((unsigned long)desc + (desc->offset8 << 3)); } -/* - * A helper function to step through packets "in-place" - * This API is to be called after each successful call - * get_next_pkt_raw(). - */ -static inline void put_pkt_raw(struct vmbus_channel *channel, - struct vmpacket_descriptor *desc) +/* Get data size associated with descriptor */ +static inline u32 hv_pkt_datalen(const struct vmpacket_descriptor *desc) { - struct hv_ring_buffer_info *ring_info = &channel->inbound; - u32 packetlen = desc->len8 << 3; - u32 dsize = ring_info->ring_datasize; - - /* - * Include the packet trailer. 
- */ - ring_info->priv_read_index += packetlen + VMBUS_PKT_TRAILER; - ring_info->priv_read_index %= dsize; + return (desc->len8 << 3) - (desc->offset8 << 3); } + +struct vmpacket_descriptor * +hv_pkt_iter_first(struct vmbus_channel *channel); + +struct vmpacket_descriptor * +__hv_pkt_iter_next(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt); + +void hv_pkt_iter_close(struct vmbus_channel *channel); + /* - * This call commits the read index and potentially signals the host. - * Here is the pattern for using the "in-place" consumption APIs: - * - * init_cached_read_index(); - * - * while (get_next_pkt_raw() { - * process the packet "in-place"; - * put_pkt_raw(); - * } - * if (packets processed in place) - * commit_rd_index(); + * Get next packet descriptor from iterator + * If at end of list, return NULL and update host. */ -static inline void commit_rd_index(struct vmbus_channel *channel) +static inline struct vmpacket_descriptor * +hv_pkt_iter_next(struct vmbus_channel *channel, + const struct vmpacket_descriptor *pkt) { - struct hv_ring_buffer_info *ring_info = &channel->inbound; - /* - * Make sure all reads are done before we update the read index since - * the writer may start writing to the read area once the read index - * is updated. 
- */ - virt_rmb(); - ring_info->ring_buffer->read_index = ring_info->priv_read_index; + struct vmpacket_descriptor *nxt; + + nxt = __hv_pkt_iter_next(channel, pkt); + if (!nxt) + hv_pkt_iter_close(channel); - hv_signal_on_read(channel); + return nxt; } +#define foreach_vmbus_pkt(pkt, channel) \ + for (pkt = hv_pkt_iter_first(channel); pkt; \ + pkt = hv_pkt_iter_next(channel, pkt)) #endif /* _HYPERV_H */ -- cgit v1.2.3 From 9b4b5a797cf8a8d904df979891a8de53f2cb9694 Mon Sep 17 00:00:00 2001 From: Milan Broz Date: Wed, 4 Jan 2017 20:23:51 +0100 Subject: dm table: add flag to allow target to handle its own integrity metadata Add DM_TARGET_INTEGRITY flag that specifies bio integrity metadata is not inherited but implemented in the target itself. Signed-off-by: Milan Broz Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7e6903866fd..874462153f14 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -221,6 +221,12 @@ struct target_type { */ typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio); +/* + * A target implements own bio data integrity. + */ +#define DM_TARGET_INTEGRITY 0x00000010 +#define dm_target_has_integrity(type) ((type)->features & DM_TARGET_INTEGRITY) + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3 From 0edae0b3ffa6fc968d63932347a4d74b0ad0340b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Mar 2017 12:16:16 -0800 Subject: pstore: Add kernel-doc for struct pstore_info This adds documentation for struct pstore_info, which also includes the basic API the backends need to implement. 
Signed-off-by: Kees Cook --- include/linux/pstore.h | 133 +++++++++++++++++++++++++++++++++++++++++++++++-- 1 file changed, 128 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 0da29cae009b..56477ce6806a 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -30,7 +30,7 @@ #include #include -/* types */ +/* pstore record types (see fs/pstore/inode.c for filename templates) */ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, PSTORE_TYPE_MCE = 1, @@ -47,14 +47,138 @@ enum pstore_type_id { struct module; +/** + * struct pstore_info - backend pstore driver structure + * + * @owner: module which is responsible for this backend driver + * @name: name of the backend driver + * + * @buf_lock: spinlock to serialize access to @buf + * @buf: preallocated crash dump buffer + * @bufsize: size of @buf available for crash dump writes + * + * @read_mutex: serializes @open, @read, @close, and @erase callbacks + * @flags: bitfield of frontends the backend can accept writes for + * @data: backend-private pointer passed back during callbacks + * + * Callbacks: + * + * @open: + * Notify backend that pstore is starting a full read of backend + * records. Followed by one or more @read calls, and a final @close. + * + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. + * + * @close: + * Notify backend that pstore has finished a full read of backend + * records. Always preceded by an @open call and one or more @read + * calls. + * + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. (Though pstore will + * ignore the error.) + * + * @read: + * Read next available backend record. Called after a successful + * @open. + * + * @id: out: unique identifier for the record + * @type: out: pstore record type + * @count: out: for PSTORE_TYPE_DMESG, the Oops count.
+ * @time: out: timestamp for the record + * @buf: out: kmalloc copy of record contents, to be freed by pstore + * @compressed: + * out: if the record contents are compressed + * @ecc_notice_size: + * out: ECC information + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns record size on success, zero when no more records are + * available, or negative on error. + * + * @write: + * Perform a frontend notification of a write to a backend record. The + * data to be stored has already been written to the registered @buf + * of the @psi structure. + * + * @type: in: pstore record type to write + * @reason: + * in: pstore write reason + * @id: out: unique identifier for the record + * @part: in: position in a multipart write + * @count: in: increasing from 0 since boot, the number of this Oops + * @compressed: + * in: if the record is compressed + * @size: in: size of the write + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. + * + * @write_buf: + * Perform a frontend write to a backend record, using a specified + * buffer. Unlike @write, this does not use the @psi @buf. + * + * @type: in: pstore record type to write + * @reason: + * in: pstore write reason + * @id: out: unique identifier for the record + * @part: in: position in a multipart write + * @buf: in: pointer to contents to write to backend record + * @compressed: + * in: if the record is compressed + * @size: in: size of the write + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. + * + * @write_buf_user: + * Perform a frontend write to a backend record, using a specified + * buffer that is coming directly from userspace. 
+ * + * @type: in: pstore record type to write + * @reason: + * in: pstore write reason + * @id: out: unique identifier for the record + * @part: in: position in a multipart write + * @buf: in: pointer to userspace contents to write to backend record + * @compressed: + * in: if the record is compressed + * @size: in: size of the write + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. + * + * @erase: + * Delete a record from backend storage. Different backends + * identify records differently, so all possible methods of + * identification are included to help the backend locate the + * record to remove. + * + * @type: in: pstore record type to write + * @id: in: per-type unique identifier for the record + * @count: in: Oops count + * @time: in: timestamp for the record + * @psi: in: pointer to the struct pstore_info for the backend + * + * Returns 0 on success, and non-zero on error. + * + */ struct pstore_info { struct module *owner; char *name; - spinlock_t buf_lock; /* serialize access to 'buf' */ + + spinlock_t buf_lock; char *buf; size_t bufsize; - struct mutex read_mutex; /* serialize open/read/close */ + + struct mutex read_mutex; + int flags; + void *data; + int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(u64 *id, enum pstore_type_id *type, @@ -76,11 +200,10 @@ struct pstore_info { int (*erase)(enum pstore_type_id type, u64 id, int count, struct timespec time, struct pstore_info *psi); - void *data; }; +/* Supported frontends */ #define PSTORE_FLAGS_DMESG (1 << 0) -#define PSTORE_FLAGS_FRAGILE PSTORE_FLAGS_DMESG #define PSTORE_FLAGS_CONSOLE (1 << 1) #define PSTORE_FLAGS_FTRACE (1 << 2) #define PSTORE_FLAGS_PMSG (1 << 3) -- cgit v1.2.3 From 9abdcccc3d5f3c72f25cd48160f60d911353bee9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Mar 2017 16:59:29 -0800 Subject: pstore: Extract common arguments into structure The read/mkfile pair pass the same 
arguments and should be cleared between calls. Move to a structure and wipe it after every loop. Signed-off-by: Kees Cook --- include/linux/pstore.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 56477ce6806a..745468072d6e 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -30,6 +30,8 @@ #include #include +struct module; + /* pstore record types (see fs/pstore/inode.c for filename templates) */ enum pstore_type_id { PSTORE_TYPE_DMESG = 0, @@ -45,7 +47,31 @@ enum pstore_type_id { PSTORE_TYPE_UNKNOWN = 255 }; -struct module; +struct pstore_info; +/** + * struct pstore_record - details of a pstore record entry + * @psi: pstore backend driver information + * @type: pstore record type + * @id: per-type unique identifier for record + * @time: timestamp of the record + * @count: for PSTORE_TYPE_DMESG, the Oops count. + * @compressed: for PSTORE_TYPE_DMESG, whether the buffer is compressed + * @buf: pointer to record contents + * @size: size of @buf + * @ecc_notice_size: + * ECC information for @buf + */ +struct pstore_record { + struct pstore_info *psi; + enum pstore_type_id type; + u64 id; + struct timespec time; + int count; + bool compressed; + char *buf; + ssize_t size; + ssize_t ecc_notice_size; +}; /** * struct pstore_info - backend pstore driver structure -- cgit v1.2.3 From 125cc42baf8ab2149c207f8a360ea25668b8422d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Mar 2017 22:09:18 -0800 Subject: pstore: Replace arguments for read() API The argument list for the pstore_read() interface is unwieldy. This change passes the new struct pstore_record instead. The erst backend was already doing something similar internally.
Signed-off-by: Kees Cook --- include/linux/pstore.h | 20 ++++++-------------- 1 file changed, 6 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 745468072d6e..22a46ebbe041 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -111,16 +111,11 @@ struct pstore_record { * Read next available backend record. Called after a successful * @open. * - * @id: out: unique identifier for the record - * @type: out: pstore record type - * @count: out: for PSTORE_TYPE_DMESG, the Oops count. - * @time: out: timestamp for the record - * @buf: out: kmalloc copy of record contents, to be freed by pstore - * @compressed: - * out: if the record contents are compressed - * @ecc_notice_size: - * out: ECC information - * @psi: in: pointer to the struct pstore_info for the backend + * @record: + * pointer to record to populate. @buf should be allocated + * by the backend and filled. At least @type and @id should + * be populated, since these are used when creating pstorefs + * file names. * * Returns record size on success, zero when no more records are * available, or negative on error. @@ -207,10 +202,7 @@ struct pstore_info { int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); - ssize_t (*read)(u64 *id, enum pstore_type_id *type, - int *count, struct timespec *time, char **buf, - bool *compressed, ssize_t *ecc_notice_size, - struct pstore_info *psi); + ssize_t (*read)(struct pstore_record *record); int (*write)(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, int count, bool compressed, -- cgit v1.2.3 From 76cc9580e3fbd323651d06e8184a5a54e0e1066e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Fri, 3 Mar 2017 23:28:53 -0800 Subject: pstore: Replace arguments for write() API Similar to the pstore_info read() callback, there were too many arguments. This switches to the new struct pstore_record pointer instead. 
This adds "reason" and "part" to the record structure as well. Signed-off-by: Kees Cook --- include/linux/pstore.h | 36 ++++++++++++++++++------------------ 1 file changed, 18 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 22a46ebbe041..9335f75c3ddb 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -54,23 +54,32 @@ struct pstore_info; * @type: pstore record type * @id: per-type unique identifier for record * @time: timestamp of the record - * @count: for PSTORE_TYPE_DMESG, the Oops count. - * @compressed: for PSTORE_TYPE_DMESG, whether the buffer is compressed * @buf: pointer to record contents * @size: size of @buf * @ecc_notice_size: * ECC information for @buf + * + * Valid for PSTORE_TYPE_DMESG @type: + * + * @count: Oops count since boot + * @reason: kdump reason for notification + * @part: position in a multipart record + * @compressed: whether the buffer is compressed + * */ struct pstore_record { struct pstore_info *psi; enum pstore_type_id type; u64 id; struct timespec time; - int count; - bool compressed; char *buf; ssize_t size; ssize_t ecc_notice_size; + + int count; + enum kmsg_dump_reason reason; + unsigned int part; + bool compressed; }; /** @@ -125,16 +134,10 @@ struct pstore_record { * data to be stored has already been written to the registered @buf * of the @psi structure. * - * @type: in: pstore record type to write - * @reason: - * in: pstore write reason - * @id: out: unique identifier for the record - * @part: in: position in a multipart write - * @count: in: increasing from 0 since boot, the number of this Oops - * @compressed: - * in: if the record is compressed - * @size: in: size of the write - * @psi: in: pointer to the struct pstore_info for the backend + * @record: + * pointer to record metadata. Note that @buf is NULL, since + * the @buf registered with @psi is what has been written. The + * backend is expected to update @id. 
* * Returns 0 on success, and non-zero on error. * @@ -203,10 +206,7 @@ struct pstore_info { int (*open)(struct pstore_info *psi); int (*close)(struct pstore_info *psi); ssize_t (*read)(struct pstore_record *record); - int (*write)(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, int count, bool compressed, - size_t size, struct pstore_info *psi); + int (*write)(struct pstore_record *record); int (*write_buf)(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, const char *buf, bool compressed, -- cgit v1.2.3 From a61072aae693ba08390f92eed1dd0573fa5c3cd9 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sat, 4 Mar 2017 23:31:19 -0800 Subject: pstore: Replace arguments for erase() API This removes the argument list for the erase() callback and replaces it with a pointer to the backend record details to be removed. Signed-off-by: Kees Cook --- include/linux/pstore.h | 16 +++++----------- 1 file changed, 5 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 9335f75c3ddb..2cd1979d1f9a 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -177,15 +177,11 @@ struct pstore_record { * * @erase: * Delete a record from backend storage. Different backends - * identify records differently, so all possible methods of - * identification are included to help the backend locate the - * record to remove. + * identify records differently, so entire original record is + * passed back to assist in identification of what the backend + * should remove from storage. * - * @type: in: pstore record type to write - * @id: in: per-type unique identifier for the record - * @count: in: Oops count - * @time: in: timestamp for the record - * @psi: in: pointer to the struct pstore_info for the backend + * @record: pointer to record metadata. * * Returns 0 on success, and non-zero on error. 
* @@ -215,9 +211,7 @@ struct pstore_info { enum kmsg_dump_reason reason, u64 *id, unsigned int part, const char __user *buf, bool compressed, size_t size, struct pstore_info *psi); - int (*erase)(enum pstore_type_id type, u64 id, - int count, struct timespec time, - struct pstore_info *psi); + int (*erase)(struct pstore_record *record); }; /* Supported frontends */ -- cgit v1.2.3 From b10b471145f28c219d9ddcc309a67e053776865a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 5 Mar 2017 00:27:54 -0800 Subject: pstore: Replace arguments for write_buf() API As with the other API updates, this removes the long argument list in favor of passing a single pstore record. Signed-off-by: Kees Cook --- include/linux/pstore.h | 21 +++++---------------- 1 file changed, 5 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 2cd1979d1f9a..cbf5e561778d 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -142,19 +142,11 @@ struct pstore_record { * Returns 0 on success, and non-zero on error. * * @write_buf: - * Perform a frontend write to a backend record, using a specified - * buffer. Unlike @write, this does not use the @psi @buf. + * Perform a frontend write to a backend record. The record contains + * all metadata and the buffer to write to backend storage. (Unlike + * @write, this does not use the @psi @buf.) * - * @type: in: pstore record type to write - * @reason: - * in: pstore write reason - * @id: out: unique identifier for the record - * @part: in: position in a multipart write - * @buf: in: pointer to contents to write to backend record - * @compressed: - * in: if the record is compressed - * @size: in: size of the write - * @psi: in: pointer to the struct pstore_info for the backend + * @record: pointer to record metadata. * * Returns 0 on success, and non-zero on error.
* @@ -203,10 +195,7 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(struct pstore_record *record); int (*write)(struct pstore_record *record); - int (*write_buf)(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, const char *buf, bool compressed, - size_t size, struct pstore_info *psi); + int (*write_buf)(struct pstore_record *record); int (*write_buf_user)(enum pstore_type_id type, enum kmsg_dump_reason reason, u64 *id, unsigned int part, const char __user *buf, -- cgit v1.2.3 From fdd0311863b32b42bb2c54e60c987bbbabc0c430 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 5 Mar 2017 00:56:38 -0800 Subject: pstore: Replace arguments for write_buf_user() API Removes argument list in favor of pstore record, though the user buffer remains passed separately since it must carry the __user annotation. Signed-off-by: Kees Cook --- include/linux/pstore.h | 23 +++++++---------------- 1 file changed, 7 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index cbf5e561778d..9b85d3eeca83 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -152,18 +152,11 @@ struct pstore_record { * * @write_buf_user: * Perform a frontend write to a backend record, using a specified - * buffer that is coming directly from userspace. - * - * @type: in: pstore record type to write - * @reason: - * in: pstore write reason - * @id: out: unique identifier for the record - * @part: in: position in a multipart write - * @buf: in: pointer to userspace contents to write to backend record - * @compressed: - * in: if the record is compressed - * @size: in: size of the write - * @psi: in: pointer to the struct pstore_info for the backend + * buffer that is coming directly from userspace, instead of the + * @record @buf. + * + * @record: pointer to record metadata. 
+ * @buf: pointer to userspace contents to write to backend * * Returns 0 on success, and non-zero on error. * @@ -196,10 +189,8 @@ struct pstore_info { ssize_t (*read)(struct pstore_record *record); int (*write)(struct pstore_record *record); int (*write_buf)(struct pstore_record *record); - int (*write_buf_user)(enum pstore_type_id type, - enum kmsg_dump_reason reason, u64 *id, - unsigned int part, const char __user *buf, - bool compressed, size_t size, struct pstore_info *psi); + int (*write_buf_user)(struct pstore_record *record, + const char __user *buf); int (*erase)(struct pstore_record *record); }; -- cgit v1.2.3 From 4c9ec219766a217468fb94a281c416455a884dda Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 5 Mar 2017 22:41:10 -0800 Subject: pstore: Remove write_buf() callback Now that write() and write_buf() are functionally identical, this removes write_buf(), and renames write_buf_user() to write_user(). Additionally adds sanity-checks for pstore_info's declared functions and flags at registration time. Signed-off-by: Kees Cook --- include/linux/pstore.h | 29 ++++++++++------------------- 1 file changed, 10 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 9b85d3eeca83..e2233f50f428 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -130,27 +130,19 @@ struct pstore_record { * available, or negative on error. * * @write: - * Perform a frontend notification of a write to a backend record. The - * data to be stored has already been written to the registered @buf - * of the @psi structure. + * A newly generated record needs to be written to backend storage. * * @record: - * pointer to record metadata. Note that @buf is NULL, since - * the @buf registered with @psi is what has been written. The - * backend is expected to update @id. + * pointer to record metadata. 
When @type is PSTORE_TYPE_DMESG, + * @buf will be pointing to the preallocated @psi.buf, since + * memory allocation may be broken during an Oops. Regardless, + * @buf must be processed or copied before returning. The + * backend is also expected to write @id with something that + * can help identify this record to a future @erase callback. * * Returns 0 on success, and non-zero on error. * - * @write_buf: - * Perform a frontend write to a backend record. The record contains - * all metadata and the buffer to write to backend storage. (Unlike - * @write, this does not use the @psi @buf.) - * - * @record: pointer to record metadata. - * - * Returns 0 on success, and non-zero on error. - * - * @write_buf_user: + * @write_user: * Perform a frontend write to a backend record, using a specified * buffer that is coming directly from userspace, instead of the * @record @buf. @@ -188,9 +180,8 @@ struct pstore_info { int (*close)(struct pstore_info *psi); ssize_t (*read)(struct pstore_record *record); int (*write)(struct pstore_record *record); - int (*write_buf)(struct pstore_record *record); - int (*write_buf_user)(struct pstore_record *record, - const char __user *buf); + int (*write_user)(struct pstore_record *record, + const char __user *buf); int (*erase)(struct pstore_record *record); }; -- cgit v1.2.3 From af085d9084b48530153f51e6cad19fd0b1a13ed7 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:28 -0600 Subject: stacktrace/x86: add function for detecting reliable stack traces For live patching and possibly other use cases, a stack trace is only useful if it can be assured that it's completely reliable. Add a new save_stack_trace_tsk_reliable() function to achieve that. Note that if the target task isn't the current task, and the target task is allowed to run, then it could be writing the stack while the unwinder is reading it, resulting in possible corruption.
So the caller of save_stack_trace_tsk_reliable() must ensure that the task is either 'current' or inactive. save_stack_trace_tsk_reliable() relies on the x86 unwinder's detection of pt_regs on the stack. If the pt_regs are not user-mode registers from a syscall, then they indicate an in-kernel interrupt or exception (e.g. preemption or a page fault), in which case the stack is considered unreliable due to the nature of frame pointers. It also relies on the x86 unwinder's detection of other issues, such as: - corrupted stack data - stack grows the wrong way - stack walk doesn't reach the bottom - user didn't provide a large enough entries array Such issues are reported by checking unwind_error() and !unwind_done(). Also add CONFIG_HAVE_RELIABLE_STACKTRACE so arch-independent code can determine at build time whether the function is implemented. Signed-off-by: Josh Poimboeuf Reviewed-by: Miroslav Benes Acked-by: Ingo Molnar # for the x86 changes Signed-off-by: Jiri Kosina --- include/linux/stacktrace.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stacktrace.h b/include/linux/stacktrace.h index 0a34489a46b6..4205f71a5f0e 100644 --- a/include/linux/stacktrace.h +++ b/include/linux/stacktrace.h @@ -18,6 +18,8 @@ extern void save_stack_trace_regs(struct pt_regs *regs, struct stack_trace *trace); extern void save_stack_trace_tsk(struct task_struct *tsk, struct stack_trace *trace); +extern int save_stack_trace_tsk_reliable(struct task_struct *tsk, + struct stack_trace *trace); extern void print_stack_trace(struct stack_trace *trace, int spaces); extern int snprint_stack_trace(char *buf, size_t size, @@ -29,12 +31,13 @@ extern void save_stack_trace_user(struct stack_trace *trace); # define save_stack_trace_user(trace) do { } while (0) #endif -#else +#else /* !CONFIG_STACKTRACE */ # define save_stack_trace(trace) do { } while (0) # define save_stack_trace_tsk(tsk, trace) do { } while (0) # define 
save_stack_trace_user(trace) do { } while (0) # define print_stack_trace(trace, spaces) do { } while (0) # define snprint_stack_trace(buf, size, trace, spaces) do { } while (0) -#endif +# define save_stack_trace_tsk_reliable(tsk, trace) ({ -ENOSYS; }) +#endif /* CONFIG_STACKTRACE */ -#endif +#endif /* __LINUX_STACKTRACE_H */ -- cgit v1.2.3 From 46c5a0113f843be5c55b1c40dd486538891156d4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:30 -0600 Subject: livepatch: create temporary klp_update_patch_state() stub Create temporary stubs for klp_update_patch_state() so we can add TIF_PATCH_PENDING to different architectures in separate patches without breaking build bisectability. Signed-off-by: Josh Poimboeuf Reviewed-by: Petr Mladek Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 9072f04db616..5cc20e588a22 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -123,10 +123,13 @@ void arch_klp_init_object_loaded(struct klp_patch *patch, int klp_module_coming(struct module *mod); void klp_module_going(struct module *mod); +void klp_update_patch_state(struct task_struct *task); + #else /* !CONFIG_LIVEPATCH */ static inline int klp_module_coming(struct module *mod) { return 0; } -static inline void klp_module_going(struct module *mod) { } +static inline void klp_module_going(struct module *mod) {} +static inline void klp_update_patch_state(struct task_struct *task) {} #endif /* CONFIG_LIVEPATCH */ -- cgit v1.2.3 From 0dade9f374f1c15f9b43ab01ab75a3b459bba5f6 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:35 -0600 Subject: livepatch: separate enabled and patched states Once we have a consistency model, patches and their objects will be enabled and disabled at different times. 
For example, when a patch is disabled, its loaded objects' funcs can remain registered with ftrace indefinitely until the unpatching operation is complete and they're no longer in use. It's less confusing if we give them different names: patches can be enabled or disabled; objects (and their funcs) can be patched or unpatched: - Enabled means that a patch is logically enabled (but not necessarily fully applied). - Patched means that an object's funcs are registered with ftrace and added to the klp_ops func stack. Also, since these states are binary, represent them with booleans instead of ints. Signed-off-by: Josh Poimboeuf Acked-by: Miroslav Benes Reviewed-by: Petr Mladek Reviewed-by: Kamalesh Babulal Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 5cc20e588a22..9787a63b57ac 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -28,11 +28,6 @@ #include -enum klp_state { - KLP_DISABLED, - KLP_ENABLED -}; - /** * struct klp_func - function structure for live patching * @old_name: name of the function to be patched @@ -41,8 +36,8 @@ enum klp_state { * can be found (optional) * @old_addr: the address of the function being patched * @kobj: kobject for sysfs resources - * @state: tracks function-level patch application state * @stack_node: list node for klp_ops func_stack list + * @patched: the func has been added to the klp_ops list */ struct klp_func { /* external */ @@ -60,8 +55,8 @@ struct klp_func { /* internal */ unsigned long old_addr; struct kobject kobj; - enum klp_state state; struct list_head stack_node; + bool patched; }; /** @@ -71,7 +66,7 @@ struct klp_func { * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object * (NULL for vmlinux) - * @state: tracks object-level patch application state + * @patched: the object's 
funcs have been added to the klp_ops list */ struct klp_object { /* external */ @@ -81,7 +76,7 @@ struct klp_object { /* internal */ struct kobject kobj; struct module *mod; - enum klp_state state; + bool patched; }; /** @@ -90,7 +85,7 @@ struct klp_object { * @objs: object entries for kernel objects to be patched * @list: list node for global list of registered patches * @kobj: kobject for sysfs resources - * @state: tracks patch-level application state + * @enabled: the patch is enabled (but operation may be incomplete) */ struct klp_patch { /* external */ @@ -100,7 +95,7 @@ struct klp_patch { /* internal */ struct list_head list; struct kobject kobj; - enum klp_state state; + bool enabled; }; #define klp_for_each_object(patch, obj) \ -- cgit v1.2.3 From f5e547f4ac785c65a39211f0b8e4ffc4fe09112d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:39 -0600 Subject: livepatch: store function sizes For the consistency model we'll need to know the sizes of the old and new functions to determine if they're on the stacks of any tasks. 
Signed-off-by: Josh Poimboeuf Acked-by: Miroslav Benes Reviewed-by: Petr Mladek Reviewed-by: Kamalesh Babulal Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 9787a63b57ac..6602b34bed2b 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -37,6 +37,8 @@ * @old_addr: the address of the function being patched * @kobj: kobject for sysfs resources * @stack_node: list node for klp_ops func_stack list + * @old_size: size of the old function + * @new_size: size of the new function * @patched: the func has been added to the klp_ops list */ struct klp_func { @@ -56,6 +58,7 @@ struct klp_func { unsigned long old_addr; struct kobject kobj; struct list_head stack_node; + unsigned long old_size, new_size; bool patched; }; -- cgit v1.2.3 From d83a7cb375eec21f04c83542395d08b2f6641da2 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 13 Feb 2017 19:42:40 -0600 Subject: livepatch: change to a per-task consistency model Change livepatch to use a basic per-task consistency model. This is the foundation which will eventually enable us to patch those ~10% of security patches which change function or data semantics. This is the biggest remaining piece needed to make livepatch more generally useful. This code stems from the design proposal made by Vojtech [1] in November 2014. It's a hybrid of kGraft and kpatch: it uses kGraft's per-task consistency and syscall barrier switching combined with kpatch's stack trace switching. There are also a number of fallback options which make it quite flexible. Patches are applied on a per-task basis, when the task is deemed safe to switch over. When a patch is enabled, livepatch enters into a transition state where tasks are converging to the patched state. Usually this transition state can complete in a few seconds. 
The same sequence occurs when a patch is disabled, except the tasks converge from the patched state to the unpatched state. An interrupt handler inherits the patched state of the task it interrupts. The same is true for forked tasks: the child inherits the patched state of the parent. Livepatch uses several complementary approaches to determine when it's safe to patch tasks: 1. The first and most effective approach is stack checking of sleeping tasks. If no affected functions are on the stack of a given task, the task is patched. In most cases this will patch most or all of the tasks on the first try. Otherwise it'll keep trying periodically. This option is only available if the architecture has reliable stacks (HAVE_RELIABLE_STACKTRACE). 2. The second approach, if needed, is kernel exit switching. A task is switched when it returns to user space from a system call, a user space IRQ, or a signal. It's useful in the following cases: a) Patching I/O-bound user tasks which are sleeping on an affected function. In this case you have to send SIGSTOP and SIGCONT to force it to exit the kernel and be patched. b) Patching CPU-bound user tasks. If the task is highly CPU-bound then it will get patched the next time it gets interrupted by an IRQ. c) In the future it could be useful for applying patches for architectures which don't yet have HAVE_RELIABLE_STACKTRACE. In this case you would have to signal most of the tasks on the system. However this isn't supported yet because there's currently no way to patch kthreads without HAVE_RELIABLE_STACKTRACE. 3. For idle "swapper" tasks, since they don't ever exit the kernel, they instead have a klp_update_patch_state() call in the idle loop which allows them to be patched before the CPU enters the idle state. (Note there's not yet such an approach for kthreads.) All the above approaches may be skipped by setting the 'immediate' flag in the 'klp_patch' struct, which will disable per-task consistency and patch all tasks immediately. 
This can be useful if the patch doesn't change any function or data semantics. Note that, even with this flag set, it's possible that some tasks may still be running with an old version of the function, until that function returns. There's also an 'immediate' flag in the 'klp_func' struct which allows you to specify that certain functions in the patch can be applied without per-task consistency. This might be useful if you want to patch a common function like schedule(), and the function change doesn't need consistency but the rest of the patch does. For architectures which don't have HAVE_RELIABLE_STACKTRACE, the user must set patch->immediate which causes all tasks to be patched immediately. This option should be used with care, only when the patch doesn't change any function or data semantics. In the future, architectures which don't have HAVE_RELIABLE_STACKTRACE may be allowed to use per-task consistency if we can come up with another way to patch kthreads. The /sys/kernel/livepatch/<patch>/transition file shows whether a patch is in transition. Only a single patch (the topmost patch on the stack) can be in transition at a given time. A patch can remain in transition indefinitely, if any of the tasks are stuck in the initial patch state. A transition can be reversed and effectively canceled by writing the opposite value to the /sys/kernel/livepatch/<patch>/enabled file while the transition is in progress. Then all the tasks will attempt to converge back to the original patch state. 
[1] https://lkml.kernel.org/r/20141107140458.GA21774@suse.cz Signed-off-by: Josh Poimboeuf Acked-by: Miroslav Benes Acked-by: Ingo Molnar # for the scheduler changes Signed-off-by: Jiri Kosina --- include/linux/init_task.h | 9 +++++++++ include/linux/livepatch.h | 42 +++++++++++++++++++++++++++++++++++++++++- include/linux/sched.h | 3 +++ 3 files changed, 53 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 91d9049f0039..5a791055b176 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include @@ -202,6 +203,13 @@ extern struct cred init_cred; # define INIT_KASAN(tsk) #endif +#ifdef CONFIG_LIVEPATCH +# define INIT_LIVEPATCH(tsk) \ + .patch_state = KLP_UNDEFINED, +#else +# define INIT_LIVEPATCH(tsk) +#endif + #ifdef CONFIG_THREAD_INFO_IN_TASK # define INIT_TASK_TI(tsk) \ .thread_info = INIT_THREAD_INFO(tsk), \ @@ -288,6 +296,7 @@ extern struct cred init_cred; INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ INIT_KASAN(tsk) \ + INIT_LIVEPATCH(tsk) \ } diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 6602b34bed2b..ed90ad1605c1 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -28,18 +28,40 @@ #include +/* task patch states */ +#define KLP_UNDEFINED -1 +#define KLP_UNPATCHED 0 +#define KLP_PATCHED 1 + /** * struct klp_func - function structure for live patching * @old_name: name of the function to be patched * @new_func: pointer to the patched function code * @old_sympos: a hint indicating which symbol position the old function * can be found (optional) + * @immediate: patch the func immediately, bypassing safety mechanisms * @old_addr: the address of the function being patched * @kobj: kobject for sysfs resources * @stack_node: list node for klp_ops func_stack list * @old_size: size of the old function * @new_size: size of the new function * @patched: the func has 
been added to the klp_ops list + * @transition: the func is currently being applied or reverted + * + * The patched and transition variables define the func's patching state. When + * patching, a func is always in one of the following states: + * + * patched=0 transition=0: unpatched + * patched=0 transition=1: unpatched, temporary starting state + * patched=1 transition=1: patched, may be visible to some tasks + * patched=1 transition=0: patched, visible to all tasks + * + * And when unpatching, it goes in the reverse order: + * + * patched=1 transition=0: patched, visible to all tasks + * patched=1 transition=1: patched, may be visible to some tasks + * patched=0 transition=1: unpatched, temporary ending state + * patched=0 transition=0: unpatched */ struct klp_func { /* external */ @@ -53,6 +75,7 @@ struct klp_func { * in kallsyms for the given object is used. */ unsigned long old_sympos; + bool immediate; /* internal */ unsigned long old_addr; @@ -60,6 +83,7 @@ struct klp_func { struct list_head stack_node; unsigned long old_size, new_size; bool patched; + bool transition; }; /** @@ -68,7 +92,7 @@ struct klp_func { * @funcs: function entries for functions to be patched in the object * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object - * (NULL for vmlinux) + * (NULL for vmlinux) * @patched: the object's funcs have been added to the klp_ops list */ struct klp_object { @@ -86,6 +110,7 @@ struct klp_object { * struct klp_patch - patch structure for live patching * @mod: reference to the live patch module * @objs: object entries for kernel objects to be patched + * @immediate: patch all funcs immediately, bypassing safety mechanisms * @list: list node for global list of registered patches * @kobj: kobject for sysfs resources * @enabled: the patch is enabled (but operation may be incomplete) @@ -94,6 +119,7 @@ struct klp_patch { /* external */ struct module *mod; struct klp_object *objs; + bool immediate; /* internal */ 
struct list_head list; @@ -121,13 +147,27 @@ void arch_klp_init_object_loaded(struct klp_patch *patch, int klp_module_coming(struct module *mod); void klp_module_going(struct module *mod); +void klp_copy_process(struct task_struct *child); void klp_update_patch_state(struct task_struct *task); +static inline bool klp_patch_pending(struct task_struct *task) +{ + return test_tsk_thread_flag(task, TIF_PATCH_PENDING); +} + +static inline bool klp_have_reliable_stack(void) +{ + return IS_ENABLED(CONFIG_STACKTRACE) && + IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE); +} + #else /* !CONFIG_LIVEPATCH */ static inline int klp_module_coming(struct module *mod) { return 0; } static inline void klp_module_going(struct module *mod) {} +static inline bool klp_patch_pending(struct task_struct *task) { return false; } static inline void klp_update_patch_state(struct task_struct *task) {} +static inline void klp_copy_process(struct task_struct *child) {} #endif /* CONFIG_LIVEPATCH */ diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..e11032010318 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1037,6 +1037,9 @@ struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ atomic_t stack_refcount; +#endif +#ifdef CONFIG_LIVEPATCH + int patch_state; #endif /* CPU-specific state of this task: */ struct thread_struct thread; -- cgit v1.2.3 From 3ec24776bfd09668079df7dca0c0136d80820ab4 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 6 Mar 2017 11:20:29 -0600 Subject: livepatch: allow removal of a disabled patch Currently we do not allow patch module to unload since there is no method to determine if a task is still running in the patched code. The consistency model gives us the way because when the unpatching finishes we know that all tasks were marked as safe to call an original function. 
Thus every new call to the function calls the original code and at the same time no task can be somewhere in the patched code, because it had to leave that code to be marked as safe. We can safely let the patch module go after that. Completion is used for synchronization between module removal and sysfs infrastructure in a similar way to commit 942e443127e9 ("module: Fix mod->mkobj.kobj potentially freed too early"). Note that we still do not allow the removal for the immediate model, that is, when there is no consistency model. The module refcount may increase in this case if somebody disables and enables the patch several times. This should not cause any harm. With this change a call to try_module_get() is moved to __klp_enable_patch from klp_register_patch to make module reference counting symmetric (module_put() is in a patch disable path) and to allow taking a new reference to a disabled module when being enabled. Finally, we need to be very careful about possible races between klp_unregister_patch(), kobject_put() functions and operations on the related sysfs files. kobject_put(&patch->kobj) must be called without klp_mutex. Otherwise, it might be blocked by enabled_store() that needs the mutex as well. In addition, enabled_store() must check if the patch was not unregistered in the meantime. There is no need to do the same for other kobject_put() callsites at the moment. Their sysfs operations neither take the lock nor access any data that might be freed in the meantime. There was an attempt to use kobjects the right way and prevent these races by design. But it made the patch definition more complicated and opened another can of worms. See https://lkml.kernel.org/r/1464018848-4303-1-git-send-email-pmladek@suse.com [Thanks to Petr Mladek for improving the commit message.] 
Signed-off-by: Miroslav Benes Signed-off-by: Josh Poimboeuf Reviewed-by: Petr Mladek Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index ed90ad1605c1..194991ef9347 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -23,6 +23,7 @@ #include #include +#include #if IS_ENABLED(CONFIG_LIVEPATCH) @@ -114,6 +115,7 @@ struct klp_object { * @list: list node for global list of registered patches * @kobj: kobject for sysfs resources * @enabled: the patch is enabled (but operation may be incomplete) + * @finish: for waiting till it is safe to remove the patch module */ struct klp_patch { /* external */ @@ -125,6 +127,7 @@ struct klp_patch { struct list_head list; struct kobject kobj; bool enabled; + struct completion finish; }; #define klp_for_each_object(patch, obj) \ -- cgit v1.2.3 From 923386f761f5ff35da2ac778839876fe4a2f165f Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 18 Dec 2015 15:36:57 +0200 Subject: clk: ti: remove un-used definitions from public clk_hw_omap struct Clksel support has been deprecated a while back, so remove these from the struct also. 
Signed-off-by: Tero Kristo Acked-by: Tony Lindgren --- include/linux/clk/ti.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 6110fe09ed18..07308db5a15d 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -129,8 +129,6 @@ struct clk_hw_omap_ops { * @enable_bit: bitshift to write to enable/disable the clock (see @enable_reg) * @flags: see "struct clk.flags possibilities" above * @clksel_reg: for clksel clks, register va containing src/divisor select - * @clksel_mask: bitmask in @clksel_reg for the src/divisor selector - * @clksel: for clksel clks, pointer to struct clksel for this clock * @dpll_data: for DPLLs, pointer to struct dpll_data for this clock * @clkdm_name: clockdomain name that this clock is contained in * @clkdm: pointer to struct clockdomain, resolved from @clkdm_name at runtime @@ -145,8 +143,6 @@ struct clk_hw_omap { u8 enable_bit; u8 flags; void __iomem *clksel_reg; - u32 clksel_mask; - const struct clksel *clksel; struct dpll_data *dpll_data; const char *clkdm_name; struct clockdomain *clkdm; -- cgit v1.2.3 From b6f27b2db2df395d65b02a758861c7fc54edbec1 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 30 Sep 2016 14:10:11 +0300 Subject: clk: ti: add clkdm_lookup to the exported functions This will be needed to move some additional clockdomain functionality under clock driver. 
Signed-off-by: Tero Kristo Acked-by: Tony Lindgren --- include/linux/clk/ti.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 07308db5a15d..bc7fd8f0fb5d 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -213,6 +213,7 @@ struct clk_omap_reg { * @clk_writel: pointer to register write function * @clkdm_clk_enable: pointer to clockdomain enable function * @clkdm_clk_disable: pointer to clockdomain disable function + * @clkdm_lookup: pointer to clockdomain lookup function * @cm_wait_module_ready: pointer to CM module wait ready function * @cm_split_idlest_reg: pointer to CM module function to split idlest reg * @@ -228,6 +229,7 @@ struct ti_clk_ll_ops { int (*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk); int (*clkdm_clk_disable)(struct clockdomain *clkdm, struct clk *clk); + struct clockdomain * (*clkdm_lookup)(const char *name); int (*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg, u8 idlest_shift); int (*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst, -- cgit v1.2.3 From 2e1a294c0f2273a6d3537c91965ca46a6483bd8c Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Fri, 30 Sep 2016 14:13:38 +0300 Subject: clk: ti: move omap2_init_clk_clkdm under TI clock driver This is not needed outside the driver, so move it inside it and remove the prototype from the public header also. 
Signed-off-by: Tero Kristo Acked-by: Tony Lindgren --- include/linux/clk/ti.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index bc7fd8f0fb5d..626ae94b7444 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -238,7 +238,6 @@ struct ti_clk_ll_ops { #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) -void omap2_init_clk_clkdm(struct clk_hw *clk); int omap2_clk_disable_autoidle_all(void); int omap2_clk_enable_autoidle_all(void); int omap2_clk_allow_idle(struct clk *clk); -- cgit v1.2.3 From c91f07801f144920f8467486a1e36e42ed9d9ff2 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Mon, 30 Jan 2017 16:01:36 +0200 Subject: clk: ti: drop unnecessary MEMMAP_ADDRESSING flag This has been superceded by the usage of ti_clk_ll_ops for now. Signed-off-by: Tero Kristo Acked-by: Tony Lindgren --- include/linux/clk/ti.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index 626ae94b7444..affdabd0b6a1 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -168,7 +168,6 @@ struct clk_hw_omap { * should be used. This is a temporary solution - a better approach * would be to associate clock type-specific data with the clock, * similar to the struct dpll_data approach. - * MEMMAP_ADDRESSING: Use memmap addressing to access clock registers. 
*/ #define ENABLE_REG_32BIT (1 << 0) /* Use 32-bit access */ #define CLOCK_IDLE_CONTROL (1 << 1) @@ -176,7 +175,6 @@ struct clk_hw_omap { #define ENABLE_ON_INIT (1 << 3) /* Enable upon framework init */ #define INVERT_ENABLE (1 << 4) /* 0 enables, 1 disables */ #define CLOCK_CLKOUTX2 (1 << 5) -#define MEMMAP_ADDRESSING (1 << 6) /* CM_CLKEN_PLL*.EN* bit values - not all are available for every DPLL */ #define DPLL_LOW_POWER_STOP 0x1 -- cgit v1.2.3 From 6c0afb503937a12a8d20a805fcf263e31afa9871 Mon Sep 17 00:00:00 2001 From: Tero Kristo Date: Thu, 9 Feb 2017 11:24:37 +0200 Subject: clk: ti: convert to use proper register definition for all accesses Currently, TI clock driver uses an encapsulated struct that is cast into a void pointer to store all register addresses. This can be considered as rather nasty hackery, and prevents from expanding the register address field also. Instead, replace all the code to use proper struct in place for this, which contains all the previously used data. This patch is rather large as it is touching multiple files, but this can't be split up as we need to avoid any boot breakage. Signed-off-by: Tero Kristo Acked-by: Tony Lindgren --- include/linux/clk/ti.h | 46 ++++++++++++++++++++++++---------------------- 1 file changed, 24 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/ti.h b/include/linux/clk/ti.h index affdabd0b6a1..d18da839b810 100644 --- a/include/linux/clk/ti.h +++ b/include/linux/clk/ti.h @@ -18,6 +18,18 @@ #include #include +/** + * struct clk_omap_reg - OMAP register declaration + * @offset: offset from the master IP module base address + * @index: index of the master IP module + */ +struct clk_omap_reg { + void __iomem *ptr; + u16 offset; + u8 index; + u8 flags; +}; + /** * struct dpll_data - DPLL registers and integration data * @mult_div1_reg: register containing the DPLL M and N bitfields @@ -67,12 +79,12 @@ * can be placed into read-only space. 
*/ struct dpll_data { - void __iomem *mult_div1_reg; + struct clk_omap_reg mult_div1_reg; u32 mult_mask; u32 div1_mask; struct clk_hw *clk_bypass; struct clk_hw *clk_ref; - void __iomem *control_reg; + struct clk_omap_reg control_reg; u32 enable_mask; unsigned long last_rounded_rate; u16 last_rounded_m; @@ -84,8 +96,8 @@ struct dpll_data { u16 max_divider; unsigned long max_rate; u8 modes; - void __iomem *autoidle_reg; - void __iomem *idlest_reg; + struct clk_omap_reg autoidle_reg; + struct clk_omap_reg idlest_reg; u32 autoidle_mask; u32 freqsel_mask; u32 idlest_mask; @@ -113,10 +125,10 @@ struct clk_hw_omap; */ struct clk_hw_omap_ops { void (*find_idlest)(struct clk_hw_omap *oclk, - void __iomem **idlest_reg, + struct clk_omap_reg *idlest_reg, u8 *idlest_bit, u8 *idlest_val); void (*find_companion)(struct clk_hw_omap *oclk, - void __iomem **other_reg, + struct clk_omap_reg *other_reg, u8 *other_bit); void (*allow_idle)(struct clk_hw_omap *oclk); void (*deny_idle)(struct clk_hw_omap *oclk); @@ -139,10 +151,10 @@ struct clk_hw_omap { struct list_head node; unsigned long fixed_rate; u8 fixed_div; - void __iomem *enable_reg; + struct clk_omap_reg enable_reg; u8 enable_bit; u8 flags; - void __iomem *clksel_reg; + struct clk_omap_reg clksel_reg; struct dpll_data *dpll_data; const char *clkdm_name; struct clockdomain *clkdm; @@ -195,16 +207,6 @@ enum { CLK_MAX_MEMMAPS }; -/** - * struct clk_omap_reg - OMAP register declaration - * @offset: offset from the master IP module base address - * @index: index of the master IP module - */ -struct clk_omap_reg { - u16 offset; - u16 index; -}; - /** * struct ti_clk_ll_ops - low-level ops for clocks * @clk_readl: pointer to register read function @@ -222,16 +224,16 @@ struct clk_omap_reg { * operations not provided directly by clock drivers. 
*/ struct ti_clk_ll_ops { - u32 (*clk_readl)(void __iomem *reg); - void (*clk_writel)(u32 val, void __iomem *reg); + u32 (*clk_readl)(const struct clk_omap_reg *reg); + void (*clk_writel)(u32 val, const struct clk_omap_reg *reg); int (*clkdm_clk_enable)(struct clockdomain *clkdm, struct clk *clk); int (*clkdm_clk_disable)(struct clockdomain *clkdm, struct clk *clk); struct clockdomain * (*clkdm_lookup)(const char *name); int (*cm_wait_module_ready)(u8 part, s16 prcm_mod, u16 idlest_reg, u8 idlest_shift); - int (*cm_split_idlest_reg)(void __iomem *idlest_reg, s16 *prcm_inst, - u8 *idlest_reg_id); + int (*cm_split_idlest_reg)(struct clk_omap_reg *idlest_reg, + s16 *prcm_inst, u8 *idlest_reg_id); }; #define to_clk_hw_omap(_hw) container_of(_hw, struct clk_hw_omap, hw) -- cgit v1.2.3 From 442c609830e98919faa78b797e9b89c53bab9cbf Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Mon, 6 Mar 2017 06:19:44 +0100 Subject: leds: core: add OF variants of LED registering functions MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit These new functions allow passing an additional device_node argument that will be internally set for created LED device. Thanks to this LED core code and triggers will be able to access DT node for reading extra info. The easiest solution for achieving this was reworking old functions to more generic ones & adding simple defines for API compatibility. 
Signed-off-by: Rafał Miłecki Acked-by: Pavel Machek Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index 38c0bd7ca107..64c56d454f7d 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -122,10 +122,16 @@ struct led_classdev { struct mutex led_access; }; -extern int led_classdev_register(struct device *parent, - struct led_classdev *led_cdev); -extern int devm_led_classdev_register(struct device *parent, - struct led_classdev *led_cdev); +extern int of_led_classdev_register(struct device *parent, + struct device_node *np, + struct led_classdev *led_cdev); +#define led_classdev_register(parent, led_cdev) \ + of_led_classdev_register(parent, NULL, led_cdev) +extern int devm_of_led_classdev_register(struct device *parent, + struct device_node *np, + struct led_classdev *led_cdev); +#define devm_led_classdev_register(parent, led_cdev) \ + devm_of_led_classdev_register(parent, NULL, led_cdev) extern void led_classdev_unregister(struct led_classdev *led_cdev); extern void devm_led_classdev_unregister(struct device *parent, struct led_classdev *led_cdev); -- cgit v1.2.3 From d1caa99055382c91b57244343020ea37c4fa4d09 Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Tue, 13 Dec 2016 15:33:32 +0100 Subject: iio: adc: add support for Allwinner SoCs ADC The Allwinner SoCs all have an ADC that can also act as a touchscreen controller and a thermal sensor. This patch adds the ADC driver which is based on the MFD for the same SoCs ADC. This also registers the thermal adc channel in the iio map array so iio_hwmon could use it without modifying the Device Tree. This registers the driver in the thermal framework. The thermal sensor requires the IP to be in touchscreen mode to return correct values. 
Therefore, if the user is continuously reading the ADC channel(s), the thermal framework in which the thermal sensor is registered will switch the IP in touchscreen mode to get a temperature value and requires a delay of 100ms (because of the mode switching), then the ADC will switch back to ADC mode and requires also a delay of 100ms. If the ADC readings are critical to user and the SoC temperature is not, this driver is capable of not registering the thermal sensor in the thermal framework and thus, "quicken" the ADC readings. This driver probes on three different platform_device_id to take into account slight differences (registers bit and temperature computation) between Allwinner SoCs ADCs. Signed-off-by: Quentin Schulz Acked-by: Maxime Ripard Acked-by: Jonathan Cameron Acked-for-MFD-by: Lee Jones Signed-off-by: Jonathan Cameron --- include/linux/mfd/sun4i-gpadc.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h index d7a29f246d64..509e736d27fb 100644 --- a/include/linux/mfd/sun4i-gpadc.h +++ b/include/linux/mfd/sun4i-gpadc.h @@ -28,6 +28,7 @@ #define SUN4I_GPADC_CTRL1_TP_MODE_EN BIT(4) #define SUN4I_GPADC_CTRL1_TP_ADC_SELECT BIT(3) #define SUN4I_GPADC_CTRL1_ADC_CHAN_SELECT(x) (GENMASK(2, 0) & (x)) +#define SUN4I_GPADC_CTRL1_ADC_CHAN_MASK GENMASK(2, 0) /* TP_CTRL1 bits for sun6i SOCs */ #define SUN6I_GPADC_CTRL1_TOUCH_PAN_CALI_EN BIT(7) @@ -35,6 +36,7 @@ #define SUN6I_GPADC_CTRL1_TP_MODE_EN BIT(5) #define SUN6I_GPADC_CTRL1_TP_ADC_SELECT BIT(4) #define SUN6I_GPADC_CTRL1_ADC_CHAN_SELECT(x) (GENMASK(3, 0) & BIT(x)) +#define SUN6I_GPADC_CTRL1_ADC_CHAN_MASK GENMASK(3, 0) #define SUN4I_GPADC_CTRL2 0x08 -- cgit v1.2.3 From 7f501f0a72036dc29ad9a53811474c393634b401 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Tue, 24 May 2016 19:20:05 +0200 Subject: mtd: nand: Store nand ID in struct nand_chip Store the NAND ID in struct nand_chip to avoid passing id_data and 
id_len as function parameters. Signed-off-by: Boris Brezillon Acked-by: Richard Weinberger Reviewed-by: Marek Vasut --- include/linux/mtd/nand.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9591e0fbe5bd..e2c11351b1bd 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -464,6 +464,17 @@ struct nand_jedec_params { __le16 crc; } __packed; +/** + * struct nand_id - NAND id structure + * @data: buffer containing the id bytes. Currently 8 bytes large, but can + * be extended if required. + * @len: ID length. + */ +struct nand_id { + u8 data[8]; + int len; +}; + /** * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices * @lock: protection lock @@ -793,6 +804,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * @pagebuf_bitflips: [INTERN] holds the bitflip count for the page which is * currently in data_buf. * @subpagesize: [INTERN] holds the subpagesize + * @id: [INTERN] holds NAND ID * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), * non 0 if ONFI supported. * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), @@ -881,6 +893,7 @@ struct nand_chip { int badblockpos; int badblockbits; + struct nand_id id; int onfi_version; int jedec_version; union { -- cgit v1.2.3 From 8cfb9ab68f90703d419870fce7ac21ac401399f2 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 7 Jan 2017 15:15:57 +0100 Subject: mtd: nand: Rename the nand_manufacturers struct Drop the 's' at the end of nand_manufacturers since the struct is actually describing a single manufacturer, not a manufacturer table. 
Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index e2c11351b1bd..9c679e8bde42 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1062,17 +1062,17 @@ struct nand_flash_dev { }; /** - * struct nand_manufacturers - NAND Flash Manufacturer ID Structure + * struct nand_manufacturer - NAND Flash Manufacturer structure * @name: Manufacturer name * @id: manufacturer ID code of device. */ -struct nand_manufacturers { +struct nand_manufacturer { int id; char *name; }; extern struct nand_flash_dev nand_flash_ids[]; -extern struct nand_manufacturers nand_manuf_ids[]; +extern struct nand_manufacturer nand_manuf_ids[]; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From bcc678c2d7a0e0af14cb3d858ebd367be378c172 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sat, 7 Jan 2017 15:48:25 +0100 Subject: mtd: nand: Do not expose the NAND manufacturer table directly There is no reason to expose the NAND manufacturer table. Provide an helper function to find manufacturers by their id. We also turn the nand_manufacturers table into a const array, since its members are not modified after the initial assignment. Finally, we remove the sentinel manufacturer entry from the manufacturers table (we already have the array size information given by ARRAY_SIZE()), and add the nand_manufacturer_name() helper to handle the "Unknown" case properly. 
Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9c679e8bde42..6415aa16043c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1071,8 +1071,15 @@ struct nand_manufacturer { char *name; }; +const struct nand_manufacturer *nand_get_manufacturer(u8 id); + +static inline const char * +nand_manufacturer_name(const struct nand_manufacturer *manufacturer) +{ + return manufacturer ? manufacturer->name : "Unknown"; +} + extern struct nand_flash_dev nand_flash_ids[]; -extern struct nand_manufacturer nand_manuf_ids[]; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From abbe26d144ec22bb067fa414d717b9f7ca2e12bd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 09:32:55 +0200 Subject: mtd: nand: Add manufacturer specific initialization/detection steps A lot of NANDs are implementing generic features in a non-generic way, or are providing advanced auto-detection logic where the NAND ID bytes meaning changes with the NAND generation. Providing this vendor specific initialization step will allow us to get rid of full-id entries in the nand_ids table or all the vendor specific cases added over the time in the generic NAND ID decoding logic. 
Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 6415aa16043c..ee9a19f42293 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -731,6 +731,20 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) return &conf->timings.sdr; } +/** + * struct nand_manufacturer_ops - NAND Manufacturer operations + * @detect: detect the NAND memory organization and capabilities + * @init: initialize all vendor specific fields (like the ->read_retry() + * implementation) if any. + * @cleanup: the ->init() function may have allocated resources, ->cleanup() + * is here to let vendor specific code release those resources. + */ +struct nand_manufacturer_ops { + void (*detect)(struct nand_chip *chip); + int (*init)(struct nand_chip *chip); + void (*cleanup)(struct nand_chip *chip); +}; + /** * struct nand_chip - NAND Private Flash Chip Data * @mtd: MTD device registered to the MTD framework @@ -835,6 +849,7 @@ nand_get_sdr_timings(const struct nand_data_interface *conf) * additional error status checks (determine if errors are * correctable). 
* @write_page: [REPLACEABLE] High-level page write function + * @manufacturer: [INTERN] Contains manufacturer information */ struct nand_chip { @@ -923,6 +938,11 @@ struct nand_chip { struct nand_bbt_descr *badblock_pattern; void *priv; + + struct { + const struct nand_manufacturer *desc; + void *priv; + } manufacturer; }; extern const struct mtd_ooblayout_ops nand_ooblayout_sp_ops; @@ -959,6 +979,17 @@ static inline void nand_set_controller_data(struct nand_chip *chip, void *priv) chip->priv = priv; } +static inline void nand_set_manufacturer_data(struct nand_chip *chip, + void *priv) +{ + chip->manufacturer.priv = priv; +} + +static inline void *nand_get_manufacturer_data(struct nand_chip *chip) +{ + return chip->manufacturer.priv; +} + /* * NAND Flash Manufacturer ID Codes */ @@ -1065,10 +1096,12 @@ struct nand_flash_dev { * struct nand_manufacturer - NAND Flash Manufacturer structure * @name: Manufacturer name * @id: manufacturer ID code of device. + * @ops: manufacturer operations */ struct nand_manufacturer { int id; char *name; + const struct nand_manufacturer_ops *ops; }; const struct nand_manufacturer *nand_get_manufacturer(u8 id); @@ -1246,4 +1279,6 @@ int nand_reset(struct nand_chip *chip, int chipnr); /* Free resources held by the NAND device */ void nand_cleanup(struct nand_chip *chip); +/* Default extended ID decoding function */ +void nand_decode_ext_id(struct nand_chip *chip); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From c51d0ac59f24200dfdccc897ff7c3c9446c7599a Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 10:22:19 +0200 Subject: mtd: nand: Move Samsung specific init/detection logic in nand_samsung.c Move Samsung specific initialization and detection logic into nand_samsung.c. This is part of the "separate vendor specific code from core" cleanup process. 
Signed-off-by: Boris Brezillon Acked-by: Richard Weinberger --- include/linux/mtd/nand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index ee9a19f42293..2f83cb55392f 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1114,6 +1114,8 @@ nand_manufacturer_name(const struct nand_manufacturer *manufacturer) extern struct nand_flash_dev nand_flash_ids[]; +extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; + int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); int nand_isreserved_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From 01389b6bd2f4f7649cdbb4a99a15d9e0c05d6f8c Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 10:30:18 +0200 Subject: mtd: nand: Move Hynix specific init/detection logic in nand_hynix.c Move Hynix specific initialization and detection logic into nand_hynix.c. This is part of the "separate vendor specific code from core" cleanup process. 
Signed-off-by: Boris Brezillon Acked-by: Richard Weinberger --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 2f83cb55392f..74e3a231cb56 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1115,6 +1115,7 @@ nand_manufacturer_name(const struct nand_manufacturer *manufacturer) extern struct nand_flash_dev nand_flash_ids[]; extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; +extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From 9b2d61f80b060ce3ea5af2a99e148b0b214932b2 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 10:34:57 +0200 Subject: mtd: nand: Move Toshiba specific init/detection logic in nand_toshiba.c Move Toshiba specific initialization and detection logic into nand_toshiba.c. This is part of the "separate vendor specific code from core" cleanup process. 
Signed-off-by: Boris Brezillon Acked-by: Richard Weinberger --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 74e3a231cb56..dd9e3b5ddd4f 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1114,6 +1114,7 @@ nand_manufacturer_name(const struct nand_manufacturer *manufacturer) extern struct nand_flash_dev nand_flash_ids[]; +extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops; extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; -- cgit v1.2.3 From 10d4e75c36f6c16311dde1461f318210da357219 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 10:38:57 +0200 Subject: mtd: nand: Move Micron specific init logic in nand_micron.c Move Micron specific initialization logic into nand_micron.c. This is part of the "separate vendor specific code from core" cleanup process. 
Signed-off-by: Boris Brezillon Acked-by: Richard Weinberger --- include/linux/mtd/nand.h | 21 +-------------------- 1 file changed, 1 insertion(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index dd9e3b5ddd4f..7d0f18ecbf57 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -366,26 +366,6 @@ struct onfi_ext_param_page { */ } __packed; -struct nand_onfi_vendor_micron { - u8 two_plane_read; - u8 read_cache; - u8 read_unique_id; - u8 dq_imped; - u8 dq_imped_num_settings; - u8 dq_imped_feat_addr; - u8 rb_pulldown_strength; - u8 rb_pulldown_strength_feat_addr; - u8 rb_pulldown_strength_num_settings; - u8 otp_mode; - u8 otp_page_start; - u8 otp_data_prot_addr; - u8 otp_num_pages; - u8 otp_feat_addr; - u8 read_retry_options; - u8 reserved[72]; - u8 param_revision; -} __packed; - struct jedec_ecc_info { u8 ecc_bits; u8 codeword_size; @@ -1117,6 +1097,7 @@ extern struct nand_flash_dev nand_flash_ids[]; extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops; extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; +extern const struct nand_manufacturer_ops micron_nand_manuf_ops; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From 229204da53b31d576fcc1c93a33626943ea8202c Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 10:42:23 +0200 Subject: mtd: nand: Move AMD/Spansion specific init/detection logic in nand_amd.c Move AMD/Spansion specific initialization/detection logic into nand_amd.c. This is part of the "separate vendor specific code from core" cleanup process. 
Signed-off-by: Boris Brezillon Acked-by: Richard Weinberger --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 7d0f18ecbf57..97dce42778e9 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1098,6 +1098,7 @@ extern const struct nand_manufacturer_ops toshiba_nand_manuf_ops; extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; extern const struct nand_manufacturer_ops micron_nand_manuf_ops; +extern const struct nand_manufacturer_ops amd_nand_manuf_ops; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From 3b5206f4be9b65d2f0f85b3239cf117a1d0de7ce Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Wed, 8 Jun 2016 10:43:26 +0200 Subject: mtd: nand: Move Macronix specific initialization in nand_macronix.c Move Macronix specific initialization logic into nand_macronix.c. This is part of the "separate vendor specific code from core" cleanup process. 
Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 97dce42778e9..c7de017c7f4c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1099,6 +1099,7 @@ extern const struct nand_manufacturer_ops samsung_nand_manuf_ops; extern const struct nand_manufacturer_ops hynix_nand_manuf_ops; extern const struct nand_manufacturer_ops micron_nand_manuf_ops; extern const struct nand_manufacturer_ops amd_nand_manuf_ops; +extern const struct nand_manufacturer_ops macronix_nand_manuf_ops; int nand_default_bbt(struct mtd_info *mtd); int nand_markbad_bbt(struct mtd_info *mtd, loff_t offs); -- cgit v1.2.3 From 4b9502e63b5e2b1b5ef491919d3219b9440fe0b3 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Wed, 8 Mar 2017 10:00:40 +0200 Subject: kernel: convert css_set.refcount from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6a3f850cabab..c74b78ecd583 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -13,6 +13,7 @@ #include #include #include +#include #include #include #include @@ -156,7 +157,7 @@ struct css_set { struct cgroup_subsys_state *subsys[CGROUP_SUBSYS_COUNT]; /* reference count */ - atomic_t refcount; + refcount_t refcount; /* the default cgroup associated with this css_set */ struct cgroup *dfl_cgrp; -- cgit v1.2.3 From 915e70f9263d56fbf103742265025f7a492aa625 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 22 Feb 2017 13:01:21 +0100 Subject: staging, android: remove lowmemory killer from the tree Lowmemory killer is sitting in the staging tree since 2008 without any serious interest for fixing issues brought up by the MM folks. The main objection is that the implementation is basically broken by design: - it hooks into slab shrinker API which is not suitable for this purpose. lowmem_count implementation just shows this nicely. There is no scaling based on the memory pressure and no feedback to the generic shrinker infrastructure. Moreover lowmem_scan is called way too often for the heavy work it performs. - it is not reclaim context aware - no NUMA and/or memcg awareness. As the code stands right now it just adds a maintenance overhead when core MM changes have to update lowmemorykiller.c as well. It also seems that the alternative LMK implementation will be solely in the userspace so this code has no perspective it seems. The staging tree is supposed to be for a code which needs to be put in shape before it can be merged which is not the case here obviously. 
Signed-off-by: Michal Hocko Signed-off-by: Greg Kroah-Hartman --- include/linux/sched.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..942c2250301b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1256,7 +1256,6 @@ extern struct pid *cad_pid; #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ -#define PFA_LMK_WAITING 3 /* Lowmemorykiller is waiting */ #define TASK_PFA_TEST(name, func) \ @@ -1282,9 +1281,6 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) -TASK_PFA_TEST(LMK_WAITING, lmk_waiting) -TASK_PFA_SET(LMK_WAITING, lmk_waiting) - static inline void tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) { -- cgit v1.2.3 From 967c9cca2cc50569efc65945325c173cecba83bd Mon Sep 17 00:00:00 2001 From: Jens Wiklander Date: Wed, 11 Mar 2015 14:39:39 +0100 Subject: tee: generic TEE subsystem MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initial patch for generic TEE subsystem. This subsystem provides: * Registration/un-registration of TEE drivers. * Shared memory between normal world and secure world. * Ioctl interface for interaction with user space. * Sysfs implementation_id of TEE driver A TEE (Trusted Execution Environment) driver is a driver that interfaces with a trusted OS running in some secure environment, for example, TrustZone on ARM cpus, or a separate secure co-processor etc. The TEE subsystem can serve a TEE driver for a Global Platform compliant TEE, but it's not limited to only Global Platform TEEs. 
This patch builds on other similar implementations trying to solve the same problem: * "optee_linuxdriver" by among others Jean-michel DELORME and Emmanuel MICHEL * "Generic TrustZone Driver" by Javier González Acked-by: Andreas Dannenberg Tested-by: Jerome Forissier (HiKey) Tested-by: Volodymyr Babchuk (RCAR H3) Tested-by: Scott Branden Reviewed-by: Javier González Signed-off-by: Jens Wiklander --- include/linux/tee_drv.h | 277 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 277 insertions(+) create mode 100644 include/linux/tee_drv.h (limited to 'include/linux') diff --git a/include/linux/tee_drv.h b/include/linux/tee_drv.h new file mode 100644 index 000000000000..0f175b8f6456 --- /dev/null +++ b/include/linux/tee_drv.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2015-2016, Linaro Limited + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + */ + +#ifndef __TEE_DRV_H +#define __TEE_DRV_H + +#include +#include +#include +#include + +/* + * The file describes the API provided by the generic TEE driver to the + * specific TEE driver. 
+ */ + +#define TEE_SHM_MAPPED 0x1 /* Memory mapped by the kernel */ +#define TEE_SHM_DMA_BUF 0x2 /* Memory with dma-buf handle */ + +struct tee_device; +struct tee_shm; +struct tee_shm_pool; + +/** + * struct tee_context - driver specific context on file pointer data + * @teedev: pointer to this driver's struct tee_device + * @list_shm: List of shared memory objects owned by this context + * @data: driver specific context data, managed by the driver + */ +struct tee_context { + struct tee_device *teedev; + struct list_head list_shm; + void *data; +}; + +struct tee_param_memref { + size_t shm_offs; + size_t size; + struct tee_shm *shm; +}; + +struct tee_param_value { + u64 a; + u64 b; + u64 c; +}; + +struct tee_param { + u64 attr; + union { + struct tee_param_memref memref; + struct tee_param_value value; + } u; +}; + +/** + * struct tee_driver_ops - driver operations vtable + * @get_version: returns version of driver + * @open: called when the device file is opened + * @release: release this open file + * @open_session: open a new session + * @close_session: close a session + * @invoke_func: invoke a trusted function + * @cancel_req: request cancel of an ongoing invoke or open + * @supp_recv: called for supplicant to get a command + * @supp_send: called for supplicant to send a response + */ +struct tee_driver_ops { + void (*get_version)(struct tee_device *teedev, + struct tee_ioctl_version_data *vers); + int (*open)(struct tee_context *ctx); + void (*release)(struct tee_context *ctx); + int (*open_session)(struct tee_context *ctx, + struct tee_ioctl_open_session_arg *arg, + struct tee_param *param); + int (*close_session)(struct tee_context *ctx, u32 session); + int (*invoke_func)(struct tee_context *ctx, + struct tee_ioctl_invoke_arg *arg, + struct tee_param *param); + int (*cancel_req)(struct tee_context *ctx, u32 cancel_id, u32 session); + int (*supp_recv)(struct tee_context *ctx, u32 *func, u32 *num_params, + struct tee_param *param); + int (*supp_send)(struct
tee_context *ctx, u32 ret, u32 num_params, + struct tee_param *param); +}; + +/** + * struct tee_desc - Describes the TEE driver to the subsystem + * @name: name of driver + * @ops: driver operations vtable + * @owner: module providing the driver + * @flags: Extra properties of driver, defined by TEE_DESC_* below + */ +#define TEE_DESC_PRIVILEGED 0x1 +struct tee_desc { + const char *name; + const struct tee_driver_ops *ops; + struct module *owner; + u32 flags; +}; + +/** + * tee_device_alloc() - Allocate a new struct tee_device instance + * @teedesc: Descriptor for this driver + * @dev: Parent device for this device + * @pool: Shared memory pool, NULL if not used + * @driver_data: Private driver data for this device + * + * Allocates a new struct tee_device instance. The device is + * removed by tee_device_unregister(). + * + * @returns a pointer to a 'struct tee_device' or an ERR_PTR on failure + */ +struct tee_device *tee_device_alloc(const struct tee_desc *teedesc, + struct device *dev, + struct tee_shm_pool *pool, + void *driver_data); + +/** + * tee_device_register() - Registers a TEE device + * @teedev: Device to register + * + * tee_device_unregister() need to be called to remove the @teedev if + * this function fails. + * + * @returns < 0 on failure + */ +int tee_device_register(struct tee_device *teedev); + +/** + * tee_device_unregister() - Removes a TEE device + * @teedev: Device to unregister + * + * This function should be called to remove the @teedev even if + * tee_device_register() hasn't been called yet. Does nothing if + * @teedev is NULL. 
+ */ +void tee_device_unregister(struct tee_device *teedev); + +/** + * struct tee_shm_pool_mem_info - holds information needed to create a shared + * memory pool + * @vaddr: Virtual address of start of pool + * @paddr: Physical address of start of pool + * @size: Size in bytes of the pool + */ +struct tee_shm_pool_mem_info { + unsigned long vaddr; + phys_addr_t paddr; + size_t size; +}; + +/** + * tee_shm_pool_alloc_res_mem() - Create a shared memory pool from reserved + * memory range + * @priv_info: Information for driver private shared memory pool + * @dmabuf_info: Information for dma-buf shared memory pool + * + * Start and end of pools must be page aligned. + * + * Allocation with the flag TEE_SHM_DMA_BUF set will use the range supplied + * in @dmabuf_info, others will use the range provided by @priv_info. + * + * @returns pointer to a 'struct tee_shm_pool' or an ERR_PTR on failure. + */ +struct tee_shm_pool * +tee_shm_pool_alloc_res_mem(struct tee_shm_pool_mem_info *priv_info, + struct tee_shm_pool_mem_info *dmabuf_info); + +/** + * tee_shm_pool_free() - Free a shared memory pool + * @pool: The shared memory pool to free + * + * There must be no remaining shared memory allocated from this pool when + * this function is called. + */ +void tee_shm_pool_free(struct tee_shm_pool *pool); + +/** + * tee_get_drvdata() - Return driver_data pointer + * @returns the driver_data pointer supplied to tee_device_alloc(). + */ +void *tee_get_drvdata(struct tee_device *teedev); + +/** + * tee_shm_alloc() - Allocate shared memory + * @ctx: Context that allocates the shared memory + * @size: Requested size of shared memory + * @flags: Flags setting properties for the requested shared memory. + * + * Memory allocated as global shared memory is automatically freed when the + * TEE file pointer is closed. The @flags field uses the bits defined by + * TEE_SHM_* above. TEE_SHM_MAPPED must currently always be set.
If + * TEE_SHM_DMA_BUF is set, global shared memory will be allocated and associated + * with a dma-buf handle, else driver private memory. + * + * @returns a pointer to 'struct tee_shm' + */ +struct tee_shm *tee_shm_alloc(struct tee_context *ctx, size_t size, u32 flags); + +/** + * tee_shm_free() - Free shared memory + * @shm: Handle to shared memory to free + */ +void tee_shm_free(struct tee_shm *shm); + +/** + * tee_shm_put() - Decrease reference count on a shared memory handle + * @shm: Shared memory handle + */ +void tee_shm_put(struct tee_shm *shm); + +/** + * tee_shm_va2pa() - Get physical address of a virtual address + * @shm: Shared memory handle + * @va: Virtual address to translate + * @pa: Returned physical address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_va2pa(struct tee_shm *shm, void *va, phys_addr_t *pa); + +/** + * tee_shm_pa2va() - Get virtual address of a physical address + * @shm: Shared memory handle + * @pa: Physical address to translate + * @va: Returned virtual address + * @returns 0 on success and < 0 on failure + */ +int tee_shm_pa2va(struct tee_shm *shm, phys_addr_t pa, void **va); + +/** + * tee_shm_get_va() - Get virtual address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @returns virtual address of the shared memory + offs if offs is within + * the bounds of this shared memory, else an ERR_PTR + */ +void *tee_shm_get_va(struct tee_shm *shm, size_t offs); + +/** + * tee_shm_get_pa() - Get physical address of a shared memory plus an offset + * @shm: Shared memory handle + * @offs: Offset from start of this shared memory + * @pa: Physical address to return + * @returns 0 if offs is within the bounds of this shared memory, else an + * error code.
+ */ +int tee_shm_get_pa(struct tee_shm *shm, size_t offs, phys_addr_t *pa); + +/** + * tee_shm_get_id() - Get id of a shared memory object + * @shm: Shared memory handle + * @returns id + */ +int tee_shm_get_id(struct tee_shm *shm); + +/** + * tee_shm_get_from_id() - Find shared memory object and increase reference + * count + * @ctx: Context owning the shared memory + * @id: Id of shared memory object + * @returns a pointer to 'struct tee_shm' on success or an ERR_PTR on failure + */ +struct tee_shm *tee_shm_get_from_id(struct tee_context *ctx, int id); + +#endif /*__TEE_DRV_H*/ -- cgit v1.2.3 From 688769f643bfce894f14dc7141bfc6c010f52750 Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Thu, 9 Mar 2017 15:45:14 -0600 Subject: PCI/MSI: Make pci_msi_shutdown() and pci_msix_shutdown() static pci_msi_shutdown() and pci_msix_shutdown() are used only in drivers/pci/msi.c, so make them static. Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..10917c122974 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1297,11 +1297,9 @@ struct msix_entry { #ifdef CONFIG_PCI_MSI int pci_msi_vec_count(struct pci_dev *dev); -void pci_msi_shutdown(struct pci_dev *dev); void pci_disable_msi(struct pci_dev *dev); int pci_msix_vec_count(struct pci_dev *dev); int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); -void pci_msix_shutdown(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); int pci_msi_enabled(void); @@ -1327,13 +1325,11 @@ int pci_irq_get_node(struct pci_dev *pdev, int vec); #else static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } -static inline void pci_msi_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msi(struct pci_dev *dev) { } static inline int pci_msix_vec_count(struct pci_dev 
*dev) { return -ENOSYS; } static inline int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec) { return -ENOSYS; } -static inline void pci_msix_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } static inline int pci_msi_enabled(void) { return 0; } -- cgit v1.2.3 From abb521e36b9286c262971974ebaeda2d67dadd86 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 8 Mar 2017 08:57:00 -0800 Subject: ethtool: add CRC32 as an RSS hash function CRC32 engines are usually easily available in hardware and generate OK spread for RSS hash. Add CRC32 RSS hash function to ethtool API. Signed-off-by: Jakub Kicinski Signed-off-by: David S. Miller --- include/linux/ethtool.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 9ded8c6d8176..83cc9863444b 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -60,6 +60,7 @@ enum ethtool_phys_id_state { enum { ETH_RSS_HASH_TOP_BIT, /* Configurable RSS hash function - Toeplitz */ ETH_RSS_HASH_XOR_BIT, /* Configurable RSS hash function - Xor */ + ETH_RSS_HASH_CRC32_BIT, /* Configurable RSS hash function - Crc32 */ /* * Add your fresh new hash function bits above and remember to update @@ -73,6 +74,7 @@ enum { #define ETH_RSS_HASH_TOP __ETH_RSS_HASH(TOP) #define ETH_RSS_HASH_XOR __ETH_RSS_HASH(XOR) +#define ETH_RSS_HASH_CRC32 __ETH_RSS_HASH(CRC32) #define ETH_RSS_HASH_UNKNOWN 0 #define ETH_RSS_HASH_NO_CHANGE 0 -- cgit v1.2.3 From d976a525c371276cebd2517349d1d3568a0e48b5 Mon Sep 17 00:00:00 2001 From: Joao Pinto Date: Fri, 10 Mar 2017 18:24:51 +0000 Subject: net: stmmac: multiple queues dt configuration This patch adds the multiple queues configuration in the Device Tree. It was also created a set of structures to keep the RX and TX queues configurations to be used in the driver. 
Signed-off-by: Joao Pinto Signed-off-by: David S. Miller --- include/linux/stmmac.h | 30 ++++++++++++++++++++++++++++++ 1 file changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index fc273e9d5f67..266ff2af91e5 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -28,6 +28,9 @@ #include +#define MTL_MAX_RX_QUEUES 8 +#define MTL_MAX_TX_QUEUES 8 + #define STMMAC_RX_COE_NONE 0 #define STMMAC_RX_COE_TYPE1 1 #define STMMAC_RX_COE_TYPE2 2 @@ -44,6 +47,18 @@ #define STMMAC_CSR_150_250M 0x4 /* MDC = clk_scr_i/102 */ #define STMMAC_CSR_250_300M 0x5 /* MDC = clk_scr_i/122 */ +/* MTL algorithms identifiers */ +#define MTL_TX_ALGORITHM_WRR 0x0 +#define MTL_TX_ALGORITHM_WFQ 0x1 +#define MTL_TX_ALGORITHM_DWRR 0x2 +#define MTL_TX_ALGORITHM_SP 0x3 +#define MTL_RX_ALGORITHM_SP 0x4 +#define MTL_RX_ALGORITHM_WSP 0x5 + +/* RX Queue Mode */ +#define MTL_RX_DCB 0x0 +#define MTL_RX_AVB 0x1 + /* The MDC clock could be set higher than the IEEE 802.3 * specified frequency limit 0f 2.5 MHz, by programming a clock divider * of value different than the above defined values. 
The resultant MDIO @@ -109,6 +124,15 @@ struct stmmac_axi { bool axi_rb; }; +struct stmmac_rxq_cfg { + u8 mode_to_use; + u8 chan; +}; + +struct stmmac_txq_cfg { + u8 weight; +}; + struct plat_stmmacenet_data { int bus_id; int phy_addr; @@ -133,6 +157,12 @@ struct plat_stmmacenet_data { int unicast_filter_entries; int tx_fifo_size; int rx_fifo_size; + u8 rx_queues_to_use; + u8 tx_queues_to_use; + u8 rx_sched_algorithm; + u8 tx_sched_algorithm; + struct stmmac_rxq_cfg rx_queues_cfg[MTL_MAX_RX_QUEUES]; + struct stmmac_txq_cfg tx_queues_cfg[MTL_MAX_TX_QUEUES]; void (*fix_mac_speed)(void *priv, unsigned int speed); int (*init)(struct platform_device *pdev, void *priv); void (*exit)(struct platform_device *pdev, void *priv); -- cgit v1.2.3 From 19d9187317979cf0c25f67017d2676149abc46b2 Mon Sep 17 00:00:00 2001 From: Joao Pinto Date: Fri, 10 Mar 2017 18:24:59 +0000 Subject: net: stmmac: configuration of CBS in case of a TX AVB queue This patch adds the configuration of the AVB Credit-Based Shaper. Signed-off-by: Joao Pinto Signed-off-by: David S. 
Miller --- include/linux/stmmac.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 266ff2af91e5..be47b859e954 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -55,9 +55,9 @@ #define MTL_RX_ALGORITHM_SP 0x4 #define MTL_RX_ALGORITHM_WSP 0x5 -/* RX Queue Mode */ -#define MTL_RX_DCB 0x0 -#define MTL_RX_AVB 0x1 +/* RX/TX Queue Mode */ +#define MTL_QUEUE_DCB 0x0 +#define MTL_QUEUE_AVB 0x1 /* The MDC clock could be set higher than the IEEE 802.3 * specified frequency limit 0f 2.5 MHz, by programming a clock divider @@ -131,6 +131,12 @@ struct stmmac_rxq_cfg { struct stmmac_txq_cfg { u8 weight; + u8 mode_to_use; + /* Credit Base Shaper parameters */ + u32 send_slope; + u32 idle_slope; + u32 high_credit; + u32 low_credit; }; struct plat_stmmacenet_data { -- cgit v1.2.3 From bd174169c7a12a37b3b4aa2221f084ade010b182 Mon Sep 17 00:00:00 2001 From: David Windsor Date: Fri, 10 Mar 2017 10:34:12 -0500 Subject: locking/refcount: Add refcount_t API kernel-doc comments Signed-off-by: David Windsor Acked-by: Peter Zijlstra Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Thomas Gleixner Cc: elena.reshetova@intel.com Cc: kernel-hardening@lists.openwall.com Link: http://lkml.kernel.org/r/1489160052-20293-1-git-send-email-dwindsor@gmail.com Signed-off-by: Ingo Molnar --- include/linux/refcount.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/refcount.h b/include/linux/refcount.h index 0023fee4bbbc..b34aa649d204 100644 --- a/include/linux/refcount.h +++ b/include/linux/refcount.h @@ -6,17 +6,36 @@ #include #include +/** + * refcount_t - variant of atomic_t specialized for reference counts + * @refs: atomic_t counter field + * + * The counter saturates at UINT_MAX and will not move once + * there. This avoids wrapping the counter and causing 'spurious' + * use-after-free bugs. 
+ */ typedef struct refcount_struct { atomic_t refs; } refcount_t; #define REFCOUNT_INIT(n) { .refs = ATOMIC_INIT(n), } +/** + * refcount_set - set a refcount's value + * @r: the refcount + * @n: value to which the refcount will be set + */ static inline void refcount_set(refcount_t *r, unsigned int n) { atomic_set(&r->refs, n); } +/** + * refcount_read - get a refcount's value + * @r: the refcount + * + * Return: the refcount's value + */ static inline unsigned int refcount_read(const refcount_t *r) { return atomic_read(&r->refs); -- cgit v1.2.3 From 0c68f666d4cc8835ed888ffdd58f76d4d8e2da51 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Sat, 11 Mar 2017 16:13:03 -0500 Subject: etherdevice: remove unused eth_addr_greater eth_addr_greater() was introduced for the mv88e6xxx driver, but is not used anymore. There is no other user, thus remove this function. Signed-off-by: Vivien Didelot Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/etherdevice.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index c62b709b1ce0..2d9f80848d4b 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -446,21 +446,6 @@ static inline void eth_addr_dec(u8 *addr) u64_to_ether_addr(u, addr); } -/** - * ether_addr_greater - Compare two Ethernet addresses - * @addr1: Pointer to a six-byte array containing the Ethernet address - * @addr2: Pointer other six-byte array containing the Ethernet address - * - * Compare two Ethernet addresses, returns true addr1 is greater than addr2 - */ -static inline bool ether_addr_greater(const u8 *addr1, const u8 *addr2) -{ - u64 u1 = ether_addr_to_u64(addr1); - u64 u2 = ether_addr_to_u64(addr2); - - return u1 > u2; -} - /** * is_etherdev_addr - Tell if given Ethernet address belongs to the device. 
* @dev: Pointer to a device structure -- cgit v1.2.3 From 1b028f784e8c341e762c264f70dc0ca1418c8b7a Mon Sep 17 00:00:00 2001 From: Dmitry Safonov Date: Mon, 6 Mar 2017 17:17:19 +0300 Subject: x86/mm: Introduce mmap_compat_base() for 32-bit mmap() mmap() uses a base address, from which it starts to look for a free space for allocation. The base address is stored in mm->mmap_base, which is calculated during exec(). The address depends on task's size, set rlimit for stack, ASLR randomization. The base depends on the task size and the number of random bits which are different for 64-bit and 32bit applications. Due to the fact, that the base address is fixed, its mmap() from a compat (32bit) syscall issued by a 64bit task will return an address which is based on the 64bit base address and does not fit into the 32bit address space (4GB). The returned pointer is truncated to 32bit, which results in an invalid address. To solve this, store a separate compat address base plus a compat legacy address base in mm_struct. These bases are calculated at exec() time and can be used later to address the 32bit compat mmap() issued by 64 bit applications. As a consequence of this change 32-bit applications issuing a 64-bit syscall (after doing a long jump) will get a 64-bit mapping now. Before this change 32-bit applications always got a 32bit mapping. [ tglx: Massaged changelog and added a comment ] Signed-off-by: Dmitry Safonov Cc: 0x7f454c46@gmail.com Cc: linux-mm@kvack.org Cc: Andy Lutomirski Cc: Cyrill Gorcunov Cc: Borislav Petkov Cc: "Kirill A.
Shutemov" Link: http://lkml.kernel.org/r/20170306141721.9188-4-dsafonov@virtuozzo.com Signed-off-by: Thomas Gleixner --- include/linux/mm_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index f60f45fe226f..45cdb27791a3 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -367,6 +367,11 @@ struct mm_struct { #endif unsigned long mmap_base; /* base of mmap area */ unsigned long mmap_legacy_base; /* base of mmap area in bottom-up allocations */ +#ifdef CONFIG_HAVE_ARCH_COMPAT_MMAP_BASES + /* Base addresses for compatible mmap() */ + unsigned long mmap_compat_base; + unsigned long mmap_compat_legacy_base; +#endif unsigned long task_size; /* size of task vm space */ unsigned long highest_vm_end; /* highest vma end address */ pgd_t * pgd; -- cgit v1.2.3 From c6182ac96096f35c7216e4e6a3c64c7374dadeb7 Mon Sep 17 00:00:00 2001 From: George McCollister Date: Thu, 9 Mar 2017 08:14:43 -0600 Subject: regulator: pfuze100-regulator: add coin support Add support for PF0200 coin cell/super capacitor charger which works as a current limited voltage source via the LICELL pin. When VIN goes below a certain threshold LICELL is used to provide power for VSNVS which is usually used to hold up secure non-volatile storage and the real-time clock on the SoC.
Signed-off-by: George McCollister Signed-off-by: Mark Brown --- include/linux/regulator/pfuze100.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/pfuze100.h b/include/linux/regulator/pfuze100.h index 70c6c66c5bcf..e0ccf46f66cf 100644 --- a/include/linux/regulator/pfuze100.h +++ b/include/linux/regulator/pfuze100.h @@ -48,6 +48,7 @@ #define PFUZE200_VGEN4 10 #define PFUZE200_VGEN5 11 #define PFUZE200_VGEN6 12 +#define PFUZE200_COIN 13 #define PFUZE3000_SW1A 0 #define PFUZE3000_SW1B 1 -- cgit v1.2.3 From e422267322cd319e2695a535e47c5b1feeac45eb Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Wed, 8 Mar 2017 02:11:36 +0530 Subject: perf: Add PERF_RECORD_NAMESPACES to include namespaces related info With the advent of container technologies like docker, that depend on namespaces for isolation, there is a need for tracing support for namespaces. This patch introduces a new PERF_RECORD_NAMESPACES event for recording namespaces related info. By recording info for every namespace, it is left to userspace to take a call on the definition of a container and trace containers by updating perf tool accordingly. Each namespace has a combination of device and inode numbers. Though every namespace has the same device number currently, that may change in future to avoid the need for a namespace of namespaces. Considering such possibility, record both device and inode numbers separately for each namespace.
Signed-off-by: Hari Bathini Acked-by: Jiri Olsa Acked-by: Peter Zijlstra Cc: Alexander Shishkin Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Aravinda Prasad Cc: Brendan Gregg Cc: Daniel Borkmann Cc: Eric Biederman Cc: Sargun Dhillon Cc: Steven Rostedt Link: http://lkml.kernel.org/r/148891929686.25309.2827618988917007768.stgit@hbathini.in.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/perf_event.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 000fdb211c7d..f19a82362851 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1112,6 +1112,7 @@ extern int perf_unregister_guest_info_callbacks(struct perf_guest_info_callbacks extern void perf_event_exec(void); extern void perf_event_comm(struct task_struct *tsk, bool exec); +extern void perf_event_namespaces(struct task_struct *tsk); extern void perf_event_fork(struct task_struct *tsk); /* Callchains */ @@ -1315,6 +1316,7 @@ static inline int perf_unregister_guest_info_callbacks static inline void perf_event_mmap(struct vm_area_struct *vma) { } static inline void perf_event_exec(void) { } static inline void perf_event_comm(struct task_struct *tsk, bool exec) { } +static inline void perf_event_namespaces(struct task_struct *tsk) { } static inline void perf_event_fork(struct task_struct *tsk) { } static inline void perf_event_init(void) { } static inline int perf_swevent_get_recursion_context(void) { return -1; } -- cgit v1.2.3 From be086e7c53f1fac51eed14523b28f2214b548dd2 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Sat, 11 Mar 2017 18:39:18 +0200 Subject: qed*: Utilize Firmware 8.15.3.0 This patch advances the qed* drivers into using the newer firmware - This solves several firmware bugs, mostly related [but not limited to] various init/deinit issues in various offloaded protocols. 
It also introduces a major 4-Cached SGE change in firmware, which can be seen in the storage drivers' changes. In addition, this firmware is required for supporting the new QL41xxx series of adapters; While this patch doesn't add the actual support, the firmware contains the necessary initialization & firmware logic to operate such adapters [actual support would be added later on]. Changes from Previous versions: ------------------------------- - V2 - fix kbuild-test robot warnings Signed-off-by: Tomer Tayar Signed-off-by: Ram Amrani Signed-off-by: Manish Rangankar Signed-off-by: Chad Dupuis Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 30 ++++- include/linux/qed/eth_common.h | 3 + include/linux/qed/fcoe_common.h | 180 +++++++++++---------------- include/linux/qed/iscsi_common.h | 241 ++++++++++++++----------------------- include/linux/qed/roce_common.h | 17 +++ include/linux/qed/storage_common.h | 30 ++++- include/linux/qed/tcp_common.h | 1 + 7 files changed, 229 insertions(+), 273 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h index 52966b9bfde3..fbab6e0514f0 100644 --- a/include/linux/qed/common_hsi.h +++ b/include/linux/qed/common_hsi.h @@ -100,8 +100,8 @@ #define MAX_NUM_LL2_TX_STATS_COUNTERS 32 #define FW_MAJOR_VERSION 8 -#define FW_MINOR_VERSION 10 -#define FW_REVISION_VERSION 10 +#define FW_MINOR_VERSION 15 +#define FW_REVISION_VERSION 3 #define FW_ENGINEERING_VERSION 0 /***********************/ @@ -187,6 +187,9 @@ /* DEMS */ #define DQ_DEMS_LEGACY 0 +#define DQ_DEMS_TOE_MORE_TO_SEND 3 +#define DQ_DEMS_TOE_LOCAL_ADV_WND 4 +#define DQ_DEMS_ROCE_CQ_CONS 7 /* XCM agg val selection */ #define DQ_XCM_AGG_VAL_SEL_WORD2 0 @@ -214,6 +217,9 @@ #define DQ_XCM_ISCSI_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 #define DQ_XCM_ISCSI_EXP_STAT_SN_CMD DQ_XCM_AGG_VAL_SEL_REG6 #define DQ_XCM_ROCE_SQ_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define 
DQ_XCM_TOE_TX_BD_PROD_CMD DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_TOE_MORE_TO_SEND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG3 +#define DQ_XCM_TOE_LOCAL_ADV_WND_SEQ_CMD DQ_XCM_AGG_VAL_SEL_REG4 /* UCM agg val selection (HW) */ #define DQ_UCM_AGG_VAL_SEL_WORD0 0 @@ -269,6 +275,8 @@ #define DQ_XCM_ISCSI_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) #define DQ_XCM_ISCSI_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) #define DQ_XCM_ISCSI_PROC_ONLY_CLEANUP_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF23) +#define DQ_XCM_TOE_DQ_FLUSH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_TOE_SLOW_PATH_CMD BIT(DQ_XCM_AGG_FLG_SHIFT_CF22) /* UCM agg counter flag selection (HW) */ #define DQ_UCM_AGG_FLG_SHIFT_CF0 0 @@ -285,6 +293,9 @@ #define DQ_UCM_ETH_PMD_RX_ARM_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) #define DQ_UCM_ROCE_CQ_ARM_SE_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) #define DQ_UCM_ROCE_CQ_ARM_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) +#define DQ_UCM_TOE_TIMER_STOP_ALL_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF3) +#define DQ_UCM_TOE_SLOW_PATH_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF4) +#define DQ_UCM_TOE_DQ_CF_CMD BIT(DQ_UCM_AGG_FLG_SHIFT_CF5) /* TCM agg counter flag selection (HW) */ #define DQ_TCM_AGG_FLG_SHIFT_CF0 0 @@ -301,6 +312,9 @@ #define DQ_TCM_FCOE_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) #define DQ_TCM_ISCSI_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) #define DQ_TCM_ISCSI_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) +#define DQ_TCM_TOE_FLUSH_Q0_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) +#define DQ_TCM_TOE_TIMER_STOP_ALL_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF3) +#define DQ_TCM_IWARP_POST_RQ_CF_CMD BIT(DQ_TCM_AGG_FLG_SHIFT_CF1) /* PWM address mapping */ #define DQ_PWM_OFFSET_DPM_BASE 0x0 @@ -689,6 +703,16 @@ struct iscsi_eqe_data { #define ISCSI_EQE_DATA_RESERVED0_SHIFT 7 }; +struct rdma_eqe_destroy_qp { + __le32 cid; + u8 reserved[4]; +}; + +union rdma_eqe_data { + struct regpair async_handle; + struct rdma_eqe_destroy_qp rdma_destroy_qp_data; +}; + struct malicious_vf_eqe_data { u8 vf_id; u8 err_id; @@ -705,9 +729,9 @@ union 
event_ring_data { u8 bytes[8]; struct vf_pf_channel_eqe_data vf_pf_channel; struct iscsi_eqe_data iscsi_info; + union rdma_eqe_data rdma_data; struct malicious_vf_eqe_data malicious_vf; struct initial_cleanup_eqe_data vf_init_cleanup; - struct regpair roce_handle; }; /* Event Ring Entry */ diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h index 4b402fb0eaad..34d93eb5bfba 100644 --- a/include/linux/qed/eth_common.h +++ b/include/linux/qed/eth_common.h @@ -49,6 +49,9 @@ #define ETH_RX_CQE_PAGE_SIZE_BYTES 4096 #define ETH_RX_NUM_NEXT_PAGE_BDS 2 +#define ETH_MAX_TUNN_LSO_INNER_IPV4_OFFSET 253 +#define ETH_MAX_TUNN_LSO_INNER_IPV6_OFFSET 251 + #define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 #define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 #define ETH_TX_MAX_BDS_PER_LSO_PACKET 255 diff --git a/include/linux/qed/fcoe_common.h b/include/linux/qed/fcoe_common.h index 2e417a45c5f7..947a635d04bb 100644 --- a/include/linux/qed/fcoe_common.h +++ b/include/linux/qed/fcoe_common.h @@ -109,13 +109,6 @@ struct fcoe_conn_terminate_ramrod_data { struct regpair terminate_params_addr; }; -struct fcoe_fast_sgl_ctx { - struct regpair sgl_start_addr; - __le32 sgl_byte_offset; - __le16 task_reuse_cnt; - __le16 init_offset_in_first_sge; -}; - struct fcoe_slow_sgl_ctx { struct regpair base_sgl_addr; __le16 curr_sge_off; @@ -124,23 +117,16 @@ struct fcoe_slow_sgl_ctx { __le16 reserved; }; -struct fcoe_sge { - struct regpair sge_addr; - __le16 size; - __le16 reserved0; - u8 reserved1[3]; - u8 is_valid_sge; -}; - -union fcoe_data_desc_ctx { - struct fcoe_fast_sgl_ctx fast; - struct fcoe_slow_sgl_ctx slow; - struct fcoe_sge single_sge; -}; - union fcoe_dix_desc_ctx { struct fcoe_slow_sgl_ctx dix_sgl; - struct fcoe_sge cached_dix_sge; + struct scsi_sge cached_dix_sge; +}; + +struct fcoe_fast_sgl_ctx { + struct regpair sgl_start_addr; + __le32 sgl_byte_offset; + __le16 task_reuse_cnt; + __le16 init_offset_in_first_sge; }; struct fcoe_fcp_cmd_payload { @@ -172,57 +158,6 @@ enum 
fcoe_mode_type { MAX_FCOE_MODE_TYPE }; -struct fcoe_mstorm_fcoe_task_st_ctx_fp { - __le16 flags; -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_MASK 0x7FFF -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_RSRV0_SHIFT 0 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_FP_MP_INCLUDE_FC_HEADER_SHIFT 15 - __le16 difDataResidue; - __le16 parent_id; - __le16 single_sge_saved_offset; - __le32 data_2_trns_rem; - __le32 offset_in_io; - union fcoe_dix_desc_ctx dix_desc; - union fcoe_data_desc_ctx data_desc; -}; - -struct fcoe_mstorm_fcoe_task_st_ctx_non_fp { - __le16 flags; -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_MASK 0x3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HOST_INTERFACE_SHIFT 0 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_TO_PEER_SHIFT 2 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_APP_TAG_SHIFT 3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_MASK 0xF -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_INTERVAL_SIZE_LOG_SHIFT 4 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_MASK 0x3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_BLOCK_SIZE_SHIFT 8 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RESERVED_SHIFT 10 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_HAS_FIRST_PACKET_ARRIVED_SHIFT 11 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_VALIDATE_DIX_REF_TAG_SHIFT 12 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIX_CACHED_SGE_FLG_SHIFT 13 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_MASK 0x1 -#define 
FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_OFFSET_IN_IO_VALID_SHIFT 14 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_MASK 0x1 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_DIF_SUPPORTED_SHIFT 15 - u8 tx_rx_sgl_mode; -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_MASK 0x7 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_TX_SGL_MODE_SHIFT 0 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_MASK 0x7 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RX_SGL_MODE_SHIFT 3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_MASK 0x3 -#define FCOE_MSTORM_FCOE_TASK_ST_CTX_NON_FP_RSRV1_SHIFT 6 - u8 rsrv2; - __le32 num_prm_zero_read; - struct regpair rsp_buf_addr; -}; - struct fcoe_rx_stat { struct regpair fcoe_rx_byte_cnt; struct regpair fcoe_rx_data_pkt_cnt; @@ -236,16 +171,6 @@ struct fcoe_rx_stat { __le32 rsrv; }; -enum fcoe_sgl_mode { - FCOE_SLOW_SGL, - FCOE_SINGLE_FAST_SGE, - FCOE_2_FAST_SGE, - FCOE_3_FAST_SGE, - FCOE_4_FAST_SGE, - FCOE_MUL_FAST_SGES, - MAX_FCOE_SGL_MODE -}; - struct fcoe_stat_ramrod_data { struct regpair stat_params_addr; }; @@ -328,22 +253,24 @@ union fcoe_tx_info_union_ctx { struct ystorm_fcoe_task_st_ctx { u8 task_type; u8 sgl_mode; -#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x7 +#define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x1 #define YSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 0 -#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK 0x1F -#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT 3 +#define YSTORM_FCOE_TASK_ST_CTX_RSRV_MASK 0x7F +#define YSTORM_FCOE_TASK_ST_CTX_RSRV_SHIFT 1 u8 cached_dix_sge; u8 expect_first_xfer; __le32 num_pbf_zero_write; union protection_info_union_ctx protection_info_union; __le32 data_2_trns_rem; + struct scsi_sgl_params sgl_params; + u8 reserved1[12]; union fcoe_tx_info_union_ctx tx_info_union; union fcoe_dix_desc_ctx dix_desc; - union fcoe_data_desc_ctx data_desc; + struct scsi_cached_sges data_desc; __le16 ox_id; __le16 rx_id; __le32 task_rety_identifier; - __le32 reserved1[2]; + u8 reserved2[8]; }; struct 
ystorm_fcoe_task_ag_ctx { @@ -484,22 +411,22 @@ struct tstorm_fcoe_task_ag_ctx { struct fcoe_tstorm_fcoe_task_st_ctx_read_write { union fcoe_cleanup_addr_exp_ro_union cleanup_addr_exp_ro_union; __le16 flags; -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK 0x7 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_MASK 0x1 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RX_SGL_MODE_SHIFT 0 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT 3 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_EXP_FIRST_FRAME_SHIFT 1 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT 4 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_ACTIVE_SHIFT 2 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT 5 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SEQ_TIMEOUT_SHIFT 3 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 6 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_SINGLE_PKT_IN_EX_SHIFT 4 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_MASK 0x1 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT 7 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_OOO_RX_SEQ_STAT_SHIFT 5 #define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_MASK 0x3 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT 8 -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK 0x3F -#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT 10 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_CQ_ADD_ADV_SHIFT 6 +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_MASK 0xFF +#define FCOE_TSTORM_FCOE_TASK_ST_CTX_READ_WRITE_RSRV1_SHIFT 8 __le16 seq_cnt; u8 seq_id; u8 ooo_rx_seq_id; @@ 
-582,8 +509,34 @@ struct mstorm_fcoe_task_ag_ctx { }; struct mstorm_fcoe_task_st_ctx { - struct fcoe_mstorm_fcoe_task_st_ctx_non_fp non_fp; - struct fcoe_mstorm_fcoe_task_st_ctx_fp fp; + struct regpair rsp_buf_addr; + __le32 rsrv[2]; + struct scsi_sgl_params sgl_params; + __le32 data_2_trns_rem; + __le32 data_buffer_offset; + __le16 parent_id; + __le16 flags; +#define MSTORM_FCOE_TASK_ST_CTX_INTERVAL_SIZE_LOG_MASK 0xF +#define MSTORM_FCOE_TASK_ST_CTX_INTERVAL_SIZE_LOG_SHIFT 0 +#define MSTORM_FCOE_TASK_ST_CTX_HOST_INTERFACE_MASK 0x3 +#define MSTORM_FCOE_TASK_ST_CTX_HOST_INTERFACE_SHIFT 4 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_TO_PEER_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_TO_PEER_SHIFT 6 +#define MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_MP_INCLUDE_FC_HEADER_SHIFT 7 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_BLOCK_SIZE_MASK 0x3 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_BLOCK_SIZE_SHIFT 8 +#define MSTORM_FCOE_TASK_ST_CTX_VALIDATE_DIX_REF_TAG_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_VALIDATE_DIX_REF_TAG_SHIFT 10 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_CACHED_SGE_FLG_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_DIX_CACHED_SGE_FLG_SHIFT 11 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_SUPPORTED_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_DIF_SUPPORTED_SHIFT 12 +#define MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_MASK 0x1 +#define MSTORM_FCOE_TASK_ST_CTX_TX_SGL_MODE_SHIFT 13 +#define MSTORM_FCOE_TASK_ST_CTX_RESERVED_MASK 0x3 +#define MSTORM_FCOE_TASK_ST_CTX_RESERVED_SHIFT 14 + struct scsi_cached_sges data_desc; }; struct ustorm_fcoe_task_ag_ctx { @@ -646,6 +599,7 @@ struct ustorm_fcoe_task_ag_ctx { struct fcoe_task_context { struct ystorm_fcoe_task_st_ctx ystorm_st_context; + struct regpair ystorm_st_padding[2]; struct tdif_task_context tdif_context; struct ystorm_fcoe_task_ag_ctx ystorm_ag_context; struct tstorm_fcoe_task_ag_ctx tstorm_ag_context; @@ -668,20 +622,20 @@ struct fcoe_tx_stat { struct fcoe_wqe { __le16 task_id; __le16 flags; -#define 
FCOE_WQE_REQ_TYPE_MASK 0xF -#define FCOE_WQE_REQ_TYPE_SHIFT 0 -#define FCOE_WQE_SGL_MODE_MASK 0x7 -#define FCOE_WQE_SGL_MODE_SHIFT 4 -#define FCOE_WQE_CONTINUATION_MASK 0x1 -#define FCOE_WQE_CONTINUATION_SHIFT 7 -#define FCOE_WQE_INVALIDATE_PTU_MASK 0x1 -#define FCOE_WQE_INVALIDATE_PTU_SHIFT 8 -#define FCOE_WQE_SUPER_IO_MASK 0x1 -#define FCOE_WQE_SUPER_IO_SHIFT 9 -#define FCOE_WQE_SEND_AUTO_RSP_MASK 0x1 -#define FCOE_WQE_SEND_AUTO_RSP_SHIFT 10 -#define FCOE_WQE_RESERVED0_MASK 0x1F -#define FCOE_WQE_RESERVED0_SHIFT 11 +#define FCOE_WQE_REQ_TYPE_MASK 0xF +#define FCOE_WQE_REQ_TYPE_SHIFT 0 +#define FCOE_WQE_SGL_MODE_MASK 0x1 +#define FCOE_WQE_SGL_MODE_SHIFT 4 +#define FCOE_WQE_CONTINUATION_MASK 0x1 +#define FCOE_WQE_CONTINUATION_SHIFT 5 +#define FCOE_WQE_SEND_AUTO_RSP_MASK 0x1 +#define FCOE_WQE_SEND_AUTO_RSP_SHIFT 6 +#define FCOE_WQE_RESERVED_MASK 0x1 +#define FCOE_WQE_RESERVED_SHIFT 7 +#define FCOE_WQE_NUM_SGES_MASK 0xF +#define FCOE_WQE_NUM_SGES_SHIFT 8 +#define FCOE_WQE_RESERVED1_MASK 0xF +#define FCOE_WQE_RESERVED1_SHIFT 12 union fcoe_additional_info_union additional_info_union; }; diff --git a/include/linux/qed/iscsi_common.h b/include/linux/qed/iscsi_common.h index 4c5747babcf6..69949f8e354b 100644 --- a/include/linux/qed/iscsi_common.h +++ b/include/linux/qed/iscsi_common.h @@ -39,17 +39,9 @@ /* iSCSI HSI constants */ #define ISCSI_DEFAULT_MTU (1500) -/* Current iSCSI HSI version number composed of two fields (16 bit) */ -#define ISCSI_HSI_MAJOR_VERSION (0) -#define ISCSI_HSI_MINOR_VERSION (0) - /* KWQ (kernel work queue) layer codes */ #define ISCSI_SLOW_PATH_LAYER_CODE (6) -/* CQE completion status */ -#define ISCSI_EQE_COMPLETION_SUCCESS (0x0) -#define ISCSI_EQE_RST_CONN_RCVD (0x1) - /* iSCSI parameter defaults */ #define ISCSI_DEFAULT_HEADER_DIGEST (0) #define ISCSI_DEFAULT_DATA_DIGEST (0) @@ -68,6 +60,10 @@ #define ISCSI_MIN_VAL_MAX_OUTSTANDING_R2T (1) #define ISCSI_MAX_VAL_MAX_OUTSTANDING_R2T (0xff) +#define ISCSI_AHS_CNTL_SIZE 4 + +#define 
ISCSI_WQE_NUM_SGES_SLOWIO (0xf) + /* iSCSI reserved params */ #define ISCSI_ITT_ALL_ONES (0xffffffff) #define ISCSI_TTT_ALL_ONES (0xffffffff) @@ -173,19 +169,6 @@ struct iscsi_async_msg_hdr { __le32 reserved7; }; -struct iscsi_sge { - struct regpair sge_addr; - __le16 sge_len; - __le16 reserved0; - __le32 reserved1; -}; - -struct iscsi_cached_sge_ctx { - struct iscsi_sge sge; - struct regpair reserved; - __le32 dsgl_curr_offset[2]; -}; - struct iscsi_cmd_hdr { __le16 reserved1; u8 flags_attr; @@ -229,8 +212,13 @@ struct iscsi_common_hdr { #define ISCSI_COMMON_HDR_DATA_SEG_LEN_SHIFT 0 #define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_COMMON_HDR_TOTAL_AHS_LEN_SHIFT 24 - __le32 lun_reserved[4]; - __le32 data[6]; + struct regpair lun_reserved; + __le32 itt; + __le32 ttt; + __le32 cmdstat_sn; + __le32 exp_statcmd_sn; + __le32 max_cmd_sn; + __le32 data[3]; }; struct iscsi_conn_offload_params { @@ -246,8 +234,10 @@ struct iscsi_conn_offload_params { #define ISCSI_CONN_OFFLOAD_PARAMS_TCP_ON_CHIP_1B_SHIFT 0 #define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_MASK 0x1 #define ISCSI_CONN_OFFLOAD_PARAMS_TARGET_MODE_SHIFT 1 -#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x3F -#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 2 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_MASK 0x1 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESTRICTED_MODE_SHIFT 2 +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_MASK 0x1F +#define ISCSI_CONN_OFFLOAD_PARAMS_RESERVED1_SHIFT 3 u8 pbl_page_size_log; u8 pbe_page_size_log; u8 default_cq; @@ -278,8 +268,12 @@ struct iscsi_conn_update_ramrod_params { #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_INITIAL_R2T_SHIFT 2 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_MASK 0x1 #define ISCSI_CONN_UPDATE_RAMROD_PARAMS_IMMEDIATE_DATA_SHIFT 3 -#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0xF -#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT 4 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_BLOCK_SIZE_MASK 0x1 +#define 
ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_BLOCK_SIZE_SHIFT 4 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_HOST_EN_MASK 0x1 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_DIF_ON_HOST_EN_SHIFT 5 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_MASK 0x3 +#define ISCSI_CONN_UPDATE_RAMROD_PARAMS_RESERVED1_SHIFT 6 u8 reserved0[3]; __le32 max_seq_size; __le32 max_send_pdu_length; @@ -312,7 +306,7 @@ struct iscsi_ext_cdb_cmd_hdr { __le32 expected_transfer_length; __le32 cmd_sn; __le32 exp_stat_sn; - struct iscsi_sge cdb_sge; + struct scsi_sge cdb_sge; }; struct iscsi_login_req_hdr { @@ -519,8 +513,8 @@ struct iscsi_logout_response_hdr { __le32 exp_cmd_sn; __le32 max_cmd_sn; __le32 reserved4; - __le16 time2retain; - __le16 time2wait; + __le16 time_2_retain; + __le16 time_2_wait; __le32 reserved5[1]; }; @@ -602,7 +596,7 @@ struct iscsi_tmf_response_hdr { #define ISCSI_TMF_RESPONSE_HDR_TOTAL_AHS_LEN_SHIFT 24 struct regpair reserved0; __le32 itt; - __le32 rtt; + __le32 reserved1; __le32 stat_sn; __le32 exp_cmd_sn; __le32 max_cmd_sn; @@ -641,7 +635,7 @@ struct iscsi_reject_hdr { #define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_MASK 0xFF #define ISCSI_REJECT_HDR_TOTAL_AHS_LEN_SHIFT 24 struct regpair reserved0; - __le32 reserved1; + __le32 all_ones; __le32 reserved2; __le32 stat_sn; __le32 exp_cmd_sn; @@ -688,7 +682,9 @@ struct iscsi_cqe_solicited { __le16 itid; u8 task_type; u8 fw_dbg_field; - __le32 reserved1[2]; + u8 caused_conn_err; + u8 reserved0[3]; + __le32 reserved1[1]; union iscsi_task_hdr iscsi_hdr; }; @@ -727,35 +723,6 @@ enum iscsi_cqe_unsolicited_type { MAX_ISCSI_CQE_UNSOLICITED_TYPE }; -struct iscsi_virt_sgl_ctx { - struct regpair sgl_base; - struct regpair dsgl_base; - __le32 sgl_initial_offset; - __le32 dsgl_initial_offset; - __le32 dsgl_curr_offset[2]; -}; - -struct iscsi_sgl_var_params { - u8 sgl_ptr; - u8 dsgl_ptr; - __le16 sge_offset; - __le16 dsge_offset; -}; - -struct iscsi_phys_sgl_ctx { - struct regpair sgl_base; - struct regpair dsgl_base; - u8 sgl_size; - u8 dsgl_size; - 
__le16 reserved; - struct iscsi_sgl_var_params var_params[2]; -}; - -union iscsi_data_desc_ctx { - struct iscsi_virt_sgl_ctx virt_sgl; - struct iscsi_phys_sgl_ctx phys_sgl; - struct iscsi_cached_sge_ctx cached_sge; -}; struct iscsi_debug_modes { u8 flags; @@ -771,8 +738,10 @@ struct iscsi_debug_modes { #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_REJECT_OR_ASYNC_SHIFT 4 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_MASK 0x1 #define ISCSI_DEBUG_MODES_ASSERT_IF_RECV_NOP_SHIFT 5 -#define ISCSI_DEBUG_MODES_RESERVED0_MASK 0x3 -#define ISCSI_DEBUG_MODES_RESERVED0_SHIFT 6 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DATA_DIGEST_ERROR_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DATA_DIGEST_ERROR_SHIFT 6 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DIF_ERROR_MASK 0x1 +#define ISCSI_DEBUG_MODES_ASSERT_IF_DIF_ERROR_SHIFT 7 }; struct iscsi_dif_flags { @@ -806,7 +775,6 @@ enum iscsi_eqe_opcode { ISCSI_EVENT_TYPE_ASYN_FIN_WAIT2, ISCSI_EVENT_TYPE_ISCSI_CONN_ERROR, ISCSI_EVENT_TYPE_TCP_CONN_ERROR, - ISCSI_EVENT_TYPE_ASYN_DELETE_OOO_ISLES, MAX_ISCSI_EQE_OPCODE }; @@ -856,31 +824,11 @@ enum iscsi_error_types { ISCSI_CONN_ERROR_PROTOCOL_ERR_DIF_TX, ISCSI_CONN_ERROR_SENSE_DATA_LENGTH, ISCSI_CONN_ERROR_DATA_PLACEMENT_ERROR, + ISCSI_CONN_ERROR_INVALID_ITT, ISCSI_ERROR_UNKNOWN, MAX_ISCSI_ERROR_TYPES }; -struct iscsi_mflags { - u8 mflags; -#define ISCSI_MFLAGS_SLOW_IO_MASK 0x1 -#define ISCSI_MFLAGS_SLOW_IO_SHIFT 0 -#define ISCSI_MFLAGS_SINGLE_SGE_MASK 0x1 -#define ISCSI_MFLAGS_SINGLE_SGE_SHIFT 1 -#define ISCSI_MFLAGS_RESERVED_MASK 0x3F -#define ISCSI_MFLAGS_RESERVED_SHIFT 2 -}; - -struct iscsi_sgl { - struct regpair sgl_addr; - __le16 updated_sge_size; - __le16 updated_sge_offset; - __le32 byte_offset; -}; - -union iscsi_mstorm_sgl { - struct iscsi_sgl sgl_struct; - struct iscsi_sge single_sge; -}; enum iscsi_ramrod_cmd_id { ISCSI_RAMROD_CMD_ID_UNUSED = 0, @@ -896,10 +844,10 @@ enum iscsi_ramrod_cmd_id { struct iscsi_reg1 { __le32 reg1_map; -#define ISCSI_REG1_NUM_FAST_SGES_MASK 0x7 -#define 
ISCSI_REG1_NUM_FAST_SGES_SHIFT 0 -#define ISCSI_REG1_RESERVED1_MASK 0x1FFFFFFF -#define ISCSI_REG1_RESERVED1_SHIFT 3 +#define ISCSI_REG1_NUM_SGES_MASK 0xF +#define ISCSI_REG1_NUM_SGES_SHIFT 0 +#define ISCSI_REG1_RESERVED1_MASK 0xFFFFFFF +#define ISCSI_REG1_RESERVED1_SHIFT 4 }; union iscsi_seq_num { @@ -967,22 +915,33 @@ struct iscsi_spe_func_init { }; struct ystorm_iscsi_task_state { - union iscsi_data_desc_ctx sgl_ctx_union; - __le32 buffer_offset[2]; - __le16 bytes_nxt_dif; - __le16 rxmit_bytes_nxt_dif; - union iscsi_seq_num seq_num_union; - u8 dif_bytes_leftover; - u8 rxmit_dif_bytes_leftover; - __le16 reuse_count; - struct iscsi_dif_flags dif_flags; - u8 local_comp; + struct scsi_cached_sges data_desc; + struct scsi_sgl_params sgl_params; __le32 exp_r2t_sn; - __le32 sgl_offset[2]; + __le32 buffer_offset; + union iscsi_seq_num seq_num; + struct iscsi_dif_flags dif_flags; + u8 flags; +#define YSTORM_ISCSI_TASK_STATE_LOCAL_COMP_MASK 0x1 +#define YSTORM_ISCSI_TASK_STATE_LOCAL_COMP_SHIFT 0 +#define YSTORM_ISCSI_TASK_STATE_SLOW_IO_MASK 0x1 +#define YSTORM_ISCSI_TASK_STATE_SLOW_IO_SHIFT 1 +#define YSTORM_ISCSI_TASK_STATE_RESERVED0_MASK 0x3F +#define YSTORM_ISCSI_TASK_STATE_RESERVED0_SHIFT 2 +}; + +struct ystorm_iscsi_task_rxmit_opt { + __le32 fast_rxmit_sge_offset; + __le32 scan_start_buffer_offset; + __le32 fast_rxmit_buffer_offset; + u8 scan_start_sgl_index; + u8 fast_rxmit_sgl_index; + __le16 reserved; }; struct ystorm_iscsi_task_st_ctx { struct ystorm_iscsi_task_state state; + struct ystorm_iscsi_task_rxmit_opt rxmit_opt; union iscsi_task_hdr pdu_hdr; }; @@ -1152,25 +1111,16 @@ struct ustorm_iscsi_task_ag_ctx { }; struct mstorm_iscsi_task_st_ctx { - union iscsi_mstorm_sgl sgl_union; - struct iscsi_dif_flags dif_flags; - struct iscsi_mflags flags; - u8 sgl_size; - u8 host_sge_index; - __le16 dix_cur_sge_offset; - __le16 dix_cur_sge_size; - __le32 data_offset_rtid; - u8 dif_offset; - u8 dix_sgl_size; - u8 dix_sge_index; + struct scsi_cached_sges data_desc; + struct 
scsi_sgl_params sgl_params; + __le32 rem_task_size; + __le32 data_buffer_offset; u8 task_type; + struct iscsi_dif_flags dif_flags; + u8 reserved0[2]; struct regpair sense_db; - struct regpair dix_sgl_cur_sge; - __le32 rem_task_size; - __le16 reuse_count; - __le16 dif_data_residue; - u8 reserved0[4]; - __le32 reserved1[1]; + __le32 expected_itt; + __le32 reserved1; }; struct ustorm_iscsi_task_st_ctx { @@ -1184,7 +1134,7 @@ struct ustorm_iscsi_task_st_ctx { #define USTORM_ISCSI_TASK_ST_CTX_AHS_EXIST_SHIFT 0 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_MASK 0x7F #define USTORM_ISCSI_TASK_ST_CTX_RESERVED1_SHIFT 1 - u8 reserved2; + struct iscsi_dif_flags dif_flags; __le16 reserved3; __le32 reserved4; __le32 reserved5; @@ -1207,10 +1157,10 @@ struct ustorm_iscsi_task_st_ctx { #define USTORM_ISCSI_TASK_ST_CTX_LOCAL_COMP_SHIFT 2 #define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_MASK 0x1 #define USTORM_ISCSI_TASK_ST_CTX_Q0_R2TQE_WRITE_SHIFT 3 -#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_MASK 0x1 -#define USTORM_ISCSI_TASK_ST_CTX_TOTALDATAACKED_DONE_SHIFT 4 -#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_MASK 0x1 -#define USTORM_ISCSI_TASK_ST_CTX_HQSCANNED_DONE_SHIFT 5 +#define USTORM_ISCSI_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_TOTAL_DATA_ACKED_DONE_SHIFT 4 +#define USTORM_ISCSI_TASK_ST_CTX_HQ_SCANNED_DONE_MASK 0x1 +#define USTORM_ISCSI_TASK_ST_CTX_HQ_SCANNED_DONE_SHIFT 5 #define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_MASK 0x1 #define USTORM_ISCSI_TASK_ST_CTX_R2T2RECV_DONE_SHIFT 6 #define USTORM_ISCSI_TASK_ST_CTX_RESERVED0_MASK 0x1 @@ -1220,7 +1170,6 @@ struct ustorm_iscsi_task_st_ctx { struct iscsi_task_context { struct ystorm_iscsi_task_st_ctx ystorm_st_context; - struct regpair ystorm_st_padding[2]; struct ystorm_iscsi_task_ag_ctx ystorm_ag_context; struct regpair ystorm_ag_padding[2]; struct tdif_task_context tdif_context; @@ -1272,32 +1221,22 @@ struct iscsi_uhqe { #define ISCSI_UHQE_TASK_ID_LO_SHIFT 24 }; -struct 
iscsi_wqe_field { - __le32 contlen_cdbsize_field; -#define ISCSI_WQE_FIELD_CONT_LEN_MASK 0xFFFFFF -#define ISCSI_WQE_FIELD_CONT_LEN_SHIFT 0 -#define ISCSI_WQE_FIELD_CDB_SIZE_MASK 0xFF -#define ISCSI_WQE_FIELD_CDB_SIZE_SHIFT 24 -}; - -union iscsi_wqe_field_union { - struct iscsi_wqe_field cont_field; - __le32 prev_tid; -}; struct iscsi_wqe { __le16 task_id; u8 flags; #define ISCSI_WQE_WQE_TYPE_MASK 0x7 #define ISCSI_WQE_WQE_TYPE_SHIFT 0 -#define ISCSI_WQE_NUM_FAST_SGES_MASK 0x7 -#define ISCSI_WQE_NUM_FAST_SGES_SHIFT 3 -#define ISCSI_WQE_PTU_INVALIDATE_MASK 0x1 -#define ISCSI_WQE_PTU_INVALIDATE_SHIFT 6 +#define ISCSI_WQE_NUM_SGES_MASK 0xF +#define ISCSI_WQE_NUM_SGES_SHIFT 3 #define ISCSI_WQE_RESPONSE_MASK 0x1 #define ISCSI_WQE_RESPONSE_SHIFT 7 struct iscsi_dif_flags prot_flags; - union iscsi_wqe_field_union cont_prevtid_union; + __le32 contlen_cdbsize; +#define ISCSI_WQE_CONT_LEN_MASK 0xFFFFFF +#define ISCSI_WQE_CONT_LEN_SHIFT 0 +#define ISCSI_WQE_CDB_SIZE_MASK 0xFF +#define ISCSI_WQE_CDB_SIZE_SHIFT 24 }; enum iscsi_wqe_type { @@ -1318,17 +1257,15 @@ struct iscsi_xhqe { u8 total_ahs_length; u8 opcode; u8 flags; -#define ISCSI_XHQE_NUM_FAST_SGES_MASK 0x7 -#define ISCSI_XHQE_NUM_FAST_SGES_SHIFT 0 -#define ISCSI_XHQE_FINAL_MASK 0x1 -#define ISCSI_XHQE_FINAL_SHIFT 3 -#define ISCSI_XHQE_SUPER_IO_MASK 0x1 -#define ISCSI_XHQE_SUPER_IO_SHIFT 4 -#define ISCSI_XHQE_STATUS_BIT_MASK 0x1 -#define ISCSI_XHQE_STATUS_BIT_SHIFT 5 -#define ISCSI_XHQE_RESERVED_MASK 0x3 -#define ISCSI_XHQE_RESERVED_SHIFT 6 - union iscsi_seq_num seq_num_union; +#define ISCSI_XHQE_FINAL_MASK 0x1 +#define ISCSI_XHQE_FINAL_SHIFT 0 +#define ISCSI_XHQE_STATUS_BIT_MASK 0x1 +#define ISCSI_XHQE_STATUS_BIT_SHIFT 1 +#define ISCSI_XHQE_NUM_SGES_MASK 0xF +#define ISCSI_XHQE_NUM_SGES_SHIFT 2 +#define ISCSI_XHQE_RESERVED0_MASK 0x3 +#define ISCSI_XHQE_RESERVED0_SHIFT 6 + union iscsi_seq_num seq_num; __le16 reserved1; }; diff --git a/include/linux/qed/roce_common.h b/include/linux/qed/roce_common.h index 
bad02df213df..866f063026de 100644 --- a/include/linux/qed/roce_common.h +++ b/include/linux/qed/roce_common.h @@ -38,4 +38,21 @@ #define ROCE_MAX_QPS (32 * 1024) +enum roce_async_events_type { + ROCE_ASYNC_EVENT_NONE = 0, + ROCE_ASYNC_EVENT_COMM_EST = 1, + ROCE_ASYNC_EVENT_SQ_DRAINED, + ROCE_ASYNC_EVENT_SRQ_LIMIT, + ROCE_ASYNC_EVENT_LAST_WQE_REACHED, + ROCE_ASYNC_EVENT_CQ_ERR, + ROCE_ASYNC_EVENT_LOCAL_INVALID_REQUEST_ERR, + ROCE_ASYNC_EVENT_LOCAL_CATASTROPHIC_ERR, + ROCE_ASYNC_EVENT_LOCAL_ACCESS_ERR, + ROCE_ASYNC_EVENT_QP_CATASTROPHIC_ERR, + ROCE_ASYNC_EVENT_CQ_OVERFLOW_ERR, + ROCE_ASYNC_EVENT_SRQ_EMPTY, + ROCE_ASYNC_EVENT_DESTROY_QP_DONE, + MAX_ROCE_ASYNC_EVENTS_TYPE +}; + #endif /* __ROCE_COMMON__ */ diff --git a/include/linux/qed/storage_common.h b/include/linux/qed/storage_common.h index 03f3e37ab059..08df82a096b6 100644 --- a/include/linux/qed/storage_common.h +++ b/include/linux/qed/storage_common.h @@ -40,6 +40,8 @@ #define BDQ_ID_IMM_DATA (1) #define BDQ_NUM_IDS (2) +#define SCSI_NUM_SGES_SLOW_SGL_THR 8 + #define BDQ_MAX_EXTERNAL_RING_SIZE (1 << 15) struct scsi_bd { @@ -52,6 +54,16 @@ struct scsi_bdq_ram_drv_data { __le16 reserved0[3]; }; +struct scsi_sge { + struct regpair sge_addr; + __le32 sge_len; + __le32 reserved; +}; + +struct scsi_cached_sges { + struct scsi_sge sge[4]; +}; + struct scsi_drv_cmdq { __le16 cmdq_cons; __le16 reserved0; @@ -99,11 +111,19 @@ struct scsi_ram_per_bdq_resource_drv_data { struct scsi_bdq_ram_drv_data drv_data_per_bdq_id[BDQ_NUM_IDS]; }; -struct scsi_sge { - struct regpair sge_addr; - __le16 sge_len; - __le16 reserved0; - __le32 reserved1; +enum scsi_sgl_mode { + SCSI_TX_SLOW_SGL, + SCSI_FAST_SGL, + MAX_SCSI_SGL_MODE +}; + +struct scsi_sgl_params { + struct regpair sgl_addr; + __le32 sgl_total_length; + __le32 sge_offset; + __le16 sgl_num_sges; + u8 sgl_index; + u8 reserved; }; struct scsi_terminate_extra_params { diff --git a/include/linux/qed/tcp_common.h b/include/linux/qed/tcp_common.h index 46fe7856f1b2..a5e843268f0e 
100644 --- a/include/linux/qed/tcp_common.h +++ b/include/linux/qed/tcp_common.h @@ -173,6 +173,7 @@ enum tcp_seg_placement_event { TCP_EVENT_ADD_ISLE_RIGHT, TCP_EVENT_ADD_ISLE_LEFT, TCP_EVENT_JOIN, + TCP_EVENT_DELETE_ISLES, TCP_EVENT_NOP, MAX_TCP_SEG_PLACEMENT_EVENT }; -- cgit v1.2.3 From 8961df8950b1235cb7594e143a31bcc63757b660 Mon Sep 17 00:00:00 2001 From: Richard Genoud Date: Fri, 3 Mar 2017 15:13:44 +0100 Subject: tty/serial: atmel: move atmel_serial header into driver directory atmel_serial.h is only used by atmel_serial.c, so there's no need for it to lie in include/linux. Suggested-by: Joe Perches Signed-off-by: Richard Genoud Signed-off-by: Greg Kroah-Hartman --- include/linux/atmel_serial.h | 169 ------------------------------------------- 1 file changed, 169 deletions(-) delete mode 100644 include/linux/atmel_serial.h (limited to 'include/linux') diff --git a/include/linux/atmel_serial.h b/include/linux/atmel_serial.h deleted file mode 100644 index bd2560502f3c..000000000000 --- a/include/linux/atmel_serial.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * include/linux/atmel_serial.h - * - * Copyright (C) 2005 Ivan Kokshaysky - * Copyright (C) SAN People - * - * USART registers. - * Based on AT91RM9200 datasheet revision E. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. 
- */ - -#ifndef ATMEL_SERIAL_H -#define ATMEL_SERIAL_H - -#define ATMEL_US_CR 0x00 /* Control Register */ -#define ATMEL_US_RSTRX BIT(2) /* Reset Receiver */ -#define ATMEL_US_RSTTX BIT(3) /* Reset Transmitter */ -#define ATMEL_US_RXEN BIT(4) /* Receiver Enable */ -#define ATMEL_US_RXDIS BIT(5) /* Receiver Disable */ -#define ATMEL_US_TXEN BIT(6) /* Transmitter Enable */ -#define ATMEL_US_TXDIS BIT(7) /* Transmitter Disable */ -#define ATMEL_US_RSTSTA BIT(8) /* Reset Status Bits */ -#define ATMEL_US_STTBRK BIT(9) /* Start Break */ -#define ATMEL_US_STPBRK BIT(10) /* Stop Break */ -#define ATMEL_US_STTTO BIT(11) /* Start Time-out */ -#define ATMEL_US_SENDA BIT(12) /* Send Address */ -#define ATMEL_US_RSTIT BIT(13) /* Reset Iterations */ -#define ATMEL_US_RSTNACK BIT(14) /* Reset Non Acknowledge */ -#define ATMEL_US_RETTO BIT(15) /* Rearm Time-out */ -#define ATMEL_US_DTREN BIT(16) /* Data Terminal Ready Enable */ -#define ATMEL_US_DTRDIS BIT(17) /* Data Terminal Ready Disable */ -#define ATMEL_US_RTSEN BIT(18) /* Request To Send Enable */ -#define ATMEL_US_RTSDIS BIT(19) /* Request To Send Disable */ -#define ATMEL_US_TXFCLR BIT(24) /* Transmit FIFO Clear */ -#define ATMEL_US_RXFCLR BIT(25) /* Receive FIFO Clear */ -#define ATMEL_US_TXFLCLR BIT(26) /* Transmit FIFO Lock Clear */ -#define ATMEL_US_FIFOEN BIT(30) /* FIFO enable */ -#define ATMEL_US_FIFODIS BIT(31) /* FIFO disable */ - -#define ATMEL_US_MR 0x04 /* Mode Register */ -#define ATMEL_US_USMODE GENMASK(3, 0) /* Mode of the USART */ -#define ATMEL_US_USMODE_NORMAL 0 -#define ATMEL_US_USMODE_RS485 1 -#define ATMEL_US_USMODE_HWHS 2 -#define ATMEL_US_USMODE_MODEM 3 -#define ATMEL_US_USMODE_ISO7816_T0 4 -#define ATMEL_US_USMODE_ISO7816_T1 6 -#define ATMEL_US_USMODE_IRDA 8 -#define ATMEL_US_USCLKS GENMASK(5, 4) /* Clock Selection */ -#define ATMEL_US_USCLKS_MCK (0 << 4) -#define ATMEL_US_USCLKS_MCK_DIV8 (1 << 4) -#define ATMEL_US_USCLKS_SCK (3 << 4) -#define ATMEL_US_CHRL GENMASK(7, 6) /* Character Length */ 
-#define ATMEL_US_CHRL_5 (0 << 6) -#define ATMEL_US_CHRL_6 (1 << 6) -#define ATMEL_US_CHRL_7 (2 << 6) -#define ATMEL_US_CHRL_8 (3 << 6) -#define ATMEL_US_SYNC BIT(8) /* Synchronous Mode Select */ -#define ATMEL_US_PAR GENMASK(11, 9) /* Parity Type */ -#define ATMEL_US_PAR_EVEN (0 << 9) -#define ATMEL_US_PAR_ODD (1 << 9) -#define ATMEL_US_PAR_SPACE (2 << 9) -#define ATMEL_US_PAR_MARK (3 << 9) -#define ATMEL_US_PAR_NONE (4 << 9) -#define ATMEL_US_PAR_MULTI_DROP (6 << 9) -#define ATMEL_US_NBSTOP GENMASK(13, 12) /* Number of Stop Bits */ -#define ATMEL_US_NBSTOP_1 (0 << 12) -#define ATMEL_US_NBSTOP_1_5 (1 << 12) -#define ATMEL_US_NBSTOP_2 (2 << 12) -#define ATMEL_US_CHMODE GENMASK(15, 14) /* Channel Mode */ -#define ATMEL_US_CHMODE_NORMAL (0 << 14) -#define ATMEL_US_CHMODE_ECHO (1 << 14) -#define ATMEL_US_CHMODE_LOC_LOOP (2 << 14) -#define ATMEL_US_CHMODE_REM_LOOP (3 << 14) -#define ATMEL_US_MSBF BIT(16) /* Bit Order */ -#define ATMEL_US_MODE9 BIT(17) /* 9-bit Character Length */ -#define ATMEL_US_CLKO BIT(18) /* Clock Output Select */ -#define ATMEL_US_OVER BIT(19) /* Oversampling Mode */ -#define ATMEL_US_INACK BIT(20) /* Inhibit Non Acknowledge */ -#define ATMEL_US_DSNACK BIT(21) /* Disable Successive NACK */ -#define ATMEL_US_MAX_ITER GENMASK(26, 24) /* Max Iterations */ -#define ATMEL_US_FILTER BIT(28) /* Infrared Receive Line Filter */ - -#define ATMEL_US_IER 0x08 /* Interrupt Enable Register */ -#define ATMEL_US_RXRDY BIT(0) /* Receiver Ready */ -#define ATMEL_US_TXRDY BIT(1) /* Transmitter Ready */ -#define ATMEL_US_RXBRK BIT(2) /* Break Received / End of Break */ -#define ATMEL_US_ENDRX BIT(3) /* End of Receiver Transfer */ -#define ATMEL_US_ENDTX BIT(4) /* End of Transmitter Transfer */ -#define ATMEL_US_OVRE BIT(5) /* Overrun Error */ -#define ATMEL_US_FRAME BIT(6) /* Framing Error */ -#define ATMEL_US_PARE BIT(7) /* Parity Error */ -#define ATMEL_US_TIMEOUT BIT(8) /* Receiver Time-out */ -#define ATMEL_US_TXEMPTY BIT(9) /* Transmitter Empty */ -#define 
ATMEL_US_ITERATION BIT(10) /* Max number of Repetitions Reached */ -#define ATMEL_US_TXBUFE BIT(11) /* Transmission Buffer Empty */ -#define ATMEL_US_RXBUFF BIT(12) /* Reception Buffer Full */ -#define ATMEL_US_NACK BIT(13) /* Non Acknowledge */ -#define ATMEL_US_RIIC BIT(16) /* Ring Indicator Input Change */ -#define ATMEL_US_DSRIC BIT(17) /* Data Set Ready Input Change */ -#define ATMEL_US_DCDIC BIT(18) /* Data Carrier Detect Input Change */ -#define ATMEL_US_CTSIC BIT(19) /* Clear to Send Input Change */ -#define ATMEL_US_RI BIT(20) /* RI */ -#define ATMEL_US_DSR BIT(21) /* DSR */ -#define ATMEL_US_DCD BIT(22) /* DCD */ -#define ATMEL_US_CTS BIT(23) /* CTS */ - -#define ATMEL_US_IDR 0x0c /* Interrupt Disable Register */ -#define ATMEL_US_IMR 0x10 /* Interrupt Mask Register */ -#define ATMEL_US_CSR 0x14 /* Channel Status Register */ -#define ATMEL_US_RHR 0x18 /* Receiver Holding Register */ -#define ATMEL_US_THR 0x1c /* Transmitter Holding Register */ -#define ATMEL_US_SYNH BIT(15) /* Transmit/Receive Sync */ - -#define ATMEL_US_BRGR 0x20 /* Baud Rate Generator Register */ -#define ATMEL_US_CD GENMASK(15, 0) /* Clock Divider */ -#define ATMEL_US_FP_OFFSET 16 /* Fractional Part */ -#define ATMEL_US_FP_MASK 0x7 - -#define ATMEL_US_RTOR 0x24 /* Receiver Time-out Register for USART */ -#define ATMEL_UA_RTOR 0x28 /* Receiver Time-out Register for UART */ -#define ATMEL_US_TO GENMASK(15, 0) /* Time-out Value */ - -#define ATMEL_US_TTGR 0x28 /* Transmitter Timeguard Register */ -#define ATMEL_US_TG GENMASK(7, 0) /* Timeguard Value */ - -#define ATMEL_US_FIDI 0x40 /* FI DI Ratio Register */ -#define ATMEL_US_NER 0x44 /* Number of Errors Register */ -#define ATMEL_US_IF 0x4c /* IrDA Filter Register */ - -#define ATMEL_US_CMPR 0x90 /* Comparaison Register */ -#define ATMEL_US_FMR 0xa0 /* FIFO Mode Register */ -#define ATMEL_US_TXRDYM(data) (((data) & 0x3) << 0) /* TX Ready Mode */ -#define ATMEL_US_RXRDYM(data) (((data) & 0x3) << 4) /* RX Ready Mode */ -#define 
ATMEL_US_ONE_DATA 0x0 -#define ATMEL_US_TWO_DATA 0x1 -#define ATMEL_US_FOUR_DATA 0x2 -#define ATMEL_US_FRTSC BIT(7) /* FIFO RTS pin Control */ -#define ATMEL_US_TXFTHRES(thr) (((thr) & 0x3f) << 8) /* TX FIFO Threshold */ -#define ATMEL_US_RXFTHRES(thr) (((thr) & 0x3f) << 16) /* RX FIFO Threshold */ -#define ATMEL_US_RXFTHRES2(thr) (((thr) & 0x3f) << 24) /* RX FIFO Threshold2 */ - -#define ATMEL_US_FLR 0xa4 /* FIFO Level Register */ -#define ATMEL_US_TXFL(reg) (((reg) >> 0) & 0x3f) /* TX FIFO Level */ -#define ATMEL_US_RXFL(reg) (((reg) >> 16) & 0x3f) /* RX FIFO Level */ - -#define ATMEL_US_FIER 0xa8 /* FIFO Interrupt Enable Register */ -#define ATMEL_US_FIDR 0xac /* FIFO Interrupt Disable Register */ -#define ATMEL_US_FIMR 0xb0 /* FIFO Interrupt Mask Register */ -#define ATMEL_US_FESR 0xb4 /* FIFO Event Status Register */ -#define ATMEL_US_TXFEF BIT(0) /* Transmit FIFO Empty Flag */ -#define ATMEL_US_TXFFF BIT(1) /* Transmit FIFO Full Flag */ -#define ATMEL_US_TXFTHF BIT(2) /* Transmit FIFO Threshold Flag */ -#define ATMEL_US_RXFEF BIT(3) /* Receive FIFO Empty Flag */ -#define ATMEL_US_RXFFF BIT(4) /* Receive FIFO Full Flag */ -#define ATMEL_US_RXFTHF BIT(5) /* Receive FIFO Threshold Flag */ -#define ATMEL_US_TXFPTEF BIT(6) /* Transmit FIFO Pointer Error Flag */ -#define ATMEL_US_RXFPTEF BIT(7) /* Receive FIFO Pointer Error Flag */ -#define ATMEL_US_TXFLOCK BIT(8) /* Transmit FIFO Lock (FESR only) */ -#define ATMEL_US_RXFTHF2 BIT(9) /* Receive FIFO Threshold Flag 2 */ - -#define ATMEL_US_NAME 0xf0 /* Ip Name */ -#define ATMEL_US_VERSION 0xfc /* Ip Version */ - -#endif -- cgit v1.2.3 From 83ee102a6998f808ac4c626e8f72344f0a355527 Mon Sep 17 00:00:00 2001 From: Doug Berger Date: Mon, 13 Mar 2017 17:41:32 -0700 Subject: net: phy: bcm7xxx: add support for 28nm EPHY This commit adds support for the internal fast ethernet 10/100 PHY found in the BCM7260, BCM7268, and BCM7271 devices. Signed-off-by: Doug Berger Reviewed-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/brcmphy.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 55e517130311..abcda9b458ab 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -25,6 +25,9 @@ #define PHY_ID_BCM57780 0x03625d90 #define PHY_ID_BCM7250 0xae025280 +#define PHY_ID_BCM7260 0xae025190 +#define PHY_ID_BCM7268 0xae025090 +#define PHY_ID_BCM7271 0xae0253b0 #define PHY_ID_BCM7278 0xae0251a0 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 -- cgit v1.2.3 From 9c79ddaa0f962d1f26537a670b0652ff509a6fe0 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Tue, 14 Mar 2017 16:23:54 +0200 Subject: qed*: Add support for QL41xxx adapters This adds the necessary infrastructure changes for initializing and working with the new series of QL41xxx adapters. It also adds 2 new PCI device-IDs to qede: - 0x8070 for QL41xxx PFs - 0x8090 for VFs spawning from QL41xxx PFs Signed-off-by: Tomer Tayar Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 48 ++++++++++++++++++++++++++++++----------- include/linux/qed/rdma_common.h | 3 ++- 2 files changed, 38 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index fde56c436f71..8e0065c52857 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -300,6 +300,11 @@ struct qed_sb_info { struct qed_dev *cdev; }; +enum qed_dev_type { + QED_DEV_TYPE_BB, + QED_DEV_TYPE_AH, +}; + struct qed_dev_info { unsigned long pci_mem_start; unsigned long pci_mem_end; @@ -325,6 +330,8 @@ struct qed_dev_info { u16 mtu; bool wol_support; + + enum qed_dev_type dev_type; }; enum qed_sb_type { @@ -752,7 +759,7 @@ enum qed_mf_mode { QED_MF_NPAR, }; -struct qed_eth_stats { +struct qed_eth_stats_common { u64 no_buff_discards; u64 packet_too_big_discard; u64 ttl0_discard; @@ -784,11 +791,6 @@ struct qed_eth_stats { u64 rx_256_to_511_byte_packets; u64 rx_512_to_1023_byte_packets; u64 rx_1024_to_1518_byte_packets; - u64 rx_1519_to_1522_byte_packets; - u64 rx_1519_to_2047_byte_packets; - u64 rx_2048_to_4095_byte_packets; - u64 rx_4096_to_9216_byte_packets; - u64 rx_9217_to_16383_byte_packets; u64 rx_crc_errors; u64 rx_mac_crtl_frames; u64 rx_pause_frames; @@ -805,14 +807,8 @@ struct qed_eth_stats { u64 tx_256_to_511_byte_packets; u64 tx_512_to_1023_byte_packets; u64 tx_1024_to_1518_byte_packets; - u64 tx_1519_to_2047_byte_packets; - u64 tx_2048_to_4095_byte_packets; - u64 tx_4096_to_9216_byte_packets; - u64 tx_9217_to_16383_byte_packets; u64 tx_pause_frames; u64 tx_pfc_frames; - u64 tx_lpi_entry_count; - u64 tx_total_collisions; u64 brb_truncates; u64 brb_discards; u64 rx_mac_bytes; @@ -827,6 +823,34 @@ struct qed_eth_stats { u64 tx_mac_ctrl_frames; }; +struct qed_eth_stats_bb { + u64 rx_1519_to_1522_byte_packets; + u64 rx_1519_to_2047_byte_packets; + u64 rx_2048_to_4095_byte_packets; + u64 rx_4096_to_9216_byte_packets; + u64 rx_9217_to_16383_byte_packets; + u64 
tx_1519_to_2047_byte_packets; + u64 tx_2048_to_4095_byte_packets; + u64 tx_4096_to_9216_byte_packets; + u64 tx_9217_to_16383_byte_packets; + u64 tx_lpi_entry_count; + u64 tx_total_collisions; +}; + +struct qed_eth_stats_ah { + u64 rx_1519_to_max_byte_packets; + u64 tx_1519_to_max_byte_packets; +}; + +struct qed_eth_stats { + struct qed_eth_stats_common common; + + union { + struct qed_eth_stats_bb bb; + struct qed_eth_stats_ah ah; + }; +}; + #define QED_SB_IDX 0x0002 #define RX_PI 0 diff --git a/include/linux/qed/rdma_common.h b/include/linux/qed/rdma_common.h index f773aa5e746f..72c770f9f666 100644 --- a/include/linux/qed/rdma_common.h +++ b/include/linux/qed/rdma_common.h @@ -52,7 +52,8 @@ #define RDMA_MAX_PDS (64 * 1024) #define RDMA_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS -#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB +#define RDMA_NUM_STATISTIC_COUNTERS_K2 MAX_NUM_VPORTS_K2 +#define RDMA_NUM_STATISTIC_COUNTERS_BB MAX_NUM_VPORTS_BB #define RDMA_TASK_TYPE (PROTOCOLID_ROCE) -- cgit v1.2.3 From a26356ab9392e0c5f8ad87d76c42e7c58c036d24 Mon Sep 17 00:00:00 2001 From: Thomas Petazzoni Date: Tue, 28 Feb 2017 15:31:16 +0100 Subject: of/pci: Remove unused MSI controller helpers All users of the small MSI controller API have been migrated to use the generic MSI infrastructure instead. We no longer need a global chained list of msi_controller. Instead, MSI controllers are now represented as IRQ domains attached to OF nodes, and the resolution between a device requesting an MSI and the corresponding MSI controller is done by the generic interrupt resolution logic. Therefore, this API is now completely useless, and can be removed from the kernel. 
Signed-off-by: Thomas Petazzoni Signed-off-by: Bjorn Helgaas Acked-by: Marc Zyngier Acked-by: Rob Herring --- include/linux/of_pci.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 0e0974eceb80..518c8d20647a 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -85,15 +85,4 @@ static inline int of_pci_get_host_bridge_resources(struct device_node *dev, } #endif -#if defined(CONFIG_OF) && defined(CONFIG_PCI_MSI) -int of_pci_msi_chip_add(struct msi_controller *chip); -void of_pci_msi_chip_remove(struct msi_controller *chip); -struct msi_controller *of_pci_find_msi_chip_by_node(struct device_node *of_node); -#else -static inline int of_pci_msi_chip_add(struct msi_controller *chip) { return -EINVAL; } -static inline void of_pci_msi_chip_remove(struct msi_controller *chip) { } -static inline struct msi_controller * -of_pci_find_msi_chip_by_node(struct device_node *of_node) { return NULL; } -#endif - #endif -- cgit v1.2.3 From 85c73d50e57eb8ad43955fe38714bc5fba1acd92 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 2 Mar 2017 15:48:05 +0200 Subject: gpio: acpi: Add managed variant of acpi_dev_add_driver_gpios() Introduce device managed variant of acpi_dev_add_driver_gpios() and its counterpart acpi_dev_remove_driver_gpios(). The functions in most cases are used in driver's ->probe() and ->remove() callbacks, that's why it's useful to have managed variant of them. Signed-off-by: Andy Shevchenko Acked-by: Rafael J. 
Wysocki Signed-off-by: Linus Walleij --- include/linux/acpi.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 673acda012af..c8eaaad4a9ed 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -952,6 +952,10 @@ static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) adev->driver_gpios = NULL; } +int devm_acpi_dev_add_driver_gpios(struct device *dev, + const struct acpi_gpio_mapping *gpios); +void devm_acpi_dev_remove_driver_gpios(struct device *dev); + int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index); #else static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, @@ -961,6 +965,13 @@ static inline int acpi_dev_add_driver_gpios(struct acpi_device *adev, } static inline void acpi_dev_remove_driver_gpios(struct acpi_device *adev) {} +static inline int devm_acpi_dev_add_driver_gpios(struct device *dev, + const struct acpi_gpio_mapping *gpios) +{ + return -ENXIO; +} +static inline void devm_acpi_dev_remove_driver_gpios(struct device *dev) {} + static inline int acpi_dev_gpio_irq_get(struct acpi_device *adev, int index) { return -ENXIO; -- cgit v1.2.3 From 22c403676dbbb7c6f186099527af7f065498ef45 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sun, 12 Feb 2017 17:13:55 -0800 Subject: gpio: return NULL from gpiod_get_optional when GPIOLIB is disabled Given the intent behind gpiod_get_optional() and friends it does not make sense to return -ENOSYS when GPIOLIB is disabled: the driver is expected to work just fine without gpio so let's behave as if gpio was not found. Otherwise we have to special-case -ENOSYS in drivers. Note that there was objection that someone might forget to enable GPIOLIB when dealing with a platform that has device that actually specifies optional gpio and we'll break it. I find this unconvincing as that would have to be the *only GPIO* in the system, which is extremely unlikely. 
Signed-off-by: Dmitry Torokhov Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 2484b2fcc6eb..13daf08e25bb 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -179,14 +179,14 @@ static inline struct gpio_desc *__must_check gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_desc *__must_check gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_descs *__must_check @@ -200,7 +200,7 @@ static inline struct gpio_descs *__must_check gpiod_get_array_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void gpiod_put(struct gpio_desc *desc) @@ -240,14 +240,14 @@ static inline struct gpio_desc *__must_check devm_gpiod_get_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_desc *__must_check devm_gpiod_get_index_optional(struct device *dev, const char *con_id, unsigned int index, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct gpio_descs *__must_check @@ -261,7 +261,7 @@ static inline struct gpio_descs *__must_check devm_gpiod_get_array_optional(struct device *dev, const char *con_id, enum gpiod_flags flags) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline void devm_gpiod_put(struct device *dev, struct gpio_desc *desc) -- cgit v1.2.3 From 1d585e70905e03e8c19c9aaf523ec246ae6b18a1 Mon Sep 17 00:00:00 2001 From: "Naveen N. 
Rao" Date: Wed, 8 Mar 2017 13:56:06 +0530 Subject: trace/kprobes: Fix check for kretprobe offset within function entry perf specifies an offset from _text and since this offset is fed directly into the arch-specific helper, kprobes tracer rejects installation of kretprobes through perf. Fix this by looking up the actual offset from a function for the specified sym+offset. Refactor and reuse existing routines to limit code duplication -- we repurpose kprobe_addr() for determining final kprobe address and we split out the function entry offset determination into a separate generic helper. Before patch: naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. Matched function: do_open [2d0c7ff] Probe point found: do_open+0 Matched function: do_open [35d76dc] found inline addr: 0xc0000000004ba9c4 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469776 Failed to write event: Invalid argument Error: Failed to add events. Reason: Invalid argument (Code: -22) naveen@ubuntu:~/linux/tools/perf$ dmesg | tail [ 33.568656] Given offset is not valid for return probe. After patch: naveen@ubuntu:~/linux/tools/perf$ sudo ./perf probe -v do_open%return probe-definition(0): do_open%return symbol:do_open file:(null) line:0 offset:0 return:1 lazy:(null) 0 arguments Looking at the vmlinux_path (8 entries long) Using /boot/vmlinux for symbols Open Debuginfo file: /boot/vmlinux Try to find probe point from debuginfo. 
Matched function: do_open [2d0c7d6] Probe point found: do_open+0 Matched function: do_open [35d76b3] found inline addr: 0xc0000000004ba9e4 Failed to find "do_open%return", because do_open is an inlined function and has no return point. An error occurred in debuginfo analysis (-22). Trying to use symbols. Opening /sys/kernel/debug/tracing//README write=0 Opening /sys/kernel/debug/tracing//kprobe_events write=1 Writing event: r:probe/do_open _text+4469808 Writing event: r:probe/do_open_1 _text+4956344 Added new events: probe:do_open (on do_open%return) probe:do_open_1 (on do_open%return) You can now use it in all perf tools, such as: perf record -e probe:do_open_1 -aR sleep 1 naveen@ubuntu:~/linux/tools/perf$ sudo cat /sys/kernel/debug/kprobes/list c000000000041370 k kretprobe_trampoline+0x0 [OPTIMIZED] c0000000004ba0b8 r do_open+0x8 [DISABLED] c000000000443430 r do_open+0x0 [DISABLED] Signed-off-by: Naveen N. Rao Acked-by: Masami Hiramatsu Cc: Ananth N Mavinakayanahalli Cc: Michael Ellerman Cc: Steven Rostedt Cc: linuxppc-dev@lists.ozlabs.org Link: http://lkml.kernel.org/r/d8cd1ef420ec22e3643ac332fdabcffc77319a42.1488961018.git.naveen.n.rao@linux.vnet.ibm.com Signed-off-by: Arnaldo Carvalho de Melo --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 177bdf6c6aeb..47e4da5b4fa2 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -268,6 +268,7 @@ extern void show_registers(struct pt_regs *regs); extern void kprobes_inc_nmissed_count(struct kprobe *p); extern bool arch_within_kprobe_blacklist(unsigned long addr); extern bool arch_function_offset_within_entry(unsigned long offset); +extern bool function_offset_within_entry(kprobe_opcode_t *addr, const char *sym, unsigned long offset); extern bool within_kprobe_blacklist(unsigned long addr); -- cgit v1.2.3 From 56f36acd215cf7c28372b2fdb4f33f6900e97e05 Mon Sep 17 00:00:00 2001 From: Amritha 
Nambiar Date: Wed, 15 Mar 2017 10:39:25 -0700 Subject: mqprio: Modify mqprio to pass user parameters via ndo_setup_tc. The configurable priority to traffic class mapping and the user specified queue ranges are used to configure the traffic class, overriding the hardware defaults when the 'hw' option is set to 0. However, when the 'hw' option is non-zero, the hardware QOS defaults are used. This patch makes it so that we can pass the data the user provided to ndo_setup_tc. This allows us to pull in the queue configuration if the user requested it as well as any additional hardware offload type requested by using a value other than 1 for the hw value. Finally it also provides a means for the device driver to return the level supported for the offload type via the qopt->hw value. Previously we were just always assuming the value to be 1, in the future values beyond just 1 may be supported. Signed-off-by: Amritha Nambiar Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97456b2539e4..b7365b587818 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -786,11 +786,11 @@ struct tc_cls_u32_offload; struct tc_to_netdev { unsigned int type; union { - u8 tc; struct tc_cls_u32_offload *cls_u32; struct tc_cls_flower_offload *cls_flower; struct tc_cls_matchall_offload *cls_mall; struct tc_cls_bpf_offload *cls_bpf; + struct tc_mqprio_qopt *mqprio; }; bool egress_dev; }; -- cgit v1.2.3 From f4c0b0aa58d9b7e30ab0a95e33da84d53b3d764a Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Mon, 20 Feb 2017 15:33:50 +0200 Subject: perf/core: Keep AUX flags in the output handle In preparation for adding more flags to perf AUX records, introduce a separate API for setting the flags for a session, rather than appending more bool arguments to perf_aux_output_end. 
This allows to set each flag at the time a corresponding condition is detected, instead of tracking it in each driver's private state. Signed-off-by: Will Deacon Signed-off-by: Alexander Shishkin Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: vince@deater.net Link: http://lkml.kernel.org/r/20170220133352.17995-3-alexander.shishkin@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/coresight.h | 2 +- include/linux/perf_event.h | 8 +++++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index 2a5982c37dfb..035c16c9a505 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -201,7 +201,7 @@ struct coresight_ops_sink { void *sink_config); unsigned long (*reset_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, - void *sink_config, bool *lost); + void *sink_config); void (*update_buffer)(struct coresight_device *csdev, struct perf_output_handle *handle, void *sink_config); diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index f19a82362851..b6e75c9d4791 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -801,6 +801,7 @@ struct perf_output_handle { struct ring_buffer *rb; unsigned long wakeup; unsigned long size; + u64 aux_flags; union { void *addr; unsigned long head; @@ -849,10 +850,11 @@ perf_cgroup_from_task(struct task_struct *task, struct perf_event_context *ctx) extern void *perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event); extern void perf_aux_output_end(struct perf_output_handle *handle, - unsigned long size, bool truncated); + unsigned long size); extern int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size); extern void 
*perf_get_aux(struct perf_output_handle *handle); +extern void perf_aux_output_flag(struct perf_output_handle *handle, u64 flags); extern int perf_pmu_register(struct pmu *pmu, const char *name, int type); extern void perf_pmu_unregister(struct pmu *pmu); @@ -1268,8 +1270,8 @@ static inline void * perf_aux_output_begin(struct perf_output_handle *handle, struct perf_event *event) { return NULL; } static inline void -perf_aux_output_end(struct perf_output_handle *handle, unsigned long size, - bool truncated) { } +perf_aux_output_end(struct perf_output_handle *handle, unsigned long size) + { } static inline int perf_aux_output_skip(struct perf_output_handle *handle, unsigned long size) { return -EINVAL; } -- cgit v1.2.3 From 6419c4af777a773a45a1b1af735de0fcd9a7dcc7 Mon Sep 17 00:00:00 2001 From: "J. R. Okajima" Date: Fri, 3 Feb 2017 01:38:17 +0900 Subject: locking/lockdep: Add new check to lock_downgrade() Commit: f8319483f57f ("locking/lockdep: Provide a type check for lock_is_held") didn't fully cover rwsems as downgrade_write() was left out. Introduce lock_downgrade() and use it to add new checks. See-also: http://marc.info/?l=linux-kernel&m=148581164003149&w=2 Originally-written-by: Peter Zijlstra Signed-off-by: J. R. Okajima Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1486053497-9948-3-git-send-email-hooanon05g@gmail.com [ Rewrote the changelog. 
] Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 1e327bb80838..fffe49f188e6 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -361,6 +361,8 @@ static inline void lock_set_subclass(struct lockdep_map *lock, lock_set_class(lock, lock->name, lock->key, subclass, ip); } +extern void lock_downgrade(struct lockdep_map *lock, unsigned long ip); + extern void lockdep_set_current_reclaim_state(gfp_t gfp_mask); extern void lockdep_clear_current_reclaim_state(void); extern void lockdep_trace_alloc(gfp_t mask); @@ -411,6 +413,7 @@ static inline void lockdep_on(void) # define lock_acquire(l, s, t, r, c, n, i) do { } while (0) # define lock_release(l, n, i) do { } while (0) +# define lock_downgrade(l, i) do { } while (0) # define lock_set_class(l, n, k, s, i) do { } while (0) # define lock_set_subclass(l, s, i) do { } while (0) # define lockdep_set_current_reclaim_state(g) do { } while (0) -- cgit v1.2.3 From 383776fa7527745224446337f2dcfb0f0d1b8b56 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 27 Feb 2017 15:37:36 +0100 Subject: locking/lockdep: Handle statically initialized PER_CPU locks properly If a PER_CPU struct which contains a spin_lock is statically initialized via: DEFINE_PER_CPU(struct foo, bla) = { .lock = __SPIN_LOCK_UNLOCKED(bla.lock) }; then lockdep assigns a separate key to each lock because the logic for assigning a key to statically initialized locks is to use the address as the key. With per CPU locks the address is obviously different on each CPU. That's wrong, because all locks should have the same key. To solve this the following modifications are required: 1) Extend the is_kernel/module_percpu_addr() functions to hand back the canonical address of the per CPU address, i.e. the per CPU address minus the per CPU offset. 
2) Check the lock address with these functions and if the per CPU check matches use the returned canonical address as the lock key, so all per CPU locks have the same key. 3) Move the static_obj(key) check into look_up_lock_class() so this check can be avoided for statically initialized per CPU locks. That's required because the canonical address fails the static_obj(key) check for obvious reasons. Reported-by: Mike Galbraith Signed-off-by: Thomas Gleixner [ Merged Dan's fixups for !MODULES and !SMP into this patch. ] Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Dan Murphy Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/20170227143736.pectaimkjkan5kow@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/module.h | 6 ++++++ include/linux/percpu.h | 1 + 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 0297c5cd7cdf..9ad68561d8c2 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -493,6 +493,7 @@ static inline int module_is_live(struct module *mod) struct module *__module_text_address(unsigned long addr); struct module *__module_address(unsigned long addr); bool is_module_address(unsigned long addr); +bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr); bool is_module_percpu_address(unsigned long addr); bool is_module_text_address(unsigned long addr); @@ -660,6 +661,11 @@ static inline bool is_module_percpu_address(unsigned long addr) return false; } +static inline bool __is_module_percpu_address(unsigned long addr, unsigned long *can_addr) +{ + return false; +} + static inline bool is_module_text_address(unsigned long addr) { return false; diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 56939d3f6e53..491b3f5a5f8a 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -110,6 +110,7 @@ extern int __init 
pcpu_page_first_chunk(size_t reserved_size, #endif extern void __percpu *__alloc_reserved_percpu(size_t size, size_t align); +extern bool __is_kernel_percpu_address(unsigned long addr, unsigned long *can_addr); extern bool is_kernel_percpu_address(unsigned long addr); #if !defined(CONFIG_SMP) || !defined(CONFIG_HAVE_SETUP_PER_CPU_AREA) -- cgit v1.2.3 From ef88f33fc1ee0a12a1e5eee7e4f70b7743100a19 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 2 Mar 2017 12:51:15 +0100 Subject: USB: serial: clean up endpoint and port-counter types Use unsigned-char type for the endpoint and port counters. Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 704a1ab8240c..85b475933848 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -159,10 +159,10 @@ struct usb_serial { unsigned char minors_reserved:1; unsigned char num_ports; unsigned char num_port_pointers; - char num_interrupt_in; - char num_interrupt_out; - char num_bulk_in; - char num_bulk_out; + unsigned char num_interrupt_in; + unsigned char num_interrupt_out; + unsigned char num_bulk_in; + unsigned char num_bulk_out; struct usb_serial_port *port[MAX_NUM_PORTS]; struct kref kref; struct mutex disc_mutex; @@ -227,13 +227,14 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) struct usb_serial_driver { const char *description; const struct usb_device_id *id_table; - char num_ports; struct list_head driver_list; struct device_driver driver; struct usb_driver *usb_driver; struct usb_dynids dynids; + unsigned char num_ports; + size_t bulk_in_size; size_t bulk_out_size; -- cgit v1.2.3 From ff0c5703a4b11fca86886e5b7ce40c396bef8381 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 2 Mar 2017 12:51:17 +0100 Subject: USB: serial: allow up to 16 ports per device Raise the arbitrary limit of how 
many ports a single device can claim from eight to 16. This specifically enables the upper eight ports of some mxuport devices. Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index 85b475933848..ee4394d8932f 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -20,7 +20,7 @@ #include /* The maximum number of ports one device can grab at once */ -#define MAX_NUM_PORTS 8 +#define MAX_NUM_PORTS 16 /* parity check flag */ #define RELEVANT_IFLAG(iflag) (iflag & (IGNBRK|BRKINT|IGNPAR|PARMRK|INPCK)) -- cgit v1.2.3 From 92e6b2c675e1d247317ec41a078f49aaade7f716 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 2 Mar 2017 12:51:19 +0100 Subject: USB: serial: add endpoint sanity check to core Allow drivers to specify a minimum number of endpoints per type, which USB serial core will verify after subdriver probe has returned (where the current alternate setting may have been changed). Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index ee4394d8932f..f1b8a8493762 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -188,6 +188,10 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) * @id_table: pointer to a list of usb_device_id structures that define all * of the devices this structure can support. * @num_ports: the number of different ports this device will have. 
+ * @num_bulk_in: minimum number of bulk-in endpoints + * @num_bulk_out: minimum number of bulk-out endpoints + * @num_interrupt_in: minimum number of interrupt-in endpoints + * @num_interrupt_out: minimum number of interrupt-out endpoints * @bulk_in_size: minimum number of bytes to allocate for bulk-in buffer * (0 = end-point size) * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size) @@ -235,6 +239,11 @@ struct usb_serial_driver { unsigned char num_ports; + unsigned char num_bulk_in; + unsigned char num_bulk_out; + unsigned char num_interrupt_in; + unsigned char num_interrupt_out; + size_t bulk_in_size; size_t bulk_out_size; -- cgit v1.2.3 From 81ed18ab3098b6519274545e80a29caacb77d160 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 15 Mar 2017 18:26:42 -0700 Subject: bpf: add helper inlining infra and optimize map_array lookup Optimize bpf_call -> bpf_map_lookup_elem() -> array_map_lookup_elem() into a sequence of bpf instructions. When JIT is on the sequence of bpf instructions is the sequence of native cpu instructions with significantly faster performance than indirect call and two function's prologue/epilogue. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 1 + include/linux/bpf_verifier.h | 5 ++++- include/linux/filter.h | 10 ++++++++++ 3 files changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 909fc033173a..da8c64ca8dc9 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -35,6 +35,7 @@ struct bpf_map_ops { void *(*map_fd_get_ptr)(struct bpf_map *map, struct file *map_file, int fd); void (*map_fd_put_ptr)(void *ptr); + u32 (*map_gen_lookup)(struct bpf_map *map, struct bpf_insn *insn_buf); }; struct bpf_map { diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index a13b031dc6b8..5efb4db44e1e 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -66,7 +66,10 @@ struct bpf_verifier_state_list { }; struct bpf_insn_aux_data { - enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + union { + enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ + struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + }; }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ diff --git a/include/linux/filter.h b/include/linux/filter.h index fbf7b39e8103..dffa072b7b79 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -693,6 +693,11 @@ static inline bool bpf_jit_is_ebpf(void) # endif } +static inline bool ebpf_jit_enabled(void) +{ + return bpf_jit_enable && bpf_jit_is_ebpf(); +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return fp->jited && bpf_jit_is_ebpf(); @@ -753,6 +758,11 @@ void bpf_prog_kallsyms_del(struct bpf_prog *fp); #else /* CONFIG_BPF_JIT */ +static inline bool ebpf_jit_enabled(void) +{ + return false; +} + static inline bool bpf_prog_ebpf_jited(const struct bpf_prog *fp) { return false; -- cgit v1.2.3 From 2c93e790e8253552227bf9b46a8d49dca3f71b06 Mon Sep 17 00:00:00 2001 From: yuan linyu Date: Sat, 25 Feb 2017 19:20:55 +0800 Subject: usb: add 
CONFIG_USB_PCI for system have both PCI HW and non-PCI based USB HW a lot of embeded system SOC (e.g. freescale T2080) have both PCI and USB modules. But USB module is controlled by registers directly, it have no relationship with PCI module. when say N here it will not build PCI related code in USB driver. Signed-off-by: yuan linyu Acked-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 40edf6a8533e..dff130151235 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -453,7 +453,7 @@ extern int usb_hcd_find_raw_port_number(struct usb_hcd *hcd, int port1); struct platform_device; extern void usb_hcd_platform_shutdown(struct platform_device *dev); -#ifdef CONFIG_PCI +#ifdef CONFIG_USB_PCI struct pci_dev; struct pci_device_id; extern int usb_hcd_pci_probe(struct pci_dev *dev, @@ -466,7 +466,7 @@ extern int usb_hcd_amd_remote_wakeup_quirk(struct pci_dev *dev); #ifdef CONFIG_PM extern const struct dev_pm_ops usb_hcd_pci_pm_ops; #endif -#endif /* CONFIG_PCI */ +#endif /* CONFIG_USB_PCI */ /* pci-ish (pdev null is ok) buffer alloc/mapping support */ void usb_init_pool_max(void); -- cgit v1.2.3 From 5095cb89c62acc78b4cfaeb9a4072979d010510a Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 21 Feb 2017 19:59:47 +0900 Subject: usb: of: add functions to bind a companion controller EHCI controllers will have a companion controller. However, on platform bus, there was difficult to bind them in previous code. So, this patch adds helper functions to bind them using a "companion" property. 
Signed-off-by: Yoshihiro Shimoda Acked-by: Rob Herring Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/of.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 5ff9032ee1b4..4031f47629ec 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -18,6 +18,7 @@ int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); struct device_node *usb_of_get_child_node(struct device_node *parent, int portnum); +struct device *usb_of_get_companion_dev(struct device *dev); #else static inline enum usb_dr_mode of_usb_get_dr_mode_by_phy(struct device_node *np, int arg0) @@ -38,6 +39,10 @@ static inline struct device_node *usb_of_get_child_node { return NULL; } +static inline struct device *usb_of_get_companion_dev(struct device *dev) +{ + return NULL; +} #endif #if IS_ENABLED(CONFIG_OF) && IS_ENABLED(CONFIG_USB_SUPPORT) -- cgit v1.2.3 From 9d1d994d33fe4863c8caeeaac264664815f4c321 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Thu, 16 Mar 2017 08:14:16 -0700 Subject: linux/serdev.h: Replace 'ctrl->serdev' with 'serdev' Replace 'ctrl->serdev' with 'serdev' in serdev_controller_write_wakeup() and serdev_controller_receive_buf(). 
Cc: Rob Herring Cc: cphealy@gmail.com Cc: linux-serial@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Rob Herring Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 9519da6253a8..5176cdc2057f 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -165,7 +165,7 @@ static inline void serdev_controller_write_wakeup(struct serdev_controller *ctrl if (!serdev || !serdev->ops->write_wakeup) return; - serdev->ops->write_wakeup(ctrl->serdev); + serdev->ops->write_wakeup(serdev); } static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, @@ -177,7 +177,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, if (!serdev || !serdev->ops->receive_buf) return -EINVAL; - return serdev->ops->receive_buf(ctrl->serdev, data, count); + return serdev->ops->receive_buf(serdev, data, count); } #if IS_ENABLED(CONFIG_SERIAL_DEV_BUS) -- cgit v1.2.3 From 0f4f0c8ff1da9171bca0dc01ce5551e8b6d2f0f3 Mon Sep 17 00:00:00 2001 From: Moritz Fischer Date: Mon, 27 Feb 2017 09:19:00 -0600 Subject: fpga: Add flag to indicate bitstream needs decrypting Add a flag that is passed to the write_init() callback, indicating that the bitstream is encrypted. The low-level driver will deal with the flag, or return an error, if encrypted bitstreams are not supported. 
Signed-off-by: Moritz Fischer Acked-by: Michal Simek Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index 57beb5d09bfc..e2ef94fd37af 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -70,6 +70,7 @@ enum fpga_mgr_states { */ #define FPGA_MGR_PARTIAL_RECONFIG BIT(0) #define FPGA_MGR_EXTERNAL_CONFIG BIT(1) +#define FPGA_MGR_ENCRYPTED_BITSTREAM BIT(2) /** * struct fpga_image_info - information specific to a FPGA image -- cgit v1.2.3 From 8b1f91fb4c1a8a860b8edc0c383821b2ff8a1ece Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:27:12 -0700 Subject: vmbus: remove useless return's No need for empty return at end of void function Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 62bbf3c1aa4a..2b1ed66824be 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -1500,8 +1500,6 @@ static inline void hv_signal_on_read(struct vmbus_channel *channel) cached_write_sz = hv_get_cached_bytes_to_write(rbi); if (cached_write_sz < pending_sz) vmbus_setevent(channel); - - return; } static inline void -- cgit v1.2.3 From 2a9d7de2038e87bb2a1085ac73c4246c260263f0 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:27:17 -0700 Subject: vmbus: cleanup header file style Minor changes to align hyper-v vmbus include files with current linux kernel style. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 2b1ed66824be..de9b80ff6698 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -524,10 +524,10 @@ struct vmbus_channel_open_channel { u32 target_vp; /* - * The upstream ring buffer begins at offset zero in the memory - * described by RingBufferGpadlHandle. The downstream ring buffer - * follows it at this offset (in pages). - */ + * The upstream ring buffer begins at offset zero in the memory + * described by RingBufferGpadlHandle. The downstream ring buffer + * follows it at this offset (in pages). + */ u32 downstream_ringbuffer_pageoffset; /* User-specific data to be passed along to the server endpoint. */ @@ -1006,7 +1006,7 @@ extern int vmbus_open(struct vmbus_channel *channel, u32 recv_ringbuffersize, void *userdata, u32 userdatalen, - void(*onchannel_callback)(void *context), + void (*onchannel_callback)(void *context), void *context); extern void vmbus_close(struct vmbus_channel *channel); @@ -1421,7 +1421,7 @@ struct hyperv_service_callback { char *log_msg; uuid_le data; struct vmbus_channel *channel; - void (*callback) (void *context); + void (*callback)(void *context); }; #define MAX_SRV_VER 0x7ffffff -- cgit v1.2.3 From 4827ee1dca5691c9fc568883170a568db94f9b38 Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sat, 4 Mar 2017 18:27:18 -0700 Subject: vmbus: expose debug info for drivers Allow driver to get debug information about state of the ring. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. 
Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index de9b80ff6698..1fa727fe5f93 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -491,6 +491,12 @@ struct vmbus_channel_rescind_offer { u32 child_relid; } __packed; +static inline u32 +hv_ringbuffer_pending_size(const struct hv_ring_buffer_info *rbi) +{ + return rbi->ring_buffer->pending_send_sz; +} + /* * Request Offer -- no parameters, SynIC message contains the partition ID * Set Snoop -- no parameters, SynIC message contains the partition ID @@ -1148,6 +1154,17 @@ static inline void *hv_get_drvdata(struct hv_device *dev) return dev_get_drvdata(&dev->device); } +struct hv_ring_buffer_debug_info { + u32 current_interrupt_mask; + u32 current_read_index; + u32 current_write_index; + u32 bytes_avail_toread; + u32 bytes_avail_towrite; +}; + +void hv_ringbuffer_get_debuginfo(const struct hv_ring_buffer_info *ring_info, + struct hv_ring_buffer_debug_info *debug_info); + /* Vmbus interface */ #define vmbus_driver_register(driver) \ __vmbus_driver_register(driver, THIS_MODULE, KBUILD_MODNAME) -- cgit v1.2.3 From b5bc980a4929bb2a449fef3e0b7131466815d0b1 Mon Sep 17 00:00:00 2001 From: Martyn Welch Date: Sat, 4 Mar 2017 00:34:29 +0000 Subject: docs: Add kernel-doc comments to VME driver API Add kernel-doc comments to the VME driver API and structures. This documentation will be integrated into the RST documentation in a later patch. 
Signed-off-by: Martyn Welch Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h index ec5e8bf6118e..25874da3f2e1 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -92,7 +92,7 @@ extern struct bus_type vme_bus_type; #define VME_SLOT_ALL -2 /** - * Structure representing a VME device + * struct vme_dev - Structure representing a VME device * @num: The device number * @bridge: Pointer to the bridge device this device is on * @dev: Internal device structure @@ -107,6 +107,16 @@ struct vme_dev { struct list_head bridge_list; }; +/** + * struct vme_driver - Structure representing a VME driver + * @name: Driver name, should be unique among VME drivers and usually the same + * as the module name. + * @match: Callback used to determine whether probe should be run. + * @probe: Callback for device binding, called when new device is detected. + * @remove: Callback, called on device removal. + * @driver: Underlying generic device driver structure. + * @devices: List of VME devices (struct vme_dev) associated with this driver. + */ struct vme_driver { const char *name; int (*match)(struct vme_dev *); -- cgit v1.2.3 From 77f88796cee819b9c4562b0b6b44691b3b7755b1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 16 Mar 2017 16:54:24 -0400 Subject: cgroup, kthread: close race window where new kthreads can be migrated to non-root cgroups Creation of a kthread goes through a couple interlocked stages between the kthread itself and its creator. Once the new kthread starts running, it initializes itself and wakes up the creator. The creator then can further configure the kthread and then let it start doing its job by waking it up. In this configuration-by-creator stage, the creator is the only one that can wake it up but the kthread is visible to userland. 
When altering the kthread's attributes from userland is allowed, this is fine; however, for cases where CPU affinity is critical, kthread_bind() is used to first disable affinity changes from userland and then set the affinity. This also prevents the kthread from being migrated into non-root cgroups as that can affect the CPU affinity and many other things. Unfortunately, the cgroup side of protection is racy. While the PF_NO_SETAFFINITY flag prevents further migrations, userland can win the race before the creator sets the flag with kthread_bind() and put the kthread in a non-root cgroup, which can lead to all sorts of problems including incorrect CPU affinity and starvation. This bug got triggered by userland which periodically tries to migrate all processes in the root cpuset cgroup to a non-root one. Per-cpu workqueue workers got caught while being created and ended up with incorrect CPU affinity breaking concurrency management and sometimes stalling workqueue execution. This patch adds task->no_cgroup_migration which disallows the task to be migrated by userland. kthreadd starts with the flag set making every child kthread start in the root cgroup with migration disallowed. The flag is cleared after the kthread finishes initialization by which time PF_NO_SETAFFINITY is set if the kthread should stay in the root cgroup. It'd be better to wait for the initialization instead of failing but I couldn't think of a way of implementing that without adding either a new PF flag, or sleeping and retrying from the waiting side. Even if userland depends on changing cgroup membership of a kthread, it either has to be synchronized with kthread_create() or periodically repeat, so it's unlikely that this would break anything. v2: Switch to a simpler implementation using a new task_struct bit field suggested by Oleg.
Signed-off-by: Tejun Heo Suggested-by: Oleg Nesterov Cc: Linus Torvalds Cc: Andrew Morton Cc: Peter Zijlstra (Intel) Cc: Thomas Gleixner Reported-and-debugged-by: Chris Mason Cc: stable@vger.kernel.org # v4.3+ (we can't close the race on < v4.3) Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 21 +++++++++++++++++++++ include/linux/sched.h | 4 ++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index f6b43fbb141c..af9c86e958bd 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -570,6 +570,25 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +static inline void cgroup_init_kthreadd(void) +{ + /* + * kthreadd is inherited by all kthreads, keep it in the root so + * that the new kthreads are guaranteed to stay in the root until + * initialization is finished. + */ + current->no_cgroup_migration = 1; +} + +static inline void cgroup_kthread_ready(void) +{ + /* + * This kthread finished initialization. The creator should have + * set PF_NO_SETAFFINITY if this kthread should stay in the root. 
+ */ + current->no_cgroup_migration = 0; +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; @@ -590,6 +609,8 @@ static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } +static inline void cgroup_init_kthreadd(void) {} +static inline void cgroup_kthread_ready(void) {} static inline bool task_under_cgroup_hierarchy(struct task_struct *task, struct cgroup *ancestor) diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..4cf9a59a4d08 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -604,6 +604,10 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif +#ifdef CONFIG_CGROUPS + /* disallow userland-initiated cgroup migration */ + unsigned no_cgroup_migration:1; +#endif unsigned long atomic_flags; /* Flags requiring atomic access. */ -- cgit v1.2.3 From 016da20148a1692e34d35d1f1787400a2a2d2c58 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 16 Mar 2017 18:08:13 -0700 Subject: hrtimer: Remove hrtimer_peek_ahead_timers() leftovers This function was removed in commit c6eb3f70d448 (hrtimer: Get rid of hrtimer softirq, 2015-04-14) but the prototype wasn't ever deleted. Delete it now. Signed-off-by: Stephen Boyd Link: http://lkml.kernel.org/r/20170317010814.2591-1-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 249e579ecd4c..23d58fcd4d9a 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -276,8 +276,6 @@ static inline int hrtimer_is_hres_active(struct hrtimer *timer) return timer->base->cpu_base->hres_active; } -extern void hrtimer_peek_ahead_timers(void); - /* * The resolution of the clocks. 
The resolution value is returned in * the clock_getres() system call to give application programmers an @@ -300,8 +298,6 @@ extern unsigned int hrtimer_resolution; #define hrtimer_resolution (unsigned int)LOW_RES_NSEC -static inline void hrtimer_peek_ahead_timers(void) { } - static inline int hrtimer_is_hres_active(struct hrtimer *timer) { return 0; -- cgit v1.2.3 From d5b72a2123dfaf9416b1a1177b4be041f8a8b6d4 Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 17 Mar 2017 17:34:49 +0100 Subject: dma-fence: add dma_fence_match_context helper Add a helper to check if all fences in a fence array are from a given context. For convenience, the function can also handle being given a non-array fence. Signed-off-by: Philipp Zabel Reviewed-by: Gustavo Padovan Acked-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/1489768492-25190-1-git-send-email-p.zabel@pengutronix.de --- include/linux/dma-fence-array.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-fence-array.h b/include/linux/dma-fence-array.h index 5900945f962d..332a5420243c 100644 --- a/include/linux/dma-fence-array.h +++ b/include/linux/dma-fence-array.h @@ -83,4 +83,6 @@ struct dma_fence_array *dma_fence_array_create(int num_fences, u64 context, unsigned seqno, bool signal_on_any); +bool dma_fence_match_context(struct dma_fence *fence, u64 context); + #endif /* __LINUX_DMA_FENCE_ARRAY_H */ -- cgit v1.2.3 From b59f65fa076a8eac2ff3a8ab7f8e1705b9fa86cb Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 16 Mar 2017 18:26:53 +0300 Subject: mm/gup: Implement the dev_pagemap() logic in the generic get_user_pages_fast() function This is a preparation patch for the transition of x86 to the generic GUP_fast() implementation. Prepare generic GUP_fast() to handle dev_pagemap(). At the moment, it's only implemented on x86. On non-x86, the new code will be compiled out. Signed-off-by: Kirill A. 
Shutemov Cc: Andrew Morton Cc: Aneesh Kumar K . V Cc: Borislav Petkov Cc: Catalin Marinas Cc: Dan Williams Cc: Dann Frazier Cc: Dave Hansen Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Rik van Riel Cc: Steve Capper Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170316152655.37789-6-kirill.shutemov@linux.intel.com Signed-off-by: Ingo Molnar --- include/linux/mm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 5f01c88f0800..e197d3ca3e8a 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -430,6 +430,10 @@ static inline int pud_devmap(pud_t pud) { return 0; } +static inline int pgd_devmap(pgd_t pgd) +{ + return 0; +} #endif /* -- cgit v1.2.3 From 3843832fc8cadc2d48ba4ea4cd350a696906ac42 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Tue, 28 Feb 2017 17:19:24 +0200 Subject: clk: tegra: Handle UTMIPLL IDDQ Export UTMIPLL IDDQ functions. These will be needed when powergating the XUSB partition. 
Signed-off-by: BH Hsieh Signed-off-by: Peter De Schrijver Signed-off-by: Thierry Reding --- include/linux/clk/tegra.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index 7007a5f48080..e17d32831e28 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -125,5 +125,7 @@ extern void tegra210_xusb_pll_hw_control_enable(void); extern void tegra210_xusb_pll_hw_sequence_start(void); extern void tegra210_sata_pll_hw_control_enable(void); extern void tegra210_sata_pll_hw_sequence_start(void); +extern void tegra210_put_utmipll_in_iddq(void); +extern void tegra210_put_utmipll_out_iddq(void); #endif /* __LINUX_CLK_TEGRA_H_ */ -- cgit v1.2.3 From 59af78d78db8bde6a63e09772aa44192f772fa96 Mon Sep 17 00:00:00 2001 From: Peter De Schrijver Date: Wed, 15 Mar 2017 17:42:05 +0200 Subject: clk: tegra: Add SATA seq input control This will be used by the powergating driver to ensure proper sequencer state when the SATA domain is powergated. 
Signed-off-by: Peter De Schrijver Signed-off-by: Thierry Reding --- include/linux/clk/tegra.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/tegra.h b/include/linux/clk/tegra.h index e17d32831e28..d23c9cf26993 100644 --- a/include/linux/clk/tegra.h +++ b/include/linux/clk/tegra.h @@ -125,6 +125,7 @@ extern void tegra210_xusb_pll_hw_control_enable(void); extern void tegra210_xusb_pll_hw_sequence_start(void); extern void tegra210_sata_pll_hw_control_enable(void); extern void tegra210_sata_pll_hw_sequence_start(void); +extern void tegra210_set_sata_pll_seq_sw(bool state); extern void tegra210_put_utmipll_in_iddq(void); extern void tegra210_put_utmipll_out_iddq(void); -- cgit v1.2.3 From 79170fda313ed5be2394f87aa2a00d597f8ed4a1 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 20 Mar 2017 01:16:24 -0700 Subject: x86/syscalls/32: Wire up arch_prctl on x86-32 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hook up arch_prctl to call do_arch_prctl() on x86-32, and in 32 bit compat mode on x86-64. This allows to have arch_prctls that are not specific to 64 bits. On UML, simply stub out this syscall. Signed-off-by: Kyle Huey Cc: Grzegorz Andrejczuk Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Peter Zijlstra Cc: Dave Hansen Cc: Andi Kleen Cc: linux-kselftest@vger.kernel.org Cc: Nadav Amit Cc: Robert O'Callahan Cc: Richard Weinberger Cc: "Rafael J. 
Wysocki" Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Len Brown Cc: Shuah Khan Cc: user-mode-linux-devel@lists.sourceforge.net Cc: Jeff Dike Cc: Alexander Viro Cc: user-mode-linux-user@lists.sourceforge.net Cc: David Matlack Cc: Boris Ostrovsky Cc: Dmitry Safonov Cc: linux-fsdevel@vger.kernel.org Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/20170320081628.18952-7-khuey@kylehuey.com Signed-off-by: Thomas Gleixner --- include/linux/compat.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..af9dbc44fd92 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -723,6 +723,8 @@ asmlinkage long compat_sys_sched_rr_get_interval(compat_pid_t pid, asmlinkage long compat_sys_fanotify_mark(int, unsigned int, __u32, __u32, int, const char __user *); +asmlinkage long compat_sys_arch_prctl(int option, unsigned long arg2); + /* * For most but not all architectures, "am I in a compat syscall?" and * "am I a compat task?" are the same question. For architectures on which -- cgit v1.2.3 From e9ea1e7f53b852147cbd568b0568c7ad97ec21a3 Mon Sep 17 00:00:00 2001 From: Kyle Huey Date: Mon, 20 Mar 2017 01:16:26 -0700 Subject: x86/arch_prctl: Add ARCH_[GET|SET]_CPUID MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Intel supports faulting on the CPUID instruction beginning with Ivy Bridge. When enabled, the processor will fault on attempts to execute the CPUID instruction with CPL>0. Exposing this feature to userspace will allow a ptracer to trap and emulate the CPUID instruction. When supported, this feature is controlled by toggling bit 0 of MSR_MISC_FEATURES_ENABLES. It is documented in detail in Section 2.3.2 of https://bugzilla.kernel.org/attachment.cgi?id=243991 Implement a new pair of arch_prctls, available on both x86-32 and x86-64. 
ARCH_GET_CPUID: Returns the current CPUID state, either 0 if CPUID faulting is enabled (and thus the CPUID instruction is not available) or 1 if CPUID faulting is not enabled. ARCH_SET_CPUID: Set the CPUID state to the second argument. If cpuid_enabled is 0 CPUID faulting will be activated, otherwise it will be deactivated. Returns ENODEV if CPUID faulting is not supported on this system. The state of the CPUID faulting flag is propagated across forks, but reset upon exec. Signed-off-by: Kyle Huey Cc: Grzegorz Andrejczuk Cc: kvm@vger.kernel.org Cc: Radim Krčmář Cc: Peter Zijlstra Cc: Dave Hansen Cc: Andi Kleen Cc: linux-kselftest@vger.kernel.org Cc: Nadav Amit Cc: Robert O'Callahan Cc: Richard Weinberger Cc: "Rafael J. Wysocki" Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Len Brown Cc: Shuah Khan Cc: user-mode-linux-devel@lists.sourceforge.net Cc: Jeff Dike Cc: Alexander Viro Cc: user-mode-linux-user@lists.sourceforge.net Cc: David Matlack Cc: Boris Ostrovsky Cc: Dmitry Safonov Cc: linux-fsdevel@vger.kernel.org Cc: Paolo Bonzini Link: http://lkml.kernel.org/r/20170320081628.18952-9-khuey@kylehuey.com Signed-off-by: Thomas Gleixner --- include/linux/thread_info.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 58373875e8ee..55125d674338 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -101,6 +101,10 @@ static inline void check_object_size(const void *ptr, unsigned long n, { } #endif /* CONFIG_HARDENED_USERCOPY */ +#ifndef arch_setup_new_exec +static inline void arch_setup_new_exec(void) { } +#endif + #endif /* __KERNEL__ */ #endif /* _LINUX_THREAD_INFO_H */ -- cgit v1.2.3 From 233ed09d7fdacf592ee91e6c97ce5f4364fbe7c0 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 17 Mar 2017 12:48:08 -0600 Subject: chardev: add helper function to register char devs with a struct device Credit for this patch goes is shared with Dan Williams [1]. 
I've taken things one step further to make the helper function more useful and clean up calling code. There's a common pattern in the kernel whereby a struct cdev is placed in a structure alongside a struct device which manages the life-cycle of both. In the naive approach, the reference counting is broken and the struct device can free everything before the chardev code is entirely released. Many developers have solved this problem by linking the internal kobjs in this fashion: cdev.kobj.parent = &parent_dev.kobj; The cdev code explicitly gets and puts a reference to its kobj parent. So this seems like it was intended to be used this way. Dmitry Torokhov first put this in place in 2012 with this commit: 2f0157f char_dev: pin parent kobject and the first instance of the fix was then done in the input subsystem in the following commit: 4a215aa Input: fix use-after-free introduced with dynamic minor changes Subsequently over the years, however, this issue seems to have tripped up multiple developers independently. For example, see these commits: 0d5b7da iio: Prevent race between IIO chardev opening and IIO device (by Lars-Peter Clausen in 2013) ba0ef85 tpm: Fix initialization of the cdev (by Jason Gunthorpe in 2015) 5b28dde [media] media: fix use-after-free in cdev_put() when app exits after driver unbind (by Shuah Khan in 2016) This technique is similarly done in at least 15 places within the kernel and probably should have been done so in another, at least, 5 places. The kobj line also looks very suspect in that one would not expect drivers to have to mess with kobject internals in this way. Even highly experienced kernel developers can be surprised by this code, as seen in [2]. To help alleviate this situation, and hopefully prevent future wasted effort on this problem, this patch introduces a helper function to register a char device along with its parent struct device.
This creates a more regular API for tying a char device to its parent without the developer having to set members in the underlying kobject. This patch introduces cdev_device_add and cdev_device_del, which replace a common pattern including setting the kobj parent, calling cdev_add and then calling device_add. It also introduces cdev_set_parent for the few cases that set the kobject parent without using device_add. [1] https://lkml.org/lkml/2017/2/13/700 [2] https://lkml.org/lkml/2017/2/10/370 Signed-off-by: Logan Gunthorpe Signed-off-by: Dan Williams Reviewed-by: Hans Verkuil Reviewed-by: Alexandre Belloni Signed-off-by: Greg Kroah-Hartman --- include/linux/cdev.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cdev.h b/include/linux/cdev.h index f8763615a5f2..408bc09ce497 100644 --- a/include/linux/cdev.h +++ b/include/linux/cdev.h @@ -4,6 +4,7 @@ #include #include #include +#include struct file_operations; struct inode; @@ -26,6 +27,10 @@ void cdev_put(struct cdev *p); int cdev_add(struct cdev *, dev_t, unsigned); +void cdev_set_parent(struct cdev *p, struct kobject *kobj); +int cdev_device_add(struct cdev *cdev, struct device *dev); +void cdev_device_del(struct cdev *cdev, struct device *dev); + void cdev_del(struct cdev *); void cd_forget(struct inode *); -- cgit v1.2.3 From 50dd1bd1e2395a78e5adfad96487092e215483e0 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 13 Mar 2017 16:54:00 +0530 Subject: drm/edid: check for HF-VSDB block This patch implements a small function that checks whether a given CEA db is an HDMI Forum vendor-specific data block (HF-VSDB) or not. V2: Rebase. V3: Added R-B from Jose.
V4: Rebase V5: Rebase V6: Rebase V7: Rebase V8: Rebase V9: Rebase V10: Rebase Signed-off-by: Thierry Reding Signed-off-by: Shashank Sharma Reviewed-by: Jose Abreu Signed-off-by: Jani Nikula Link: http://patchwork.freedesktop.org/patch/msgid/1489404244-16608-3-git-send-email-shashank.sharma@intel.com --- include/linux/hdmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index edbb4fc674ed..d271ff23984f 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -35,6 +35,7 @@ enum hdmi_infoframe_type { }; #define HDMI_IEEE_OUI 0x000c03 +#define HDMI_FORUM_IEEE_OUI 0xc45dd8 #define HDMI_INFOFRAME_HEADER_SIZE 4 #define HDMI_AVI_INFOFRAME_SIZE 13 #define HDMI_SPD_INFOFRAME_SIZE 25 -- cgit v1.2.3 From aeb9dd1de98c1a5f2007ea5d2a154c1244caf8a0 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Tue, 21 Mar 2017 16:01:30 +0800 Subject: usb/early: Add driver for xhci debug capability XHCI debug capability (DbC) is an optional but standalone functionality provided by an xHCI host controller. Software learns this capability by walking through the extended capability list of the host. XHCI specification describes DbC in section 7.6. This patch introduces the code to probe and initialize the debug capability hardware during early boot. With hardware initialized, the debug target (system on which this code is running) will present a debug device through the debug port (normally the first USB3 port). The debug device is fully compliant with the USB framework and provides the equivalent of a very high performance (USB3) full-duplex serial link between the debug host and target. The DbC functionality is independent of the xHCI host. There isn't any precondition from the xHCI host side for the DbC to work. One use for this feature is kernel debugging, for example when your machine crashes very early before the regular console code is initialized. 
Other uses include simpler, lockless logging instead of a full-blown printk console driver and klogd. Signed-off-by: Lu Baolu Acked-by: Thomas Gleixner Cc: Greg Kroah-Hartman Cc: Linus Torvalds Cc: Mathias Nyman Cc: Peter Zijlstra Cc: linux-usb@vger.kernel.org Link: http://lkml.kernel.org/r/1490083293-3792-3-git-send-email-baolu.lu@linux.intel.com [ Small fix to the Kconfig help text. ] Signed-off-by: Ingo Molnar --- include/linux/usb/xhci-dbgp.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/usb/xhci-dbgp.h (limited to 'include/linux') diff --git a/include/linux/usb/xhci-dbgp.h b/include/linux/usb/xhci-dbgp.h new file mode 100644 index 000000000000..80c1cca1f529 --- /dev/null +++ b/include/linux/usb/xhci-dbgp.h @@ -0,0 +1,29 @@ +/* + * Standalone xHCI debug capability driver + * + * Copyright (C) 2016 Intel Corporation + * + * Author: Lu Baolu + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_XHCI_DBGP_H +#define __LINUX_XHCI_DBGP_H + +#ifdef CONFIG_EARLY_PRINTK_USB_XDBC +int __init early_xdbc_parse_parameter(char *s); +int __init early_xdbc_setup_hardware(void); +void __init early_xdbc_register_console(void); +#else +static inline int __init early_xdbc_setup_hardware(void) +{ + return -ENODEV; +} +static inline void __init early_xdbc_register_console(void) +{ +} +#endif /* CONFIG_EARLY_PRINTK_USB_XDBC */ +#endif /* __LINUX_XHCI_DBGP_H */ -- cgit v1.2.3 From 9143059fafd4eebed2d43ffb5455178d4010e60a Mon Sep 17 00:00:00 2001 From: Benjamin Tissoires Date: Wed, 8 Mar 2017 15:11:14 +0100 Subject: HID: remove initial reading of reports at connect It looks like a bunch of devices do not like to be polled for their reports at init time. 
When you look into the details, it seems that for those that are requiring the quirk HID_QUIRK_NO_INIT_REPORTS, the driver fails to retrieve part of the features/inputs while others (more generic) work. IMO, it should be acceptable to remove the need for the quirk in the general case. In the small number of cases where we actually need to read the current values, the driver in charge (hid-mt or wacom) already retrieves the features manually. There are 2 cases where we might need to retrieve the reports at init: 1. hiddev devices with a specific user-space tool 2. a device that would require the driver to fetch a specific feature/input at plug For case 2, I have seen this a few times on hid-multitouch. It is solved in hid-multitouch directly by fetching the feature. I hope it won't be too common and this can be solved on a per-case basis (crossing fingers). For case 1, we moved the implementation of HID_QUIRK_NO_INIT_REPORTS into hiddev. When somebody starts calling ioctls that need an initial update, the hiddev device will fetch the initial state of the reports to mimic the current behavior. This adds a small amount of time during the first HIDIOCGUSAGE(S), but it should be acceptable in most cases. To keep the currently known broken devices, we have to keep around HID_QUIRK_NO_INIT_REPORTS, but the scope will only be for hiddev. Note that I don't think hidraw would be affected and I checked that the FF drivers that need to interact with the report fields are all using output reports, which are not initialized by usbhid_init_reports(). NO_INIT_INPUT_REPORTS is then replaced by HID_QUIRK_NO_INIT_REPORTS: there is no point keeping it for just one device.
Signed-off-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 28f38e2b8f30..b2e472c3e595 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -322,7 +322,7 @@ struct hid_item { #define HID_QUIRK_MULTI_INPUT 0x00000040 #define HID_QUIRK_HIDINPUT_FORCE 0x00000080 #define HID_QUIRK_NO_EMPTY_INPUT 0x00000100 -#define HID_QUIRK_NO_INIT_INPUT_REPORTS 0x00000200 +/* 0x00000200 reserved for backward compatibility, was NO_INIT_INPUT_REPORTS */ #define HID_QUIRK_ALWAYS_POLL 0x00000400 #define HID_QUIRK_SKIP_OUTPUT_REPORTS 0x00010000 #define HID_QUIRK_SKIP_OUTPUT_REPORT_ID 0x00020000 -- cgit v1.2.3 From 733aca90300b76575b8a465dc49cbed7a991fd8b Mon Sep 17 00:00:00 2001 From: Jaejoong Kim Date: Fri, 3 Mar 2017 17:54:01 +0900 Subject: HID: hiddev: reallocate hiddev's minor number We need to store the minor number in each driver. In the case of hidraw, the minor number is stored in struct hidraw, but hiddev's minor is located in struct hid_device. The hid-core driver prints a kernel message announcing which driver is loaded when a HID device is connected, but hiddev's minor number is always zero. To properly display hiddev's minor number, we need to store the minor number obtained from usb core and do some refactoring work (moving struct hiddev from hiddev.c to hiddev.h) to access hiddev in hid-core.
[jkosina@suse.cz: rebase on top of newer codebase] Reviewed-by: Benjamin Tissoires Signed-off-by: Jaejoong Kim Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 - include/linux/hiddev.h | 12 ++++++++++++ 2 files changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index b2e472c3e595..24de54835e52 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -541,7 +541,6 @@ struct hid_device { /* device report descriptor */ struct list_head inputs; /* The list of inputs */ void *hiddev; /* The hiddev structure */ void *hidraw; - int minor; /* Hiddev minor number */ int open; /* is the device open by anyone? */ char name[128]; /* Device name */ diff --git a/include/linux/hiddev.h b/include/linux/hiddev.h index a5dd8148660b..921622222957 100644 --- a/include/linux/hiddev.h +++ b/include/linux/hiddev.h @@ -32,6 +32,18 @@ * In-kernel definitions. */ +struct hiddev { + int minor; + int exist; + int open; + struct mutex existancelock; + wait_queue_head_t wait; + struct hid_device *hid; + struct list_head list; + spinlock_t list_lock; + bool initialized; +}; + struct hid_device; struct hid_usage; struct hid_field; -- cgit v1.2.3 From 4875253fddd7b6d322f028ad023d44b6efb7f73b Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Mar 2017 08:56:07 -0700 Subject: blk-stat: move BLK_RQ_STAT_BATCH definition to blk-stat.c This is an implementation detail that no-one outside of blk-stat.c uses. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index d703acb55d0f..e213c5e7500b 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -287,8 +287,6 @@ struct blk_issue_stat { u64 time; }; -#define BLK_RQ_STAT_BATCH 64 - struct blk_rq_stat { s64 mean; u64 min; -- cgit v1.2.3 From 34dbad5d26e2f4b88e60f0e9ad03f99480802812 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 21 Mar 2017 08:56:08 -0700 Subject: blk-stat: convert to callback-based statistics reporting Currently, statistics are gathered in ~0.13s windows, and users grab the statistics whenever they need them. This is not ideal for both in-tree users: 1. Writeback throttling wants its own dynamically sized window of statistics. Since the blk-stats statistics are reset after every window and the wbt windows don't line up with the blk-stats windows, wbt doesn't see every I/O. 2. Polling currently grabs the statistics on every I/O. Again, depending on how the window lines up, we may miss some I/Os. It's also unnecessary overhead to get the statistics on every I/O; the hybrid polling heuristic would be just as happy with the statistics from the previous full window. This reworks the blk-stats infrastructure to be callback-based: users register a callback that they want called at a given time with all of the statistics from the window during which the callback was active. Users can dynamically bucketize the statistics. wbt and polling both currently use read vs. write, but polling can be extended to further subdivide based on request size. The callbacks are kept on an RCU list, and each callback has percpu stats buffers. There will only be a few users, so the overhead on the I/O completion side is low. 
The stats flushing is also simplified considerably: since the timer function is responsible for clearing the statistics, we don't have to worry about stale statistics. wbt is a trivial conversion. After the conversion, the windowing problem mentioned above is fixed. For polling, we register an extra callback that caches the previous window's statistics in the struct request_queue for the hybrid polling heuristic to use. Since we no longer have a single stats buffer for the request queue, this also removes the sysfs and debugfs stats entries. To replace those, we add a debugfs entry for the poll statistics. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - include/linux/blkdev.h | 10 ++++++++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e213c5e7500b..270119a501fb 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -294,7 +294,6 @@ struct blk_rq_stat { s32 nr_samples; s32 nr_batch; u64 batch; - s64 time; }; #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5a7da607ca04..1a7dc42a8918 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -40,6 +40,8 @@ struct blkcg_gq; struct blk_flush_queue; struct pr_ops; struct rq_wb; +struct blk_queue_stats; +struct blk_stat_callback; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -388,6 +390,7 @@ struct request_queue { int nr_rqs[2]; /* # allocated [a]sync rqs */ int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ + struct blk_queue_stats *stats; struct rq_wb *rq_wb; /* @@ -505,8 +508,6 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; - struct blk_rq_stat rq_stats[2]; - /* * Number of active block driver functions for which blk_drain_queue() * must wait. 
Must be incremented around functions that unlock the @@ -516,6 +517,10 @@ struct request_queue { unsigned int rq_timeout; int poll_nsec; + + struct blk_stat_callback *poll_cb; + struct blk_rq_stat poll_stat[2]; + struct timer_list timeout; struct work_struct timeout_work; struct list_head timeout_list; @@ -611,6 +616,7 @@ struct request_queue { #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ +#define QUEUE_FLAG_POLL_STATS 29 /* collecting stats for hybrid polling */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From 8bae3551e93de4e8a5b959c495b06de9264be0d5 Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Thu, 16 Mar 2017 23:18:47 +0100 Subject: net: usb: usbnet: add new api ethtool_{get|set}_link_ksettings The ethtool api {get|set}_settings is deprecated. We add the new api {get|set}_link_ksettings to this driver. As I don't have the hardware, I'd be very pleased if someone may test this patch. Signed-off-by: Philippe Reynes Acked-by: Oliver Neukum Signed-off-by: David S. 
Miller --- include/linux/usb/usbnet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 6e0ce8c7b8cb..5bd80078b7fe 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -265,6 +265,10 @@ extern int usbnet_get_settings(struct net_device *net, struct ethtool_cmd *cmd); extern int usbnet_set_settings(struct net_device *net, struct ethtool_cmd *cmd); +extern int usbnet_get_link_ksettings(struct net_device *net, + struct ethtool_link_ksettings *cmd); +extern int usbnet_set_link_ksettings(struct net_device *net, + const struct ethtool_link_ksettings *cmd); extern u32 usbnet_get_link(struct net_device *net); extern u32 usbnet_get_msglevel(struct net_device *); extern void usbnet_set_msglevel(struct net_device *, u32); -- cgit v1.2.3 From bde87ad64c4cee7a1d5b41d217b440e21050813e Mon Sep 17 00:00:00 2001 From: Philippe Reynes Date: Thu, 16 Mar 2017 23:18:57 +0100 Subject: net: usb: usb: remove old api ethtool_{get|set}_settings The function usbnet_{get|set}_settings is no longer used, so we remove it. Signed-off-by: Philippe Reynes Acked-by: Oliver Neukum Signed-off-by: David S. 
Miller --- include/linux/usb/usbnet.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 5bd80078b7fe..e2b56917450f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -261,10 +261,6 @@ extern void usbnet_pause_rx(struct usbnet *); extern void usbnet_resume_rx(struct usbnet *); extern void usbnet_purge_paused_rxq(struct usbnet *); -extern int usbnet_get_settings(struct net_device *net, - struct ethtool_cmd *cmd); -extern int usbnet_set_settings(struct net_device *net, - struct ethtool_cmd *cmd); extern int usbnet_get_link_ksettings(struct net_device *net, struct ethtool_link_ksettings *cmd); extern int usbnet_set_link_ksettings(struct net_device *net, -- cgit v1.2.3 From a8f5102af2a7740a4b3200a27beddf27f23f921a Mon Sep 17 00:00:00 2001 From: Joao Pinto Date: Fri, 17 Mar 2017 16:11:06 +0000 Subject: net: stmmac: TX and RX queue priority configuration This patch adds the configuration of RX and TX queues' priority. Signed-off-by: Joao Pinto Signed-off-by: David S. Miller --- include/linux/stmmac.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index be47b859e954..b7d5e7ae9591 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -127,6 +127,8 @@ struct stmmac_axi { struct stmmac_rxq_cfg { u8 mode_to_use; u8 chan; + bool use_prio; + u32 prio; }; struct stmmac_txq_cfg { @@ -137,6 +139,8 @@ struct stmmac_txq_cfg { u32 idle_slope; u32 high_credit; u32 low_credit; + bool use_prio; + u32 prio; }; struct plat_stmmacenet_data { -- cgit v1.2.3 From abe80fdc6ee664b2f8515f91b45e852b65dbb1a1 Mon Sep 17 00:00:00 2001 From: Joao Pinto Date: Fri, 17 Mar 2017 16:11:07 +0000 Subject: net: stmmac: RX queue routing configuration This patch adds the configuration of RX queues' routing. Signed-off-by: Joao Pinto Signed-off-by: David S. 
Miller --- include/linux/stmmac.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index b7d5e7ae9591..cd98ee232ad1 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -127,6 +127,7 @@ struct stmmac_axi { struct stmmac_rxq_cfg { u8 mode_to_use; u8 chan; + u8 pkt_route; bool use_prio; u32 prio; }; -- cgit v1.2.3 From f9fe1c12d126f9887441fa5bb165046f30ddd4b5 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sat, 18 Mar 2017 00:36:15 +0100 Subject: rhashtable: Add rhashtable_lookup_get_insert_fast Add rhashtable_lookup_get_insert_fast for fixed keys, similar to rhashtable_lookup_get_insert_key for explicit keys. Signed-off-by: Andreas Gruenbacher Acked-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 092292b6675e..e507290cd2c7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -915,6 +915,28 @@ static inline int rhashtable_lookup_insert_fast( return ret == NULL ? 0 : -EEXIST; } +/** + * rhashtable_lookup_get_insert_fast - lookup and insert object into hash table + * @ht: hash table + * @obj: pointer to hash head inside object + * @params: hash table parameters + * + * Just like rhashtable_lookup_insert_fast(), but this function returns the + * object if it exists, NULL if it did not and the insertion was successful, + * and an ERR_PTR otherwise. 
+ */ +static inline void *rhashtable_lookup_get_insert_fast( + struct rhashtable *ht, struct rhash_head *obj, + const struct rhashtable_params params) +{ + const char *key = rht_obj(ht, obj); + + BUG_ON(ht->p.obj_hashfn); + + return __rhashtable_insert_fast(ht, key + ht->p.key_offset, obj, params, + false); +} + /** * rhashtable_lookup_insert_key - search and insert object to hash table * with explicit key -- cgit v1.2.3 From 61012985eb132a2fa5e4a3eddbc33528334fa377 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 16 Mar 2017 16:23:55 +0200 Subject: iommu/vt-d: Use lo_hi_readq() / lo_hi_writeq() There is already helper functions to do 64-bit I/O on 32-bit machines or buses, thus we don't need to reinvent the wheel. Signed-off-by: Andy Shevchenko Signed-off-by: Joerg Roedel --- include/linux/intel-iommu.h | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index c573a52ae440..485a5b48f038 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -30,6 +30,8 @@ #include #include #include +#include + #include #include @@ -72,24 +74,8 @@ #define OFFSET_STRIDE (9) -#ifdef CONFIG_64BIT #define dmar_readq(a) readq(a) #define dmar_writeq(a,v) writeq(v,a) -#else -static inline u64 dmar_readq(void __iomem *addr) -{ - u32 lo, hi; - lo = readl(addr); - hi = readl(addr + 4); - return (((u64) hi) << 32) + lo; -} - -static inline void dmar_writeq(void __iomem *addr, u64 val) -{ - writel((u32)val, addr); - writel((u32)(val >> 32), addr + 4); -} -#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) -- cgit v1.2.3 From 21aff52ab2c831c2f07d48e2fa8d4bab26a66992 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 20 Mar 2017 20:11:28 +0100 Subject: iommu: Add dummy implementations for !IOMMU_IOVA Currently, building code which uses the API guarded by the IOMMU_IOVA will fail to link if 
IOMMU_IOVA is not enabled. Often this code will be using the API provided by the IOMMU_API Kconfig symbol, but support for this can be optional, with code falling back to contiguous memory. This commit implements dummy functions for the IOVA API so that it can be compiled out. With both IOMMU_API and IOMMU_IOVA optional, code can now be built with or without support for IOMMU without having to resort to #ifdefs in the user code. Signed-off-by: Thierry Reding Signed-off-by: Joerg Roedel --- include/linux/iova.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 91 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index f27bb2c62fca..548982ad5f2f 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -82,6 +82,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } +#ifdef CONFIG_IOMMU_IOVA int iova_cache_get(void); void iova_cache_put(void); @@ -106,5 +107,95 @@ void put_iova_domain(struct iova_domain *iovad); struct iova *split_and_remove_iova(struct iova_domain *iovad, struct iova *iova, unsigned long pfn_lo, unsigned long pfn_hi); void free_cpu_cached_iovas(unsigned int cpu, struct iova_domain *iovad); +#else +static inline int iova_cache_get(void) +{ + return -ENOTSUPP; +} + +static inline void iova_cache_put(void) +{ +} + +static inline struct iova *alloc_iova_mem(void) +{ + return NULL; +} + +static inline void free_iova_mem(struct iova *iova) +{ +} + +static inline void free_iova(struct iova_domain *iovad, unsigned long pfn) +{ +} + +static inline void __free_iova(struct iova_domain *iovad, struct iova *iova) +{ +} + +static inline struct iova *alloc_iova(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn, + bool size_aligned) +{ + return NULL; +} + +static inline void free_iova_fast(struct iova_domain *iovad, + unsigned long pfn, + unsigned long size) +{ +} + +static inline unsigned long 
alloc_iova_fast(struct iova_domain *iovad, + unsigned long size, + unsigned long limit_pfn) +{ + return 0; +} + +static inline struct iova *reserve_iova(struct iova_domain *iovad, + unsigned long pfn_lo, + unsigned long pfn_hi) +{ + return NULL; +} + +static inline void copy_reserved_iova(struct iova_domain *from, + struct iova_domain *to) +{ +} + +static inline void init_iova_domain(struct iova_domain *iovad, + unsigned long granule, + unsigned long start_pfn, + unsigned long pfn_32bit) +{ +} + +static inline struct iova *find_iova(struct iova_domain *iovad, + unsigned long pfn) +{ + return NULL; +} + +static inline void put_iova_domain(struct iova_domain *iovad) +{ +} + +static inline struct iova *split_and_remove_iova(struct iova_domain *iovad, + struct iova *iova, + unsigned long pfn_lo, + unsigned long pfn_hi) +{ + return NULL; +} + +static inline void free_cpu_cached_iovas(unsigned int cpu, + struct iova_domain *iovad) +{ +} +#endif #endif -- cgit v1.2.3 From 273df9635385b2156851c7ee49f40658d7bcb29d Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 16 Mar 2017 17:00:19 +0000 Subject: iommu/dma: Make PCI window reservation generic Now that we're applying the IOMMU API reserved regions to our IOVA domains, we shouldn't need to privately special-case PCI windows, or indeed anything else which isn't specific to our iommu-dma layer. However, since those aren't IOMMU-specific either, rather than start duplicating code into IOMMU drivers let's transform the existing function into an iommu_get_resv_regions() helper that they can share. 
Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5725c94b1f12..b6635a46fc7c 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -71,6 +71,7 @@ int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); /* The DMA API isn't _quite_ the whole story, though... */ void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg); +void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list); #else @@ -100,6 +101,10 @@ static inline void iommu_dma_map_msi_msg(int irq, struct msi_msg *msg) { } +static inline void iommu_dma_get_resv_regions(struct device *dev, struct list_head *list) +{ +} + #endif /* CONFIG_IOMMU_DMA */ #endif /* __KERNEL__ */ #endif /* __DMA_IOMMU_H */ -- cgit v1.2.3 From 4c355cdfbba537971b5c3849680b1b6453a7a383 Mon Sep 17 00:00:00 2001 From: "Reshetova, Elena" Date: Tue, 21 Mar 2017 13:59:19 +0200 Subject: net: convert sk_filter.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index dffa072b7b79..511fe910bf1d 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -7,6 +7,7 @@ #include #include +#include #include #include #include @@ -430,7 +431,7 @@ struct bpf_prog { }; struct sk_filter { - atomic_t refcnt; + refcount_t refcnt; struct rcu_head rcu; struct bpf_prog *prog; }; -- cgit v1.2.3 From 2d72d5016f00fc7d64b95e79405787dea73669af Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 21 Mar 2017 16:12:11 +0100 Subject: net: stmmac: Use AVB mode by default Prior to the recent multi-queue changes the driver would configure the queues to use the AVB mode, but the mode then got switched to DCB. The hardware still works fine in DCB mode, but my testing capabilities are limited, so it's safer to revert to the prior setting anyway. Signed-off-by: Thierry Reding Acked-By: Joao Pinto Signed-off-by: David S. Miller --- include/linux/stmmac.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index cd98ee232ad1..3921cb9dfadb 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -56,8 +56,8 @@ #define MTL_RX_ALGORITHM_WSP 0x5 /* RX/TX Queue Mode */ -#define MTL_QUEUE_DCB 0x0 -#define MTL_QUEUE_AVB 0x1 +#define MTL_QUEUE_AVB 0x0 +#define MTL_QUEUE_DCB 0x1 /* The MDC clock could be set higher than the IEEE 802.3 * specified frequency limit 0f 2.5 MHz, by programming a clock divider -- cgit v1.2.3 From 9860118b58241169f67ba77dfeb935fcf53ce4cd Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Mar 2017 16:36:37 +0000 Subject: net: phy: move phy MMD accessors to phy-core.c Move the phy_(read|write)__mmd() helpers out of line, they will become our main MMD accessor functions, and so will be a little more complex. 
This complexity doesn't belong in an inline function. Also move the _indirect variants as well to keep like functionality together. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 20 ++------------------ 1 file changed, 2 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 43a774873aa9..bcb4549b41d6 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -651,14 +651,7 @@ struct phy_fixup { * * Same rules as for phy_read(); */ -static inline int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum) -{ - if (!phydev->is_c45) - return -EOPNOTSUPP; - - return mdiobus_read(phydev->mdio.bus, phydev->mdio.addr, - MII_ADDR_C45 | (devad << 16) | (regnum & 0xffff)); -} +int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); /** * phy_read_mmd_indirect - reads data from the MMD registers @@ -752,16 +745,7 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev) * * Same rules as for phy_write(); */ -static inline int phy_write_mmd(struct phy_device *phydev, int devad, - u32 regnum, u16 val) -{ - if (!phydev->is_c45) - return -EOPNOTSUPP; - - regnum = MII_ADDR_C45 | ((devad & 0x1f) << 16) | (regnum & 0xffff); - - return mdiobus_write(phydev->mdio.bus, phydev->mdio.addr, regnum, val); -} +int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); /** * phy_write_mmd_indirect - writes data to the MMD registers -- cgit v1.2.3 From 1ee6b9bc6206cd0837bc16e46f580e40fe663384 Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Mar 2017 16:36:43 +0000 Subject: net: phy: make phy_(read|write)_mmd() generic MMD accessors Make phy_(read|write)_mmd() generic 802.3 clause 45 register accessors for both Clause 22 and Clause 45 PHYs, using either the direct register reading for Clause 45, or the indirect method for Clause 22 PHYs. 
Allow this behaviour to be overridden by PHY drivers where necessary. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index bcb4549b41d6..b8feeffeb64c 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -587,6 +587,30 @@ struct phy_driver { */ void (*link_change_notify)(struct phy_device *dev); + /* + * Phy specific driver override for reading a MMD register. + * This function is optional for PHY specific drivers. When + * not provided, the default MMD read function will be used + * by phy_read_mmd(), which will use either a direct read for + * Clause 45 PHYs or an indirect read for Clause 22 PHYs. + * devnum is the MMD device number within the PHY device, + * regnum is the register within the selected MMD device. + */ + int (*read_mmd)(struct phy_device *dev, int devnum, u16 regnum); + + /* + * Phy specific driver override for writing a MMD register. + * This function is optional for PHY specific drivers. When + * not provided, the default MMD write function will be used + * by phy_write_mmd(), which will use either a direct write for + * Clause 45 PHYs, or an indirect write for Clause 22 PHYs. + * devnum is the MMD device number within the PHY device, + * regnum is the register within the selected MMD device. + * val is the value to be written. + */ + int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum, + u16 val); + /* A function provided by a phy specific driver to override the * the PHY driver framework support for reading a MMD register * from the PHY. If not supported, return -1. This function is
This function is -- cgit v1.2.3 From 3b85d8df2655a4a5831ee8233108b53e69efa1ed Mon Sep 17 00:00:00 2001 From: Russell King Date: Tue, 21 Mar 2017 16:37:03 +0000 Subject: net: phy: remove the indirect MMD read/write methods Remove the indirect MMD read/write methods which are now no longer necessary. Reviewed-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: Russell King Signed-off-by: David S. Miller --- include/linux/phy.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index b8feeffeb64c..2efca6b39fba 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -611,24 +611,6 @@ struct phy_driver { int (*write_mmd)(struct phy_device *dev, int devnum, u16 regnum, u16 val); - /* A function provided by a phy specific driver to override the - * the PHY driver framework support for reading a MMD register - * from the PHY. If not supported, return -1. This function is - * optional for PHY specific drivers, if not provided then the - * default MMD read function is used by the PHY framework. - */ - int (*read_mmd_indirect)(struct phy_device *dev, int ptrad, - int devnum, int regnum); - - /* A function provided by a phy specific driver to override the - * the PHY driver framework support for writing a MMD register - * from the PHY. This function is optional for PHY specific drivers, - * if not provided then the default MMD read function is used by - * the PHY framework. 
- */ - void (*write_mmd_indirect)(struct phy_device *dev, int ptrad, - int devnum, int regnum, u32 val); - /* Get the size and type of the eeprom contained within a plug-in * module */ int (*module_info)(struct phy_device *dev, @@ -677,17 +659,6 @@ struct phy_fixup { */ int phy_read_mmd(struct phy_device *phydev, int devad, u32 regnum); -/** - * phy_read_mmd_indirect - reads data from the MMD registers - * @phydev: The PHY device bus - * @prtad: MMD Address - * @addr: PHY address on the MII bus - * - * Description: it reads data from the MMD registers (clause 22 to access to - * clause 45) of the specified phy address. - */ -int phy_read_mmd_indirect(struct phy_device *phydev, int prtad, int devad); - /** * phy_read - Convenience function for reading a given PHY register * @phydev: the phy_device struct @@ -771,19 +742,6 @@ static inline bool phy_is_pseudo_fixed_link(struct phy_device *phydev) */ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); -/** - * phy_write_mmd_indirect - writes data to the MMD registers - * @phydev: The PHY device - * @prtad: MMD Address - * @devad: MMD DEVAD - * @data: data to write in the MMD register - * - * Description: Write data from the MMD registers of the specified - * phy address. - */ -void phy_write_mmd_indirect(struct phy_device *phydev, int prtad, - int devad, u32 data); - struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); -- cgit v1.2.3 From 0634c2958927198797bf9e55d26fb51cce4c22b4 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 22 Mar 2017 09:16:27 -0500 Subject: of: Add function for generating a DT modalias with a newline The modalias sysfs attr is lacking a newline for DT aliases on platform devices. The macio and ibmebus correctly add the newline, but open code it. 
Introduce a new function, of_device_modalias(), that fills the buffer with the modalias including the newline and update users of the old of_device_get_modalias function. Signed-off-by: Rob Herring Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Michael Ellerman Cc: Frank Rowand Cc: linuxppc-dev@lists.ozlabs.org Acked-by: Greg Kroah-Hartman --- include/linux/of_device.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index c12dace043f3..169ea0bd8eb4 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -34,8 +34,7 @@ extern void of_device_unregister(struct platform_device *ofdev); extern const void *of_device_get_match_data(const struct device *dev); -extern ssize_t of_device_get_modalias(struct device *dev, - char *str, ssize_t len); +extern ssize_t of_device_modalias(struct device *dev, char *str, ssize_t len); extern int of_device_request_module(struct device *dev); extern void of_device_uevent(struct device *dev, struct kobj_uevent_env *env); @@ -72,8 +71,8 @@ static inline const void *of_device_get_match_data(const struct device *dev) return NULL; } -static inline int of_device_get_modalias(struct device *dev, - char *str, ssize_t len) +static inline int of_device_modalias(struct device *dev, + char *str, ssize_t len) { return -ENODEV; } -- cgit v1.2.3 From bbea124bc99df968011e76eba105fe964a4eceab Mon Sep 17 00:00:00 2001 From: Joel Scherpelz Date: Wed, 22 Mar 2017 18:19:04 +0900 Subject: net: ipv6: Add sysctl for minimum prefix len acceptable in RIOs. This commit adds a new sysctl accept_ra_rt_info_min_plen that defines the minimum acceptable prefix length of Route Information Options. The new sysctl is intended to be used together with accept_ra_rt_info_max_plen to configure a range of acceptable prefix lengths. 
It is useful to prevent misconfigurations from unintentionally blackholing too much of the IPv6 address space (e.g., home routers announcing RIOs for fc00::/7, which is incorrect). Signed-off-by: Joel Scherpelz Acked-by: Lorenzo Colitti Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f0d79bd054ca..e1b442996f81 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -37,6 +37,7 @@ struct ipv6_devconf { __s32 accept_ra_rtr_pref; __s32 rtr_probe_interval; #ifdef CONFIG_IPV6_ROUTE_INFO + __s32 accept_ra_rt_info_min_plen; __s32 accept_ra_rt_info_max_plen; #endif #endif -- cgit v1.2.3 From 56f668dfe00dcf086734f1c42ea999398fad6572 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2017 10:00:33 -0700 Subject: bpf: Add array of maps support This patch adds a few helper funcs to enable map-in-map support (i.e. outer_map->inner_map). The first outer_map type BPF_MAP_TYPE_ARRAY_OF_MAPS is also added in this patch. The next patch will introduce a hash of maps type. Any bpf map type can be acted as an inner_map. The exception is BPF_MAP_TYPE_PROG_ARRAY because the extra level of indirection makes it harder to verify the owner_prog_type and owner_jited. Multi-level map-in-map is not supported (i.e. map->map is ok but not map->map->map). When adding an inner_map to an outer_map, it currently checks the map_type, key_size, value_size, map_flags, max_entries and ops. The verifier also uses those map's properties to do static analysis. map_flags is needed because we need to ensure BPF_PROG_TYPE_PERF_EVENT is using a preallocated hashtab for the inner_hash also. ops and max_entries are needed to generate inlined map-lookup instructions. For simplicity reason, a simple '==' test is used for both map_flags and max_entries. The equality of ops is implied by the equality of map_type. 
During outer_map creation time, an inner_map_fd is needed to create an outer_map. However, the inner_map_fd's life time does not depend on the outer_map. The inner_map_fd is merely used to initialize the inner_map_meta of the outer_map. Also, for the outer_map: * It allows element update and delete from syscall * It allows element lookup from bpf_prog The above is similar to the current fd_array pattern. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index da8c64ca8dc9..3f3cdf9b15e8 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -50,6 +50,7 @@ struct bpf_map { const struct bpf_map_ops *ops; struct work_struct work; atomic_t usercnt; + struct bpf_map *inner_map_meta; }; struct bpf_map_type_list { -- cgit v1.2.3 From bcc6b1b7ebf857a9fe56202e2be3361131588c15 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 22 Mar 2017 10:00:34 -0700 Subject: bpf: Add hash of maps support This patch adds hash of maps support (hashmap->bpf_map). BPF_MAP_TYPE_HASH_OF_MAPS is added. A map-in-map contains a pointer to another map and lets call this pointer 'inner_map_ptr'. Notes on deleting inner_map_ptr from a hash map: 1. For BPF_F_NO_PREALLOC map-in-map, when deleting an inner_map_ptr, the htab_elem itself will go through a rcu grace period and the inner_map_ptr resides in the htab_elem. 2. For pre-allocated htab_elem (!BPF_F_NO_PREALLOC), when deleting an inner_map_ptr, the htab_elem may get reused immediately. This situation is similar to the existing prealloc-ated use cases. However, the bpf_map_fd_put_ptr() calls bpf_map_put() which calls inner_map->ops->map_free(inner_map) which will go through a rcu grace period (i.e. all bpf_map's map_free currently goes through a rcu grace period). 
Hence, the inner_map_ptr is still safe for the rcu reader side. This patch also includes BPF_MAP_TYPE_HASH_OF_MAPS to the check_map_prealloc() in the verifier. Preallocation is a must for BPF_PROG_TYPE_PERF_EVENT. Hence, even though we don't expect heavy updates to map-in-map, enforcing BPF_F_NO_PREALLOC for map-in-map is impossible without disallowing BPF_PROG_TYPE_PERF_EVENT from using map-in-map first. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3f3cdf9b15e8..2ae39a3e9ead 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -277,6 +277,8 @@ int bpf_stackmap_copy(struct bpf_map *map, void *key, void *value); int bpf_fd_array_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); void bpf_fd_array_map_clear(struct bpf_map *map); +int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, + void *key, void *value, u64 map_flags); /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. -- cgit v1.2.3 From b4d8c7aea15efa8c6272c58d78296f8b017c4c6a Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Thu, 23 Mar 2017 00:06:17 +0100 Subject: iommu/iova: Fix compile error with CONFIG_IOMMU_IOVA=m The #ifdef in iova.h only catches the CONFIG_IOMMU_IOVA=y case, so that compilation as a module fails with duplicate function definition errors. Fix it by catching both cases in the #if.
Signed-off-by: Joerg Roedel --- include/linux/iova.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 548982ad5f2f..e0a892ae45c0 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -82,7 +82,7 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -#ifdef CONFIG_IOMMU_IOVA +#if IS_ENABLED(CONFIG_IOMMU_IOVA) int iova_cache_get(void); void iova_cache_put(void); -- cgit v1.2.3 From b7eaf1aab9f8bd2e49fceed77ebc66c1b5800718 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 22 Mar 2017 00:08:50 +0100 Subject: cpufreq: schedutil: Avoid reducing frequency of busy CPUs prematurely The way the schedutil governor uses the PELT metric causes it to underestimate the CPU utilization in some cases. That can be easily demonstrated by running kernel compilation on a Sandy Bridge Intel processor, running turbostat in parallel with it and looking at the values written to the MSR_IA32_PERF_CTL register. Namely, the expected result would be that when all CPUs were 100% busy, all of them would be requested to run in the maximum P-state, but observation shows that this clearly isn't the case. The CPUs run in the maximum P-state for a while and then are requested to run slower and go back to the maximum P-state after a while again. That causes the actual frequency of the processor to visibly oscillate below the sustainable maximum in a jittery fashion which clearly is not desirable. That has been attributed to CPU utilization metric updates on task migration that cause the total utilization value for the CPU to be reduced by the utilization of the migrated task. If that happens, the schedutil governor may see a CPU utilization reduction and will attempt to reduce the CPU frequency accordingly right away. 
That may be premature, though, for example if the system is generally busy and there are other runnable tasks waiting to be run on that CPU already. This is unlikely to be an issue on systems where cpufreq policies are shared between multiple CPUs, because in those cases the policy utilization is computed as the maximum of the CPU utilization values over the whole policy and if that turns out to be low, reducing the frequency for the policy most likely is a good idea anyway. On systems with one CPU per policy, however, it may affect performance adversely and even lead to increased energy consumption in some cases. On those systems it may be addressed by taking another utilization metric into consideration, like whether or not the CPU whose frequency is about to be reduced has been idle recently, because if that's not the case, the CPU is likely to be busy in the near future and its frequency should not be reduced. To that end, use the counter of idle calls in the timekeeping code. Namely, make the schedutil governor look at that counter for the current CPU every time before its frequency is about to be reduced. If the counter has not changed since the previous iteration of the governor computations for that CPU, the CPU has been busy for all that time and its frequency should not be decreased, so if the new frequency would be lower than the one set previously, the governor will skip the frequency update. Signed-off-by: Rafael J. 
Wysocki Acked-by: Peter Zijlstra (Intel) Acked-by: Viresh Kumar Reviewed-by: Joel Fernandes --- include/linux/tick.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index a04fea19676f..fe01e68bf520 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -117,6 +117,7 @@ extern void tick_nohz_idle_enter(void); extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); +extern unsigned long tick_nohz_get_idle_calls(void); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ -- cgit v1.2.3 From b7d680d7bf584bce6023343304b819009a7c3336 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Mar 2017 01:36:54 +0100 Subject: bdi: Mark congested->bdi as internal congested->bdi pointer is used only to be able to remove congested structure from bdi->cgwb_congested_tree on structure release. Moreover the pointer can become NULL when we unregister the bdi. Rename the field to __bdi and add a comment to make it more explicit this is internal stuff of memcg writeback code and people should not use the field as such use will be likely race prone. We do not bother with converting congested->bdi to a proper refcounted reference. It will be slightly ugly to special-case bdi->wb.congested to avoid effectively a cyclic reference of bdi to itself and the reference gets cleared from bdi_unregister() making it impossible to reference a freed bdi. 
Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index ad955817916d..8fb3dcdebc80 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -54,7 +54,9 @@ struct bdi_writeback_congested { atomic_t refcnt; /* nr of attached wb's and blkg */ #ifdef CONFIG_CGROUP_WRITEBACK - struct backing_dev_info *bdi; /* the associated bdi */ + struct backing_dev_info *__bdi; /* the associated bdi, set to NULL + * on bdi unregistration. For memcg-wb + * internal use only! */ int blkcg_id; /* ID of the associated blkcg */ struct rb_node rb_node; /* on bdi->cgwb_congestion_tree */ #endif -- cgit v1.2.3 From 5318ce7d46866e1dbc20ab9349b93753edba0b3e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Mar 2017 01:36:57 +0100 Subject: bdi: Shutdown writeback on all cgwbs in cgwb_bdi_destroy() Currently we waited for all cgwbs to get freed in cgwb_bdi_destroy() which also means that writeback has been shutdown on them. Since this wait is going away, directly shutdown writeback on cgwbs from cgwb_bdi_destroy() to avoid live writeback structures after bdi_unregister() has finished. To make that safe with concurrent shutdown from cgwb_release_workfn(), we also have to make sure wb_shutdown() returns only after the bdi_writeback structure is really shutdown. 
Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 8fb3dcdebc80..8af720f22a2d 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -21,6 +21,7 @@ struct dentry; */ enum wb_state { WB_registered, /* bdi_register() was done */ + WB_shutting_down, /* wb_shutdown() in progress */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ }; -- cgit v1.2.3 From 4514451e79ae5baabb85d22ba3523602e59d5218 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Mar 2017 01:36:58 +0100 Subject: bdi: Do not wait for cgwbs release in bdi_unregister() Currently we wait for all cgwbs to get released in cgwb_bdi_destroy() (called from bdi_unregister()). That is however unnecessary now when cgwb->bdi is a proper refcounted reference (thus bdi cannot get released before all cgwbs are released) and when cgwb_bdi_destroy() shuts down writeback directly. 
Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 8af720f22a2d..e66d4722db8e 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -164,7 +164,6 @@ struct backing_dev_info { #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ - atomic_t usage_cnt; /* counts both cgwbs and cgwb_contested's */ #else struct bdi_writeback_congested *wb_congested; #endif -- cgit v1.2.3 From f759741d9d913eb57784a94b9bca78b376fc26a9 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Mar 2017 01:37:00 +0100 Subject: block: Fix oops in locked_inode_to_wb_and_lock_list() When a block device is closed, we call inode_detach_wb() in __blkdev_put() which sets inode->i_wb to NULL. That is contrary to expectations that inode->i_wb stays valid once set during the whole inode's lifetime and leads to oops in wb_get() in locked_inode_to_wb_and_lock_list() because inode_to_wb() returned NULL. The reason why we called inode_detach_wb() is not valid anymore though. BDI is guaranteed to stay around until we call bdi_put() from bdev_evict_inode() so we can postpone calling inode_detach_wb() to that moment. Also add a warning to catch if someone uses inode_detach_wb() in a dangerous way.
Reported-by: Thiago Jung Bauermann Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/writeback.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index a3c0cbd7c888..d5815794416c 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -237,6 +237,7 @@ static inline void inode_attach_wb(struct inode *inode, struct page *page) static inline void inode_detach_wb(struct inode *inode) { if (inode->i_wb) { + WARN_ON_ONCE(!(inode->i_state & I_CLEAR)); wb_put(inode->i_wb); inode->i_wb = NULL; } -- cgit v1.2.3 From c70c176ff8c3ff0ac6ef9a831cd591ea9a66bd1a Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 23 Mar 2017 01:37:01 +0100 Subject: kobject: Export kobject_get_unless_zero() Make the function available for outside use and fortify it against NULL kobject. CC: Greg Kroah-Hartman Reviewed-by: Bart Van Assche Acked-by: Tejun Heo Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/kobject.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index e6284591599e..ca85cb80e99a 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -108,6 +108,8 @@ extern int __must_check kobject_rename(struct kobject *, const char *new_name); extern int __must_check kobject_move(struct kobject *, struct kobject *); extern struct kobject *kobject_get(struct kobject *kobj); +extern struct kobject * __must_check kobject_get_unless_zero( + struct kobject *kobj); extern void kobject_put(struct kobject *kobj); extern const void *kobject_namespace(struct kobject *kobj); -- cgit v1.2.3 From 7642747d674aff1f7cfe74ad9af7e9b12ab1d5ee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Mar 2017 15:01:49 -0400 Subject: blk-mq: remove BLK_MQ_F_DEFER_ISSUE This flag was never used since it was introduced. 
Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b296a9006117..5b3e201c8d4f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -152,7 +152,6 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, - BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_NO_SCHED = 1 << 6, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, -- cgit v1.2.3 From 210f7cdcf088c304ee0533ffd33d6f71a8821862 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 15 Mar 2017 14:05:14 +1100 Subject: percpu-refcount: support synchronous switch to atomic mode. percpu_ref_switch_to_atomic_sync() schedules the switch to atomic mode, then waits for it to complete. Also export percpu_ref_switch_to_* so they can be used from modules. This will be used in md/raid to count the number of pending write requests to an array. We occasionally need to check if the count is zero, but most often we don't care. We always want updates to the counter to be fast, as in some cases we count every 4K page. 
Signed-off-by: NeilBrown Acked-by: Tejun Heo Signed-off-by: Shaohua Li --- include/linux/percpu-refcount.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/percpu-refcount.h b/include/linux/percpu-refcount.h index 3a481a49546e..c13dceb87b60 100644 --- a/include/linux/percpu-refcount.h +++ b/include/linux/percpu-refcount.h @@ -99,6 +99,7 @@ int __must_check percpu_ref_init(struct percpu_ref *ref, void percpu_ref_exit(struct percpu_ref *ref); void percpu_ref_switch_to_atomic(struct percpu_ref *ref, percpu_ref_func_t *confirm_switch); +void percpu_ref_switch_to_atomic_sync(struct percpu_ref *ref); void percpu_ref_switch_to_percpu(struct percpu_ref *ref); void percpu_ref_kill_and_confirm(struct percpu_ref *ref, percpu_ref_func_t *confirm_kill); -- cgit v1.2.3 From a8c06e407ef969461b7f51ec72839fe382dd3c29 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Mar 2017 10:18:41 +0800 Subject: usb: separate out sysdev pointer from usb_bus For xhci-hcd platform device, all the DMA parameters are not configured properly, notably dma ops for dwc3 devices. The idea here is that you pass in the parent of_node along with the child device pointer, so it would behave exactly like the parent already does. The difference is that it also handles all the other attributes besides the mask. sysdev will represent the physical device, as seen from firmware or bus. Splitting the usb_bus->controller field into the Linux-internal device (used for the sysfs hierarchy, for printks and for power management) and a new pointer (used for DMA, DT enumeration and phy lookup) probably covers all that we really need. Signed-off-by: Arnd Bergmann Signed-off-by: Sriram Dash Tested-by: Baolin Wang Tested-by: Brian Norris Tested-by: Alexander Sverdlin Tested-by: Vivek Gautam Signed-off-by: Mathias Nyman Signed-off-by: Peter Chen Cc: Felipe Balbi Cc: Grygorii Strashko Cc: Sinjan Kumar Cc: David Fisher Cc: Catalin Marinas Cc: "Thang Q.
Nguyen" Cc: Yoshihiro Shimoda Cc: Stephen Boyd Cc: Bjorn Andersson Cc: Ming Lei Cc: Jon Masters Cc: Dann Frazier Cc: Peter Chen Cc: Leo Li Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + include/linux/usb/hcd.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7e68259360de..148752640693 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -354,6 +354,7 @@ struct usb_devmap { */ struct usb_bus { struct device *controller; /* host/master side hardware */ + struct device *sysdev; /* as seen from firmware or bus */ int busnum; /* Bus number (in order of reg) */ const char *bus_name; /* stable id (PCI slot_name etc) */ u8 uses_dma; /* Does the host controller use DMA? */ diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index dff130151235..a469999a106d 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -437,6 +437,9 @@ extern int usb_hcd_alloc_bandwidth(struct usb_device *udev, struct usb_host_interface *new_alt); extern int usb_hcd_get_frame_number(struct usb_device *udev); +struct usb_hcd *__usb_create_hcd(const struct hc_driver *driver, + struct device *sysdev, struct device *dev, const char *bus_name, + struct usb_hcd *primary_hcd); extern struct usb_hcd *usb_create_hcd(const struct hc_driver *driver, struct device *dev, const char *bus_name); extern struct usb_hcd *usb_create_shared_hcd(const struct hc_driver *driver, -- cgit v1.2.3 From a9ebf306f52c756c4f9e50ee9a60cd6389d71344 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 1 Feb 2017 16:39:38 +0100 Subject: locking/atomic: Introduce atomic_try_cmpxchg() Add a new cmpxchg interface: bool try_cmpxchg(u{8,16,32,64} *ptr, u{8,16,32,64} *val, u{8,16,32,64} new); Where the boolean returns the result of the compare; and thus if the exchange happened; and in case of failure, the new value of *ptr is returned in *val. 
This allows simplification/improvement of loops like: for (;;) { new = val $op $imm; old = cmpxchg(ptr, val, new); if (old == val) break; val = old; } into: do { } while (!try_cmpxchg(ptr, &val, val $op $imm)); while also generating better code (GCC6 and onwards). Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e71835bf60a9..aae5953817d6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -423,6 +423,27 @@ #endif #endif /* atomic_cmpxchg_relaxed */ +#ifndef atomic_try_cmpxchg + +#define __atomic_try_cmpxchg(type, _p, _po, _n) \ +({ \ + typeof(_po) __po = (_po); \ + typeof(*(_po)) __o = *__po; \ + *__po = atomic_cmpxchg##type((_p), __o, (_n)); \ + (*__po == __o); \ +}) + +#define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) +#define atomic_try_cmpxchg_relaxed(_p, _po, _n) __atomic_try_cmpxchg(_relaxed, _p, _po, _n) +#define atomic_try_cmpxchg_acquire(_p, _po, _n) __atomic_try_cmpxchg(_acquire, _p, _po, _n) +#define atomic_try_cmpxchg_release(_p, _po, _n) __atomic_try_cmpxchg(_release, _p, _po, _n) + +#else /* atomic_try_cmpxchg */ +#define atomic_try_cmpxchg_relaxed atomic_try_cmpxchg +#define atomic_try_cmpxchg_acquire atomic_try_cmpxchg +#define atomic_try_cmpxchg_release atomic_try_cmpxchg +#endif /* atomic_try_cmpxchg */ + /* cmpxchg_relaxed */ #ifndef cmpxchg_relaxed #define cmpxchg_relaxed cmpxchg @@ -996,6 +1017,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) #endif #endif /* atomic64_cmpxchg_relaxed */ +#ifndef atomic64_try_cmpxchg + +#define 
__atomic64_try_cmpxchg(type, _p, _po, _n) \ +({ \ + typeof(_po) __po = (_po); \ + typeof(*(_po)) __o = *__po; \ + *__po = atomic64_cmpxchg##type((_p), __o, (_n)); \ + (*__po == __o); \ +}) + +#define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) +#define atomic64_try_cmpxchg_relaxed(_p, _po, _n) __atomic64_try_cmpxchg(_relaxed, _p, _po, _n) +#define atomic64_try_cmpxchg_acquire(_p, _po, _n) __atomic64_try_cmpxchg(_acquire, _p, _po, _n) +#define atomic64_try_cmpxchg_release(_p, _po, _n) __atomic64_try_cmpxchg(_release, _p, _po, _n) + +#else /* atomic64_try_cmpxchg */ +#define atomic64_try_cmpxchg_relaxed atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_acquire atomic64_try_cmpxchg +#define atomic64_try_cmpxchg_release atomic64_try_cmpxchg +#endif /* atomic64_try_cmpxchg */ + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit v1.2.3 From f6dd8449cd50de25881b76cecf1086bebeb11fe8 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Fri, 17 Mar 2017 10:05:18 +0000 Subject: mfd: wm831x: Add basic device tree binding Add the basic ability to register the device through device tree, more work is needed to get each individual sub-driver functioning correctly but this is enough to get the device to probe from device tree. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/wm831x/core.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/wm831x/core.h b/include/linux/mfd/wm831x/core.h index 76c22648436f..b49fa67612f1 100644 --- a/include/linux/mfd/wm831x/core.h +++ b/include/linux/mfd/wm831x/core.h @@ -21,6 +21,8 @@ #include #include #include +#include +#include /* * Register values. 
@@ -367,6 +369,9 @@ struct wm831x { struct regmap *regmap; + struct wm831x_pdata pdata; + enum wm831x_parent type; + int irq; /* Our chip IRQ */ struct mutex irq_lock; struct irq_domain *irq_domain; @@ -412,7 +417,7 @@ int wm831x_set_bits(struct wm831x *wm831x, unsigned short reg, int wm831x_bulk_read(struct wm831x *wm831x, unsigned short reg, int count, u16 *buf); -int wm831x_device_init(struct wm831x *wm831x, unsigned long id, int irq); +int wm831x_device_init(struct wm831x *wm831x, int irq); void wm831x_device_exit(struct wm831x *wm831x); int wm831x_device_suspend(struct wm831x *wm831x); void wm831x_device_shutdown(struct wm831x *wm831x); @@ -427,4 +432,6 @@ static inline int wm831x_irq(struct wm831x *wm831x, int irq) extern struct regmap_config wm831x_regmap_config; +extern const struct of_device_id wm831x_of_match[]; + #endif -- cgit v1.2.3 From ead25133e9352896af4de68d2f33f1ef68997e16 Mon Sep 17 00:00:00 2001 From: Ksenija Stanojevic Date: Thu, 16 Mar 2017 13:27:09 +0100 Subject: mfd: mxs-lradc: Add support for mxs-lradc Add core files for low resolution analog-to-digital converter (mxs-lradc) MFD driver. Signed-off-by: Ksenija Stanojevic Reviewed-by: Marek Vasut Signed-off-by: Lee Jones --- include/linux/mfd/mxs-lradc.h | 187 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) create mode 100644 include/linux/mfd/mxs-lradc.h (limited to 'include/linux') diff --git a/include/linux/mfd/mxs-lradc.h b/include/linux/mfd/mxs-lradc.h new file mode 100644 index 000000000000..661a4521f723 --- /dev/null +++ b/include/linux/mfd/mxs-lradc.h @@ -0,0 +1,187 @@ +/* + * Freescale MXS Low Resolution Analog-to-Digital Converter driver + * + * Copyright (c) 2012 DENX Software Engineering, GmbH. 
+ * Copyright (c) 2016 Ksenija Stanojevic + * + * Author: Marek Vasut + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __MFD_MXS_LRADC_H +#define __MFD_MXS_LRADC_H + +#include +#include +#include + +#define LRADC_MAX_DELAY_CHANS 4 +#define LRADC_MAX_MAPPED_CHANS 8 +#define LRADC_MAX_TOTAL_CHANS 16 + +#define LRADC_DELAY_TIMER_HZ 2000 + +#define LRADC_CTRL0 0x00 +# define LRADC_CTRL0_MX28_TOUCH_DETECT_ENABLE BIT(23) +# define LRADC_CTRL0_MX28_TOUCH_SCREEN_TYPE BIT(22) +# define LRADC_CTRL0_MX28_YNNSW /* YM */ BIT(21) +# define LRADC_CTRL0_MX28_YPNSW /* YP */ BIT(20) +# define LRADC_CTRL0_MX28_YPPSW /* YP */ BIT(19) +# define LRADC_CTRL0_MX28_XNNSW /* XM */ BIT(18) +# define LRADC_CTRL0_MX28_XNPSW /* XM */ BIT(17) +# define LRADC_CTRL0_MX28_XPPSW /* XP */ BIT(16) + +# define LRADC_CTRL0_MX23_TOUCH_DETECT_ENABLE BIT(20) +# define LRADC_CTRL0_MX23_YM BIT(19) +# define LRADC_CTRL0_MX23_XM BIT(18) +# define LRADC_CTRL0_MX23_YP BIT(17) +# define LRADC_CTRL0_MX23_XP BIT(16) + +# define LRADC_CTRL0_MX28_PLATE_MASK \ + (LRADC_CTRL0_MX28_TOUCH_DETECT_ENABLE | \ + LRADC_CTRL0_MX28_YNNSW | LRADC_CTRL0_MX28_YPNSW | \ + LRADC_CTRL0_MX28_YPPSW | LRADC_CTRL0_MX28_XNNSW | \ + LRADC_CTRL0_MX28_XNPSW | LRADC_CTRL0_MX28_XPPSW) + +# define LRADC_CTRL0_MX23_PLATE_MASK \ + (LRADC_CTRL0_MX23_TOUCH_DETECT_ENABLE | \ + LRADC_CTRL0_MX23_YM | LRADC_CTRL0_MX23_XM | \ + LRADC_CTRL0_MX23_YP | LRADC_CTRL0_MX23_XP) + +#define LRADC_CTRL1 0x10 +#define LRADC_CTRL1_TOUCH_DETECT_IRQ_EN BIT(24) +#define LRADC_CTRL1_LRADC_IRQ_EN(n) 
(1 << ((n) + 16)) +#define LRADC_CTRL1_MX28_LRADC_IRQ_EN_MASK (0x1fff << 16) +#define LRADC_CTRL1_MX23_LRADC_IRQ_EN_MASK (0x01ff << 16) +#define LRADC_CTRL1_LRADC_IRQ_EN_OFFSET 16 +#define LRADC_CTRL1_TOUCH_DETECT_IRQ BIT(8) +#define LRADC_CTRL1_LRADC_IRQ(n) BIT(n) +#define LRADC_CTRL1_MX28_LRADC_IRQ_MASK 0x1fff +#define LRADC_CTRL1_MX23_LRADC_IRQ_MASK 0x01ff +#define LRADC_CTRL1_LRADC_IRQ_OFFSET 0 + +#define LRADC_CTRL2 0x20 +#define LRADC_CTRL2_DIVIDE_BY_TWO_OFFSET 24 +#define LRADC_CTRL2_TEMPSENSE_PWD BIT(15) + +#define LRADC_STATUS 0x40 +#define LRADC_STATUS_TOUCH_DETECT_RAW BIT(0) + +#define LRADC_CH(n) (0x50 + (0x10 * (n))) +#define LRADC_CH_ACCUMULATE BIT(29) +#define LRADC_CH_NUM_SAMPLES_MASK (0x1f << 24) +#define LRADC_CH_NUM_SAMPLES_OFFSET 24 +#define LRADC_CH_NUM_SAMPLES(x) \ + ((x) << LRADC_CH_NUM_SAMPLES_OFFSET) +#define LRADC_CH_VALUE_MASK 0x3ffff +#define LRADC_CH_VALUE_OFFSET 0 + +#define LRADC_DELAY(n) (0xd0 + (0x10 * (n))) +#define LRADC_DELAY_TRIGGER_LRADCS_MASK (0xffUL << 24) +#define LRADC_DELAY_TRIGGER_LRADCS_OFFSET 24 +#define LRADC_DELAY_TRIGGER(x) \ + (((x) << LRADC_DELAY_TRIGGER_LRADCS_OFFSET) & \ + LRADC_DELAY_TRIGGER_LRADCS_MASK) +#define LRADC_DELAY_KICK BIT(20) +#define LRADC_DELAY_TRIGGER_DELAYS_MASK (0xf << 16) +#define LRADC_DELAY_TRIGGER_DELAYS_OFFSET 16 +#define LRADC_DELAY_TRIGGER_DELAYS(x) \ + (((x) << LRADC_DELAY_TRIGGER_DELAYS_OFFSET) & \ + LRADC_DELAY_TRIGGER_DELAYS_MASK) +#define LRADC_DELAY_LOOP_COUNT_MASK (0x1f << 11) +#define LRADC_DELAY_LOOP_COUNT_OFFSET 11 +#define LRADC_DELAY_LOOP(x) \ + (((x) << LRADC_DELAY_LOOP_COUNT_OFFSET) & \ + LRADC_DELAY_LOOP_COUNT_MASK) +#define LRADC_DELAY_DELAY_MASK 0x7ff +#define LRADC_DELAY_DELAY_OFFSET 0 +#define LRADC_DELAY_DELAY(x) \ + (((x) << LRADC_DELAY_DELAY_OFFSET) & \ + LRADC_DELAY_DELAY_MASK) + +#define LRADC_CTRL4 0x140 +#define LRADC_CTRL4_LRADCSELECT_MASK(n) (0xf << ((n) * 4)) +#define LRADC_CTRL4_LRADCSELECT_OFFSET(n) ((n) * 4) +#define LRADC_CTRL4_LRADCSELECT(n, x) \ + (((x) 
<< LRADC_CTRL4_LRADCSELECT_OFFSET(n)) & \ + LRADC_CTRL4_LRADCSELECT_MASK(n)) + +#define LRADC_RESOLUTION 12 +#define LRADC_SINGLE_SAMPLE_MASK ((1 << LRADC_RESOLUTION) - 1) + +#define BUFFER_VCHANS_LIMITED 0x3f +#define BUFFER_VCHANS_ALL 0xff + + /* + * Certain LRADC channels are shared between touchscreen + * and/or touch-buttons and generic LRADC block. Therefore when using + * either of these, these channels are not available for the regular + * sampling. The shared channels are as follows: + * + * CH0 -- Touch button #0 + * CH1 -- Touch button #1 + * CH2 -- Touch screen XPUL + * CH3 -- Touch screen YPLL + * CH4 -- Touch screen XNUL + * CH5 -- Touch screen YNLR + * CH6 -- Touch screen WIPER (5-wire only) + * + * The bit fields below represents which parts of the LRADC block are + * switched into special mode of operation. These channels can not + * be sampled as regular LRADC channels. The driver will refuse any + * attempt to sample these channels. + */ +#define CHAN_MASK_TOUCHBUTTON (BIT(1) | BIT(0)) +#define CHAN_MASK_TOUCHSCREEN_4WIRE (0xf << 2) +#define CHAN_MASK_TOUCHSCREEN_5WIRE (0x1f << 2) + +enum mxs_lradc_id { + IMX23_LRADC, + IMX28_LRADC, +}; + +enum mxs_lradc_ts_wires { + MXS_LRADC_TOUCHSCREEN_NONE = 0, + MXS_LRADC_TOUCHSCREEN_4WIRE, + MXS_LRADC_TOUCHSCREEN_5WIRE, +}; + +/** + * struct mxs_lradc + * @soc: soc type (IMX23 or IMX28) + * @clk: 2 kHz clock for delay units + * @buffer_vchans: channels that can be used during buffered capture + * @touchscreen_wire: touchscreen type (4-wire or 5-wire) + * @use_touchbutton: button state (on or off) + */ +struct mxs_lradc { + enum mxs_lradc_id soc; + struct clk *clk; + u8 buffer_vchans; + + enum mxs_lradc_ts_wires touchscreen_wire; + bool use_touchbutton; +}; + +static inline u32 mxs_lradc_irq_mask(struct mxs_lradc *lradc) +{ + switch (lradc->soc) { + case IMX23_LRADC: + return LRADC_CTRL1_MX23_LRADC_IRQ_MASK; + case IMX28_LRADC: + return LRADC_CTRL1_MX28_LRADC_IRQ_MASK; + default: + return 0; + } +} + +#endif 
/* __MXS_LRADC_H */ -- cgit v1.2.3 From e1fe7b6a7b376bfb54558725ddb2a89aaaa4adcc Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 21 Mar 2017 13:56:46 +0200 Subject: lib/string: add sysfs_match_string helper Make a simple helper for matching strings with sysfs attribute files. In most parts the same as match_string(), except sysfs_match_string() uses sysfs_streq() instead of strcmp() for matching. This is more convenient when used with sysfs attributes. Signed-off-by: Heikki Krogerus Reviewed-by: Mika Westerberg Reviewed-by: Felipe Balbi Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/string.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 26b6f6a66f83..c4011b28f3d8 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -135,6 +135,16 @@ static inline int strtobool(const char *s, bool *res) } int match_string(const char * const *array, size_t n, const char *string); +int __sysfs_match_string(const char * const *array, size_t n, const char *s); + +/** + * sysfs_match_string - matches given string in an array + * @_a: array of strings + * @_s: string to match with + * + * Helper for __sysfs_match_string(). Calculates the size of @a automatically. 
+ */ +#define sysfs_match_string(_a, _s) __sysfs_match_string(_a, ARRAY_SIZE(_a), _s) #ifdef CONFIG_BINARY_PRINTF int vbin_printf(u32 *bin_buf, size_t size, const char *fmt, va_list args); -- cgit v1.2.3 From fab9288428ec0fbd09adb67d3a17c51d78196f9c Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 21 Mar 2017 13:56:47 +0200 Subject: usb: USB Type-C connector class The purpose of USB Type-C connector class is to provide unified interface for the user space to get the status and basic information about USB Type-C connectors on a system, control over data role swapping, and when the port supports USB Power Delivery, also control over power role swapping and Alternate Modes. Signed-off-by: Heikki Krogerus Reviewed-by: Mika Westerberg Reviewed-and-Tested-by: Felipe Balbi Tested-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/typec.h | 243 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 include/linux/usb/typec.h (limited to 'include/linux') diff --git a/include/linux/usb/typec.h b/include/linux/usb/typec.h new file mode 100644 index 000000000000..ec78204964ab --- /dev/null +++ b/include/linux/usb/typec.h @@ -0,0 +1,243 @@ + +#ifndef __LINUX_USB_TYPEC_H +#define __LINUX_USB_TYPEC_H + +#include + +/* XXX: Once we have a header for USB Power Delivery, this belongs there */ +#define ALTMODE_MAX_MODES 6 + +/* USB Type-C Specification releases */ +#define USB_TYPEC_REV_1_0 0x100 /* 1.0 */ +#define USB_TYPEC_REV_1_1 0x110 /* 1.1 */ +#define USB_TYPEC_REV_1_2 0x120 /* 1.2 */ + +struct typec_altmode; +struct typec_partner; +struct typec_cable; +struct typec_plug; +struct typec_port; + +struct fwnode_handle; + +enum typec_port_type { + TYPEC_PORT_DFP, + TYPEC_PORT_UFP, + TYPEC_PORT_DRP, +}; + +enum typec_plug_type { + USB_PLUG_NONE, + USB_PLUG_TYPE_A, + USB_PLUG_TYPE_B, + USB_PLUG_TYPE_C, + USB_PLUG_CAPTIVE, +}; + +enum typec_data_role { + TYPEC_DEVICE, + TYPEC_HOST, +}; + +enum typec_role { + 
TYPEC_SINK, + TYPEC_SOURCE, +}; + +enum typec_pwr_opmode { + TYPEC_PWR_MODE_USB, + TYPEC_PWR_MODE_1_5A, + TYPEC_PWR_MODE_3_0A, + TYPEC_PWR_MODE_PD, +}; + +enum typec_accessory { + TYPEC_ACCESSORY_NONE, + TYPEC_ACCESSORY_AUDIO, + TYPEC_ACCESSORY_DEBUG, +}; + +#define TYPEC_MAX_ACCESSORY 3 + +/* + * struct usb_pd_identity - USB Power Delivery identity data + * @id_header: ID Header VDO + * @cert_stat: Cert Stat VDO + * @product: Product VDO + * + * USB power delivery Discover Identity command response data. + * + * REVISIT: This is USB Power Delivery specific information, so this structure + * probable belongs to USB Power Delivery header file once we have them. + */ +struct usb_pd_identity { + u32 id_header; + u32 cert_stat; + u32 product; +}; + +int typec_partner_set_identity(struct typec_partner *partner); +int typec_cable_set_identity(struct typec_cable *cable); + +/* + * struct typec_mode_desc - Individual Mode of an Alternate Mode + * @index: Index of the Mode within the SVID + * @vdo: VDO returned by Discover Modes USB PD command + * @desc: Optional human readable description of the mode + * @roles: Only for ports. DRP if the mode is available in both roles + * + * Description of a mode of an Alternate Mode which a connector, cable plug or + * partner supports. Every mode will have it's own sysfs group. The details are + * the VDO returned by discover modes command, description for the mode and + * active flag telling has the mode being entered or not. + */ +struct typec_mode_desc { + int index; + u32 vdo; + char *desc; + /* Only used with ports */ + enum typec_port_type roles; +}; + +/* + * struct typec_altmode_desc - USB Type-C Alternate Mode Descriptor + * @svid: Standard or Vendor ID + * @n_modes: Number of modes + * @modes: Array of modes supported by the Alternate Mode + * + * Representation of an Alternate Mode that has SVID assigned by USB-IF. 
The + * array of modes will list the modes of a particular SVID that are supported by + * a connector, partner of a cable plug. + */ +struct typec_altmode_desc { + u16 svid; + int n_modes; + struct typec_mode_desc modes[ALTMODE_MAX_MODES]; +}; + +struct typec_altmode +*typec_partner_register_altmode(struct typec_partner *partner, + struct typec_altmode_desc *desc); +struct typec_altmode +*typec_plug_register_altmode(struct typec_plug *plug, + struct typec_altmode_desc *desc); +struct typec_altmode +*typec_port_register_altmode(struct typec_port *port, + struct typec_altmode_desc *desc); +void typec_unregister_altmode(struct typec_altmode *altmode); + +struct typec_port *typec_altmode2port(struct typec_altmode *alt); + +void typec_altmode_update_active(struct typec_altmode *alt, int mode, + bool active); + +enum typec_plug_index { + TYPEC_PLUG_SOP_P, + TYPEC_PLUG_SOP_PP, +}; + +/* + * struct typec_plug_desc - USB Type-C Cable Plug Descriptor + * @index: SOP Prime for the plug connected to DFP and SOP Double Prime for the + * plug connected to UFP + * + * Represents USB Type-C Cable Plug. + */ +struct typec_plug_desc { + enum typec_plug_index index; +}; + +/* + * struct typec_cable_desc - USB Type-C Cable Descriptor + * @type: The plug type from USB PD Cable VDO + * @active: Is the cable active or passive + * @identity: Result of Discover Identity command + * + * Represents USB Type-C Cable attached to USB Type-C port. + */ +struct typec_cable_desc { + enum typec_plug_type type; + unsigned int active:1; + struct usb_pd_identity *identity; +}; + +/* + * struct typec_partner_desc - USB Type-C Partner Descriptor + * @usb_pd: USB Power Delivery support + * @accessory: Audio, Debug or none. + * @identity: Discover Identity command data + * + * Details about a partner that is attached to USB Type-C port. If @identity + * member exists when partner is registered, a directory named "identity" is + * created to sysfs for the partner device. 
+ */ +struct typec_partner_desc { + unsigned int usb_pd:1; + enum typec_accessory accessory; + struct usb_pd_identity *identity; +}; + +/* + * struct typec_capability - USB Type-C Port Capabilities + * @role: DFP (Host-only), UFP (Device-only) or DRP (Dual Role) + * @revision: USB Type-C Specification release. Binary coded decimal + * @pd_revision: USB Power Delivery Specification revision if supported + * @prefer_role: Initial role preference + * @accessory: Supported Accessory Modes + * @fwnode: Optional fwnode of the port + * @try_role: Set data role preference for DRP port + * @dr_set: Set Data Role + * @pr_set: Set Power Role + * @vconn_set: Set VCONN Role + * @activate_mode: Enter/exit given Alternate Mode + * + * Static capabilities of a single USB Type-C port. + */ +struct typec_capability { + enum typec_port_type type; + u16 revision; /* 0120H = "1.2" */ + u16 pd_revision; /* 0300H = "3.0" */ + int prefer_role; + enum typec_accessory accessory[TYPEC_MAX_ACCESSORY]; + + struct fwnode_handle *fwnode; + + int (*try_role)(const struct typec_capability *, + int role); + + int (*dr_set)(const struct typec_capability *, + enum typec_data_role); + int (*pr_set)(const struct typec_capability *, + enum typec_role); + int (*vconn_set)(const struct typec_capability *, + enum typec_role); + + int (*activate_mode)(const struct typec_capability *, + int mode, int activate); +}; + +/* Specific to try_role(). Indicates the user want's to clear the preference. 
*/ +#define TYPEC_NO_PREFERRED_ROLE (-1) + +struct typec_port *typec_register_port(struct device *parent, + const struct typec_capability *cap); +void typec_unregister_port(struct typec_port *port); + +struct typec_partner *typec_register_partner(struct typec_port *port, + struct typec_partner_desc *desc); +void typec_unregister_partner(struct typec_partner *partner); + +struct typec_cable *typec_register_cable(struct typec_port *port, + struct typec_cable_desc *desc); +void typec_unregister_cable(struct typec_cable *cable); + +struct typec_plug *typec_register_plug(struct typec_cable *cable, + struct typec_plug_desc *desc); +void typec_unregister_plug(struct typec_plug *plug); + +void typec_set_data_role(struct typec_port *port, enum typec_data_role role); +void typec_set_pwr_role(struct typec_port *port, enum typec_role role); +void typec_set_vconn_role(struct typec_port *port, enum typec_role role); +void typec_set_pwr_opmode(struct typec_port *port, enum typec_pwr_opmode mode); + +#endif /* __LINUX_USB_TYPEC_H */ -- cgit v1.2.3 From 66a359390e7e34f9a4c489467234b107b3d76169 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Mar 2017 11:35:30 +0100 Subject: USB: core: add helpers to retrieve endpoints Many USB drivers iterate over the available endpoints to find required endpoints of a specific type and direction. Typically the endpoints are required for proper function and a missing endpoint should abort probe. To facilitate code reuse, add a helper to retrieve common endpoints (bulk or interrupt, in or out) and four wrappers to find a single endpoint. Note that the helpers are marked as __must_check to serve as a reminder to always verify that all expected endpoints are indeed present. This also means that any optional endpoints, typically need to be looked up through separate calls. 
Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 148752640693..7041cc950737 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -99,6 +99,41 @@ enum usb_interface_condition { USB_INTERFACE_UNBINDING, }; +int __must_check +usb_find_common_endpoints(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out); + +static inline int __must_check +usb_find_bulk_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in) +{ + return usb_find_common_endpoints(alt, bulk_in, NULL, NULL, NULL); +} + +static inline int __must_check +usb_find_bulk_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_out) +{ + return usb_find_common_endpoints(alt, NULL, bulk_out, NULL, NULL); +} + +static inline int __must_check +usb_find_int_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_in) +{ + return usb_find_common_endpoints(alt, NULL, NULL, int_in, NULL); +} + +static inline int __must_check +usb_find_int_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_out) +{ + return usb_find_common_endpoints(alt, NULL, NULL, NULL, int_out); +} + /** * struct usb_interface - what usb device drivers talk to * @altsetting: array of interface structures, one for each alternate -- cgit v1.2.3 From 279daf4e053470f22c9421a4ab05f8e5a9e9eeec Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 17 Mar 2017 11:35:31 +0100 Subject: USB: core: add helpers to retrieve endpoints in reverse order Several drivers have implemented their endpoint look-up loops in such a way that they have picked the last endpoint descriptor of the 
specified type should more than one such descriptor exist. To avoid any regressions, add corresponding helpers to lookup endpoints by searching the endpoint descriptors in reverse order. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 35 +++++++++++++++++++++++++++++++++++ 1 file changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 7041cc950737..226557362d36 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -106,6 +106,13 @@ usb_find_common_endpoints(struct usb_host_interface *alt, struct usb_endpoint_descriptor **int_in, struct usb_endpoint_descriptor **int_out); +int __must_check +usb_find_common_endpoints_reverse(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in, + struct usb_endpoint_descriptor **bulk_out, + struct usb_endpoint_descriptor **int_in, + struct usb_endpoint_descriptor **int_out); + static inline int __must_check usb_find_bulk_in_endpoint(struct usb_host_interface *alt, struct usb_endpoint_descriptor **bulk_in) @@ -134,6 +141,34 @@ usb_find_int_out_endpoint(struct usb_host_interface *alt, return usb_find_common_endpoints(alt, NULL, NULL, NULL, int_out); } +static inline int __must_check +usb_find_last_bulk_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_in) +{ + return usb_find_common_endpoints_reverse(alt, bulk_in, NULL, NULL, NULL); +} + +static inline int __must_check +usb_find_last_bulk_out_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **bulk_out) +{ + return usb_find_common_endpoints_reverse(alt, NULL, bulk_out, NULL, NULL); +} + +static inline int __must_check +usb_find_last_int_in_endpoint(struct usb_host_interface *alt, + struct usb_endpoint_descriptor **int_in) +{ + return usb_find_common_endpoints_reverse(alt, NULL, NULL, int_in, NULL); +} + +static inline int __must_check +usb_find_last_int_out_endpoint(struct usb_host_interface *alt, + 
struct usb_endpoint_descriptor **int_out) +{ + return usb_find_common_endpoints_reverse(alt, NULL, NULL, NULL, int_out); +} + /** * struct usb_interface - what usb device drivers talk to * @altsetting: array of interface structures, one for each alternate -- cgit v1.2.3 From 41334f54a43ab00cbb294e6a08d0f57068f43025 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Fri, 17 Mar 2017 15:53:19 +0100 Subject: PCI: Include pci.h for struct pci_ops definition struct pci_ecam_ops embeds a struct pci_ops. Explicitly request the definition for struct pci_ops, otherwise gcc might complain: include/linux/pci-ecam.h:29:19: error: field 'pci_ops' has incomplete type Signed-off-by: Marc Gonzalez Signed-off-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index f0d2b9451270..b8f11d783a11 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -16,6 +16,7 @@ #ifndef DRIVERS_PCI_ECAM_H #define DRIVERS_PCI_ECAM_H +#include #include #include -- cgit v1.2.3 From 7a88fa191944589b2ed795bbed32ca6e9e2df31f Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Thu, 23 Mar 2017 13:24:55 +0300 Subject: block: make nr_iovecs unsigned in bio_alloc_bioset() There isn't a bug here, but Smatch is not smart enough to know that "nr_iovecs" can't be negative so it complains about underflows. Really, it's slightly cleaner to make this parameter unsigned. 
Signed-off-by: Dan Carpenter Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 8e521194f6fc..4931756d86d9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -383,7 +383,7 @@ extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -extern struct bio *bio_alloc_bioset(gfp_t, int, struct bio_set *); +extern struct bio *bio_alloc_bioset(gfp_t, unsigned int, struct bio_set *); extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); -- cgit v1.2.3 From 0695bd99c0d22bef4d9d4c72cf537b722ba98531 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Mon, 6 Feb 2017 22:12:04 +0100 Subject: clockevents: Make clockevents_config() static A clockevent device's rate should be configured before or at registration and changed afterwards through clockevents_update_freq() only. For the configuration at registration, we already have clockevents_config_and_register(). Right now, there are no clockevents_config() users outside of the clockevents core. To mitigate the risk of drivers erroneously reconfiguring their rates through clockevents_config() *after* device registration, make clockevents_config() static.
Signed-off-by: Nicolai Stange Signed-off-by: John Stultz --- include/linux/clockchips.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clockchips.h b/include/linux/clockchips.h index 5d3053c34fb3..eef1569e5cd0 100644 --- a/include/linux/clockchips.h +++ b/include/linux/clockchips.h @@ -182,7 +182,6 @@ extern u64 clockevent_delta2ns(unsigned long latch, struct clock_event_device *e extern void clockevents_register_device(struct clock_event_device *dev); extern int clockevents_unbind_device(struct clock_event_device *ced, int cpu); -extern void clockevents_config(struct clock_event_device *dev, u32 freq); extern void clockevents_config_and_register(struct clock_event_device *dev, u32 freq, unsigned long min_delta, unsigned long max_delta); -- cgit v1.2.3 From 83e007a0c6a3f4bfdf8f3f8d0fc266cda189b3d6 Mon Sep 17 00:00:00 2001 From: Carlo Caione Date: Fri, 3 Mar 2017 16:17:58 +0100 Subject: firmware: meson-sm: Check for buffer output size After the data is read by the secure monitor driver, it is copied into the output buffer while checking only the size of the bounce buffer, not the size of the output buffer. Fix this in the secure monitor driver by slightly changing the API. Also fix the efuse driver, which is the only driver using this API, so as not to break bisectability.
Signed-off-by: Carlo Caione Acked-by: Srinivas Kandagatla # for nvmem Acked-by: Mark Rutland Signed-off-by: Kevin Hilman --- include/linux/firmware/meson/meson_sm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/firmware/meson/meson_sm.h b/include/linux/firmware/meson/meson_sm.h index 8e953c6f394a..37a5eaea69dd 100644 --- a/include/linux/firmware/meson/meson_sm.h +++ b/include/linux/firmware/meson/meson_sm.h @@ -25,7 +25,7 @@ int meson_sm_call(unsigned int cmd_index, u32 *ret, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); int meson_sm_call_write(void *buffer, unsigned int b_size, unsigned int cmd_index, u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); -int meson_sm_call_read(void *buffer, unsigned int cmd_index, u32 arg0, u32 arg1, - u32 arg2, u32 arg3, u32 arg4); +int meson_sm_call_read(void *buffer, unsigned int bsize, unsigned int cmd_index, + u32 arg0, u32 arg1, u32 arg2, u32 arg3, u32 arg4); #endif /* _MESON_SM_FW_H_ */ -- cgit v1.2.3 From 91b8270f2a4d1d9b268de90451cdca63a70052d6 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 22 Mar 2017 17:27:34 -0700 Subject: Add a helper function to get socket cookie in eBPF Retrieve the socket cookie generated by sock_gen_cookie() from a sk_buff with a known socket. Generates a new cookie if one was not yet set. If the socket pointer inside sk_buff is NULL, 0 is returned. The helper function could be useful in monitoring per socket networking traffic statistics and provide a unique socket identifier per namespace. Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: Willem de Bruijn Signed-off-by: Chenbo Feng Signed-off-by: David S.
Miller --- include/linux/sock_diag.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sock_diag.h b/include/linux/sock_diag.h index a0596ca0e80a..a2f8109bb215 100644 --- a/include/linux/sock_diag.h +++ b/include/linux/sock_diag.h @@ -24,6 +24,7 @@ void sock_diag_unregister(const struct sock_diag_handler *h); void sock_diag_register_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); void sock_diag_unregister_inet_compat(int (*fn)(struct sk_buff *skb, struct nlmsghdr *nlh)); +u64 sock_gen_cookie(struct sock *sk); int sock_diag_check_cookie(struct sock *sk, const __u32 *cookie); void sock_diag_save_cookie(struct sock *sk, __u32 *cookie); -- cgit v1.2.3 From 380639c7cc786ec663e43eb3896ccf9172a46900 Mon Sep 17 00:00:00 2001 From: Russell King Date: Wed, 31 Aug 2016 08:49:44 +0100 Subject: gpio: add generic single-register fixed-direction GPIO driver Add a simple, generic, single register fixed-direction GPIO driver. This is able to support a single register with a mixture of inputs and outputs. This is different from gpio-mmio and gpio-74xx-mmio: * gpio-mmio doesn't allow a fixed direction, it assumes there is always a direction register. * gpio-74xx-mmio only supports all-in or all-out setups * gpio-74xx-mmio is DT only, this needs to support legacy too * they don't double-read when getting the GPIO value, as required by some implementations that this driver supports * we need to always do 32-bit reads, which bgpio doesn't guarantee * the current output state may not be readable from the hardware register - reading may reflect input status but not output status. 
Signed-off-by: Russell King Signed-off-by: Linus Walleij --- include/linux/gpio/gpio-reg.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) create mode 100644 include/linux/gpio/gpio-reg.h (limited to 'include/linux') diff --git a/include/linux/gpio/gpio-reg.h b/include/linux/gpio/gpio-reg.h new file mode 100644 index 000000000000..0352bec7319a --- /dev/null +++ b/include/linux/gpio/gpio-reg.h @@ -0,0 +1,12 @@ +#ifndef GPIO_REG_H +#define GPIO_REG_H + +struct device; + +struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg, + int base, int num, const char *label, u32 direction, u32 def_out, + const char *const *names); + +int gpio_reg_resume(struct gpio_chip *gc); + +#endif -- cgit v1.2.3 From 0e3cb6ee386f384a9131f0c7db52a0a961d2ded9 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 2 Sep 2016 12:05:56 +0100 Subject: gpio: gpio-reg: add irq mapping for gpio-reg users Add support for mapping gpio-reg gpios to interrupts. This may be a non-linear mapping - some gpios in the register may not even have corresponding interrupts associated with them, so we need to pass an array. 
Signed-off-by: Russell King Signed-off-by: Linus Walleij --- include/linux/gpio/gpio-reg.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/gpio-reg.h b/include/linux/gpio/gpio-reg.h index 0352bec7319a..90e0b9060e6d 100644 --- a/include/linux/gpio/gpio-reg.h +++ b/include/linux/gpio/gpio-reg.h @@ -2,10 +2,11 @@ #define GPIO_REG_H struct device; +struct irq_domain; struct gpio_chip *gpio_reg_init(struct device *dev, void __iomem *reg, int base, int num, const char *label, u32 direction, u32 def_out, - const char *const *names); + const char *const *names, struct irq_domain *irqdom, const int *irqs); int gpio_reg_resume(struct gpio_chip *gc); -- cgit v1.2.3 From ccebcf3f224a44ec8e9c5bfca9d8e5d29298a5a8 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 15 Mar 2017 13:20:43 -0500 Subject: crypto: ccp - Add SHA-2 384- and 512-bit support Incorporate 384-bit and 512-bit hashing for a version 5 CCP device Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index c71dd8fa5764..90a1fbe84219 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -249,6 +249,8 @@ enum ccp_sha_type { CCP_SHA_TYPE_1 = 1, CCP_SHA_TYPE_224, CCP_SHA_TYPE_256, + CCP_SHA_TYPE_384, + CCP_SHA_TYPE_512, CCP_SHA_TYPE__LAST, }; -- cgit v1.2.3 From 990672d48515ce09c76fcf1ceccee48b0dd1942b Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 15 Mar 2017 13:20:52 -0500 Subject: crypto: ccp - Enable 3DES function on v5 CCPs Wire up support for Triple DES in ECB mode. 
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 57 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 56 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index 90a1fbe84219..fa0261748920 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -292,6 +292,60 @@ struct ccp_sha_engine { * final sha cmd */ }; +/***** 3DES engine *****/ +enum ccp_des3_mode { + CCP_DES3_MODE_ECB = 0, + CCP_DES3_MODE_CBC, + CCP_DES3_MODE_CFB, + CCP_DES3_MODE__LAST, +}; + +enum ccp_des3_type { + CCP_DES3_TYPE_168 = 1, + CCP_DES3_TYPE__LAST, + }; + +enum ccp_des3_action { + CCP_DES3_ACTION_DECRYPT = 0, + CCP_DES3_ACTION_ENCRYPT, + CCP_DES3_ACTION__LAST, +}; + +/** + * struct ccp_des3_engine - CCP SHA operation + * @type: Type of 3DES operation + * @mode: cipher mode + * @action: 3DES operation (decrypt/encrypt) + * @key: key to be used for this 3DES operation + * @key_len: length of key (in bytes) + * @iv: IV to be used for this AES operation + * @iv_len: length in bytes of iv + * @src: input data to be used for this operation + * @src_len: length of input data used for this operation (in bytes) + * @dst: output data produced by this operation + * + * Variables required to be set when calling ccp_enqueue_cmd(): + * - type, mode, action, key, key_len, src, dst, src_len + * - iv, iv_len for any mode other than ECB + * + * The iv variable is used as both input and output. On completion of the + * 3DES operation the new IV overwrites the old IV. 
+ */ +struct ccp_des3_engine { + enum ccp_des3_type type; + enum ccp_des3_mode mode; + enum ccp_des3_action action; + + struct scatterlist *key; + u32 key_len; /* In bytes */ + + struct scatterlist *iv; + u32 iv_len; /* In bytes */ + + struct scatterlist *src, *dst; + u64 src_len; /* In bytes */ +}; + /***** RSA engine *****/ /** * struct ccp_rsa_engine - CCP RSA operation @@ -541,7 +595,7 @@ struct ccp_ecc_engine { enum ccp_engine { CCP_ENGINE_AES = 0, CCP_ENGINE_XTS_AES_128, - CCP_ENGINE_RSVD1, + CCP_ENGINE_DES3, CCP_ENGINE_SHA, CCP_ENGINE_RSA, CCP_ENGINE_PASSTHRU, @@ -589,6 +643,7 @@ struct ccp_cmd { union { struct ccp_aes_engine aes; struct ccp_xts_aes_engine xts; + struct ccp_des3_engine des3; struct ccp_sha_engine sha; struct ccp_rsa_engine rsa; struct ccp_passthru_engine passthru; -- cgit v1.2.3 From 36cf515b9bbe298e1ce7384620f0d4ec45ad3328 Mon Sep 17 00:00:00 2001 From: Gary R Hook Date: Wed, 15 Mar 2017 13:21:01 -0500 Subject: crypto: ccp - Enable support for AES GCM on v5 CCPs A version 5 device provides the primitive commands required for AES GCM. This patch adds support for en/decryption. 
Signed-off-by: Gary R Hook Signed-off-by: Herbert Xu --- include/linux/ccp.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ccp.h b/include/linux/ccp.h index fa0261748920..dbe8aa0f2940 100644 --- a/include/linux/ccp.h +++ b/include/linux/ccp.h @@ -123,6 +123,10 @@ enum ccp_aes_mode { CCP_AES_MODE_CFB, CCP_AES_MODE_CTR, CCP_AES_MODE_CMAC, + CCP_AES_MODE_GHASH, + CCP_AES_MODE_GCTR, + CCP_AES_MODE_GCM, + CCP_AES_MODE_GMAC, CCP_AES_MODE__LAST, }; @@ -137,6 +141,9 @@ enum ccp_aes_action { CCP_AES_ACTION_ENCRYPT, CCP_AES_ACTION__LAST, }; +/* Overloaded field */ +#define CCP_AES_GHASHAAD CCP_AES_ACTION_DECRYPT +#define CCP_AES_GHASHFINAL CCP_AES_ACTION_ENCRYPT /** * struct ccp_aes_engine - CCP AES operation @@ -181,6 +188,8 @@ struct ccp_aes_engine { struct scatterlist *cmac_key; /* K1/K2 cmac key required for * final cmac cmd */ u32 cmac_key_len; /* In bytes */ + + u32 aad_len; /* In bytes */ }; /***** XTS-AES engine *****/ -- cgit v1.2.3 From 3c7eb3cc8360736123a3139a1ec727d746de3252 Mon Sep 17 00:00:00 2001 From: "Jason A. Donenfeld" Date: Thu, 16 Mar 2017 15:18:57 +0100 Subject: md5: remove from lib and only live in crypto The md5_transform function is no longer used any where in the tree, except for the crypto api's actual implementation of md5, so we can drop the function from lib and put it as a static function of the crypto file, where it belongs. There should be no new users of md5_transform, anyway, since there are more modern ways of doing what it once achieved. Signed-off-by: Jason A. 
Donenfeld Reviewed-by: Eric Biggers Signed-off-by: Herbert Xu --- include/linux/cryptohash.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cryptohash.h b/include/linux/cryptohash.h index 3252799832cf..df4d3e943d28 100644 --- a/include/linux/cryptohash.h +++ b/include/linux/cryptohash.h @@ -10,9 +10,4 @@ void sha_init(__u32 *buf); void sha_transform(__u32 *digest, const char *data, __u32 *W); -#define MD5_DIGEST_WORDS 4 -#define MD5_MESSAGE_BYTES 64 - -void md5_transform(__u32 *hash, __u32 const *in); - #endif -- cgit v1.2.3 From 0ba42a599fbf59a55c1ffedb980be3726c734433 Mon Sep 17 00:00:00 2001 From: Masanari Iida Date: Tue, 7 Mar 2017 20:48:02 +0900 Subject: treewide: Fix typo in xml/driver-api/basics.xml This patch fixes spelling typos found in Documentation/output/xml/driver-api/basics.xml. The xml file is generated from comments in the source, so the comments themselves had to be fixed. Signed-off-by: Masanari Iida Signed-off-by: Jiri Kosina --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..5ce85e861901 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -186,7 +186,7 @@ extern long io_schedule_timeout(long timeout); extern void io_schedule(void); /** - * struct prev_cputime - snaphsot of system and user cputime + * struct prev_cputime - snapshot of system and user cputime * @utime: time spent in user mode * @stime: time spent in system mode * @lock: protects the above two fields -- cgit v1.2.3 From 8f48cfabac57977338f5c828ed3e12fc34373c7d Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Fri, 24 Mar 2017 22:13:35 +0800 Subject: cgroup: drop duplicate header nsproxy.h Drop duplicate header nsproxy.h from linux/cgroup.h.
Signed-off-by: Geliang Tang Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 44129793c7b8..34b4a298e52e 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,7 +17,6 @@ #include #include #include -#include #include #include #include -- cgit v1.2.3 From e725c731e3bb1e892e7b564c945b121cb41d1087 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 3 Mar 2017 13:37:33 -0500 Subject: tracing: Split tracing initialization into two for early initialization Create an early_trace_init() function that will initialize the buffers and allow for earlier use of trace_printk(). This will also allow for future work to have function tracing start earlier at boot up. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3633e8beff39..569db5589851 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -42,8 +42,10 @@ /* Main tracing buffer and events set up */ #ifdef CONFIG_TRACING void trace_init(void); +void early_trace_init(void); #else static inline void trace_init(void) { } +static inline void early_trace_init(void) { } #endif struct module; -- cgit v1.2.3 From 6f8802852f7e58a12177a86179803b9efaad98e2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 17 Mar 2017 00:12:29 +0800 Subject: block: introduce bio_copy_data_partial Turns out we can use bio_copy_data in raid1's write behind, and we can make alloc_behind_pages() more clean/efficient, but we need a partial version of bio_copy_data().
Signed-off-by: Ming Lei Reviewed-by: Jens Axboe Signed-off-by: Shaohua Li --- include/linux/bio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 8e521194f6fc..42b62a0288b0 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -468,6 +468,8 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); +extern void bio_copy_data_partial(struct bio *dst, struct bio *src, + int offset, int size); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern void bio_free_pages(struct bio *bio); -- cgit v1.2.3 From 264b88c9e5c86c92ca1d67689779362760baf651 Mon Sep 17 00:00:00 2001 From: Harald Geyer Date: Thu, 23 Feb 2017 17:06:52 +0000 Subject: regulator: core: Add new notification for enabling of regulator This is useful for devices, which need some time to start up, to help the drivers track how long the supply has been up already. Ie whether it can safely talk to the HW or needs to wait. 
Signed-off-by: Harald Geyer Signed-off-by: Mark Brown --- include/linux/regulator/consumer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/regulator/consumer.h b/include/linux/regulator/consumer.h index ea0fffa5faeb..df176d7c2b87 100644 --- a/include/linux/regulator/consumer.h +++ b/include/linux/regulator/consumer.h @@ -119,6 +119,7 @@ struct regmap; #define REGULATOR_EVENT_ABORT_VOLTAGE_CHANGE 0x200 #define REGULATOR_EVENT_PRE_DISABLE 0x400 #define REGULATOR_EVENT_ABORT_DISABLE 0x800 +#define REGULATOR_EVENT_ENABLE 0x1000 /* * Regulator errors that can be queried using regulator_get_error_flags -- cgit v1.2.3 From e6e14f63d744cede856ba5d517d6b266c9cfbf41 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Mar 2017 10:01:17 -0700 Subject: of_mdio: Correct check against CONFIG_OF CONFIG_OF_MDIO is actually what triggers the build of drivers/of/of_mdio.c, so providing inline stubs when CONFIG_OF_MDIO=y should be based on that symbol as well. Signed-off-by: Florian Fainelli Signed-off-by: David S. 
Miller --- include/linux/of_mdio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_mdio.h b/include/linux/of_mdio.h index a58cca8bcb29..ba35ba520487 100644 --- a/include/linux/of_mdio.h +++ b/include/linux/of_mdio.h @@ -12,7 +12,7 @@ #include #include -#ifdef CONFIG_OF +#if IS_ENABLED(CONFIG_OF_MDIO) extern int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np); extern struct phy_device *of_phy_find_device(struct device_node *phy_np); extern struct phy_device *of_phy_connect(struct net_device *dev, @@ -32,7 +32,7 @@ extern int of_phy_register_fixed_link(struct device_node *np); extern void of_phy_deregister_fixed_link(struct device_node *np); extern bool of_phy_is_fixed_link(struct device_node *np); -#else /* CONFIG_OF */ +#else /* CONFIG_OF_MDIO */ static inline int of_mdiobus_register(struct mii_bus *mdio, struct device_node *np) { /* -- cgit v1.2.3 From 90eff9096c01ba90cdae504a6b95ee87fe2556a3 Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Thu, 23 Mar 2017 10:01:19 -0700 Subject: net: phy: Allow splitting MDIO bus/device support from PHYs Introduce a new configuration symbol: MDIO_DEVICE which allows building the MDIO devices and bus code, without pulling in the entire Ethernet PHY library and devices code. PHYLIB now selects MDIO_DEVICE and the relevant Makefile files are updated to reflect that. When MDIO_DEVICE (MDIO bus/device only) is selected, but not PHYLIB, we have mdio-bus.ko as a loadable module, and it does not have a module_exit() function because the safety of removing a bus class is unclear. When both MDIO_DEVICE and PHYLIB are enabled, we need to assemble everything into a common loadable module: libphy.ko because of nasty circular dependencies between phy.c, phy_device.c and mdio_bus.c which are really tough to untangle. Signed-off-by: Florian Fainelli Signed-off-by: David S.
Miller --- include/linux/phy.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 2efca6b39fba..624cecf69c28 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -745,8 +745,24 @@ int phy_write_mmd(struct phy_device *phydev, int devad, u32 regnum, u16 val); struct phy_device *phy_device_create(struct mii_bus *bus, int addr, int phy_id, bool is_c45, struct phy_c45_device_ids *c45_ids); +#if IS_ENABLED(CONFIG_PHYLIB) struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45); int phy_device_register(struct phy_device *phy); +void phy_device_free(struct phy_device *phydev); +#else +static inline +struct phy_device *get_phy_device(struct mii_bus *bus, int addr, bool is_c45) +{ + return NULL; +} + +static inline int phy_device_register(struct phy_device *phy) +{ + return 0; +} + +static inline void phy_device_free(struct phy_device *phydev) { } +#endif /* CONFIG_PHYLIB */ void phy_device_remove(struct phy_device *phydev); int phy_init_hw(struct phy_device *phydev); int phy_suspend(struct phy_device *phydev); @@ -827,7 +843,6 @@ int phy_ethtool_ksettings_set(struct phy_device *phydev, int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); -void phy_device_free(struct phy_device *phydev); int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, @@ -854,8 +869,10 @@ int phy_ethtool_set_link_ksettings(struct net_device *ndev, const struct ethtool_link_ksettings *cmd); int phy_ethtool_nway_reset(struct net_device *ndev); +#if IS_ENABLED(CONFIG_PHYLIB) int __init mdio_bus_init(void); void mdio_bus_exit(void); +#endif extern struct bus_type mdio_bus_type; @@ -866,7 +883,7 @@ struct mdio_board_info { const void *platform_data; }; -#if 
IS_ENABLED(CONFIG_PHYLIB) +#if IS_ENABLED(CONFIG_MDIO_DEVICE) int mdiobus_register_board_info(const struct mdio_board_info *info, unsigned int n); #else -- cgit v1.2.3 From 42c269c88dc146982a54a8267f71abc99f12852a Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 3 Mar 2017 16:15:39 -0500 Subject: ftrace: Allow for function tracing to record init functions on boot up Adding a hook into free_reserve_area() that informs ftrace that boot up init text is being free, lets ftrace safely remove those init functions from its records, which keeps ftrace from trying to modify text that no longer exists. Note, this still does not allow for tracing .init text of modules, as modules require different work for freeing its init code. Link: http://lkml.kernel.org/r/1488502497.7212.24.camel@linux.intel.com Cc: linux-mm@kvack.org Cc: Vlastimil Babka Cc: Mel Gorman Cc: Peter Zijlstra Requested-by: Todd Brandt Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 5 +++++ include/linux/init.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 569db5589851..0276a2c487e6 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -146,6 +146,10 @@ struct ftrace_ops_hash { struct ftrace_hash *filter_hash; struct mutex regex_lock; }; + +void ftrace_free_mem(void *start, void *end); +#else +static inline void ftrace_free_mem(void *start, void *end) { } #endif /* @@ -262,6 +266,7 @@ static inline int ftrace_nr_registered_ops(void) } static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } +static inline void ftrace_free_mem(void *start, void *end) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER diff --git a/include/linux/init.h b/include/linux/init.h index 79af0962fd52..94769d687cf0 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -39,7 +39,7 @@ /* These are for everybody (although 
not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold notrace __latent_entropy +#define __init __section(.init.text) __cold __inittrace __latent_entropy #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) @@ -68,8 +68,10 @@ #ifdef MODULE #define __exitused +#define __inittrace notrace #else #define __exitused __used +#define __inittrace #endif #define __exit __section(.exit.text) __exitused __cold notrace -- cgit v1.2.3 From af0009fc16a45d091f896794e97a6457f9a7eddf Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 16 Mar 2017 11:01:06 -0400 Subject: tracing: Move trace_handle_return() out of line Currently trace_handle_return() looks like this: static inline enum print_line_t trace_handle_return(struct trace_seq *s) { return trace_seq_has_overflowed(s) ? TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; } Where trace_seq_overflowed(s) is: static inline bool trace_seq_has_overflowed(struct trace_seq *s) { return s->full || seq_buf_has_overflowed(&s->seq); } And seq_buf_has_overflowed(&s->seq) is: static inline bool seq_buf_has_overflowed(struct seq_buf *s) { return s->len > s->size; } Making trace_handle_return() into: return (s->full || (s->seq->len > s->seq->size)) ? TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; One would think this is not an issue to keep as an inline. But because this is used in the TRACE_EVENT() macro, it is extended for every tracepoint in the system. Taking a look at a single tracepoint x86_irq_vector (was the first one I randomly chosen). As trace_handle_return is used in the TRACE_EVENT() macro of trace_raw_output_##call() we disassemble trace_raw_output_x86_irq_vector and do a diff: - is the original + is the out-of-line code I removed identical lines that were different just due to different addresses. 
--- /tmp/irq-vec-orig 2017-03-16 09:12:48.569384851 -0400 +++ /tmp/irq-vec-ool 2017-03-16 09:13:39.378153385 -0400 @@ -6,27 +6,23 @@ 53 push %rbx 48 89 fb mov %rdi,%rbx 4c 8b a7 c0 20 00 00 mov 0x20c0(%rdi),%r12 e8 f7 72 13 00 callq ffffffff81155c80 83 f8 01 cmp $0x1,%eax 74 05 je ffffffff8101e993 5b pop %rbx 41 5c pop %r12 5d pop %rbp c3 retq 41 8b 54 24 08 mov 0x8(%r12),%edx - 48 8d bb 98 10 00 00 lea 0x1098(%rbx),%rdi + 48 81 c3 98 10 00 00 add $0x1098,%rbx - 48 c7 c6 7b 8a a0 81 mov $0xffffffff81a08a7b,%rsi + 48 c7 c6 ab 8a a0 81 mov $0xffffffff81a08aab,%rsi - e8 c5 85 13 00 callq ffffffff81156f70 === here's the start of the main difference === + 48 89 df mov %rbx,%rdi + e8 62 7e 13 00 callq ffffffff81156810 - 8b 93 b8 20 00 00 mov 0x20b8(%rbx),%edx - 31 c0 xor %eax,%eax - 85 d2 test %edx,%edx - 75 11 jne ffffffff8101e9c8 - 48 8b 83 a8 20 00 00 mov 0x20a8(%rbx),%rax - 48 39 83 a0 20 00 00 cmp %rax,0x20a0(%rbx) - 0f 93 c0 setae %al + 48 89 df mov %rbx,%rdi + e8 4a c5 12 00 callq ffffffff8114af00 5b pop %rbx - 0f b6 c0 movzbl %al,%eax === end === 41 5c pop %r12 5d pop %rbp c3 retq If you notice, the original has 22 bytes of text more than the out of line version. As this is for every TRACE_EVENT() defined in the system, this can become quite large. text data bss dec hex filename 8690305 5450490 1298432 15439227 eb957b vmlinux-orig 8681725 5450490 1298432 15430647 eb73f7 vmlinux-handle This change has a total of 8580 bytes in savings. 
$ objdump -dr /tmp/vmlinux-orig | grep '^[0-9a-f]* Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 0af63c4381b9..a556805eff8a 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -138,16 +138,7 @@ enum print_line_t { TRACE_TYPE_NO_CONSUME = 3 /* Handled but ask to not consume */ }; -/* - * Several functions return TRACE_TYPE_PARTIAL_LINE if the trace_seq - * overflowed, and TRACE_TYPE_HANDLED otherwise. This helper function - * simplifies those functions and keeps them in sync. - */ -static inline enum print_line_t trace_handle_return(struct trace_seq *s) -{ - return trace_seq_has_overflowed(s) ? - TRACE_TYPE_PARTIAL_LINE : TRACE_TYPE_HANDLED; -} +enum print_line_t trace_handle_return(struct trace_seq *s); void tracing_generic_entry_update(struct trace_entry *entry, unsigned long flags, -- cgit v1.2.3 From aff2615763f206f897146e0ee1ddae8e22055ae3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Sat, 25 Mar 2017 00:52:05 +0300 Subject: net/mlx5e: Single bfreg (UAR) for all mlx5e SQs and netdevs One is sufficient since Blue Flame is not supported anymore. This will also come in handy for switchdev mode to save resources, since VF representors will use same single UAR as well for their own SQs. Signed-off-by: Saeed Mahameed Reviewed-by: Tariq Toukan Signed-off-by: David S. 
Miller --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2fcff6b4503f..f50864626230 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -728,6 +728,7 @@ struct mlx5e_resources { u32 pdn; struct mlx5_td td; struct mlx5_core_mkey mkey; + struct mlx5_sq_bfreg bfreg; }; struct mlx5_core_dev { -- cgit v1.2.3 From 869ab90f0ae0002ce6e9d3a5c75156ae8de48ffc Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Fri, 24 Mar 2017 18:03:48 -0700 Subject: block: constify struct blk_integrity_profile blk_integrity_profile's are never modified, so mark them 'const' so that they are placed in .rodata and benefit from memory protection. Signed-off-by: Eric Biggers Signed-off-by: Jens Axboe --- include/linux/genhd.h | 10 +++++----- include/linux/t10-pi.h | 8 ++++---- 2 files changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 76f39754e7b0..9e11082c7f9b 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -159,11 +159,11 @@ struct badblocks; #if defined(CONFIG_BLK_DEV_INTEGRITY) struct blk_integrity { - struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; + const struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; }; #endif /* CONFIG_BLK_DEV_INTEGRITY */ diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 9fba9dd33544..9375d23a24e7 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -34,9 +34,9 @@ struct t10_pi_tuple { }; -extern struct blk_integrity_profile t10_pi_type1_crc; -extern struct blk_integrity_profile t10_pi_type1_ip; -extern struct blk_integrity_profile t10_pi_type3_crc; -extern struct blk_integrity_profile 
t10_pi_type3_ip; +extern const struct blk_integrity_profile t10_pi_type1_crc; +extern const struct blk_integrity_profile t10_pi_type1_ip; +extern const struct blk_integrity_profile t10_pi_type3_crc; +extern const struct blk_integrity_profile t10_pi_type3_ip; #endif -- cgit v1.2.3 From f45958756fef552436e4a63029a168495920026e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 24 Mar 2017 10:34:43 -0700 Subject: block: remove bio_clone_bioset_partial() commit c18a1e0(block: introduce bio_clone_bioset_partial()) introduced bio_clone_bioset_partial() for raid1 write behind IO. Now the write behind is rewritten by Ming. We don't need the API any more, so revert the commit. Cc: Christoph Hellwig Reviewed-by: Jens Axboe Reviewed-by: Ming Lei Signed-off-by: Shaohua Li --- include/linux/bio.h | 11 ++--------- 1 file changed, 2 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 42b62a0288b0..fafef6343d1b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -183,7 +183,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, #define bio_iter_last(bvec, iter) ((iter).bi_size == (bvec).bv_len) -static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) +static inline unsigned bio_segments(struct bio *bio) { unsigned segs = 0; struct bio_vec bv; @@ -205,17 +205,12 @@ static inline unsigned __bio_segments(struct bio *bio, struct bvec_iter *bvec) break; } - __bio_for_each_segment(bv, bio, iter, *bvec) + bio_for_each_segment(bv, bio, iter) segs++; return segs; } -static inline unsigned bio_segments(struct bio *bio) -{ - return __bio_segments(bio, &bio->bi_iter); -} - /* * get a reference to a bio, so it won't disappear. 
the intended use is * something like: @@ -389,8 +384,6 @@ extern void bio_put(struct bio *); extern void __bio_clone_fast(struct bio *, struct bio *); extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); -extern struct bio *bio_clone_bioset_partial(struct bio *, gfp_t, - struct bio_set *, int, int); extern struct bio_set *fs_bio_set; -- cgit v1.2.3 From d7ed89d5aadf09f1060cd3a9cf07df17447c7392 Mon Sep 17 00:00:00 2001 From: Song Hongyan Date: Mon, 20 Mar 2017 22:28:45 +0800 Subject: iio: hid: Add humidity sensor support Environmental humidity sensor is a hid defined sensor, it shows raw humidity measurement of air. More information can be found in: http://www.usb.org/developers/hidpage/HUTRR39b.pdf According to IIO ABI definition, humidityrelative data output unit is milli percent. Add the unit convert from percent to milli percent. Signed-off-by: Song Hongyan Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-ids.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-ids.h b/include/linux/hid-sensor-ids.h index 46dd1f27d2f2..761f86242473 100644 --- a/include/linux/hid-sensor-ids.h +++ b/include/linux/hid-sensor-ids.h @@ -49,6 +49,10 @@ #define HID_USAGE_SENSOR_TEMPERATURE 0x200033 #define HID_USAGE_SENSOR_DATA_ENVIRONMENTAL_TEMPERATURE 0x200434 +/* humidity */ +#define HID_USAGE_SENSOR_HUMIDITY 0x200032 +#define HID_USAGE_SENSOR_ATMOSPHERIC_HUMIDITY 0x200433 + /* Gyro 3D: (200076) */ #define HID_USAGE_SENSOR_GYRO_3D 0x200076 #define HID_USAGE_SENSOR_DATA_ANGL_VELOCITY 0x200456 -- cgit v1.2.3 From 8ce371f9846ef1e8b3cc8f6865766cb5c1f17e40 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 20 Mar 2017 12:26:55 +0100 Subject: lockdep: Fix per-cpu static objects Since commit 383776fa7527 ("locking/lockdep: Handle statically initialized PER_CPU locks properly") we try to collapse per-cpu locks into a 
single class by giving them all the same key. For this key we choose the canonical address of the per-cpu object, which would be the offset into the per-cpu area. This has two problems: - there is a case where we run !0 lock->key through static_obj() and expect this to pass; it doesn't for canonical pointers. - 0 is a valid canonical address. Cure both issues by redefining the canonical address as the address of the per-cpu variable on the boot CPU. Since I didn't want to rely on CPU0 being the boot-cpu, or even existing at all, track the boot CPU in a variable. Fixes: 383776fa7527 ("locking/lockdep: Handle statically initialized PER_CPU locks properly") Reported-by: kernel test robot Signed-off-by: Peter Zijlstra (Intel) Tested-by: Borislav Petkov Cc: Sebastian Andrzej Siewior Cc: linux-mm@kvack.org Cc: wfg@linux.intel.com Cc: kernel test robot Cc: LKP Link: http://lkml.kernel.org/r/20170320114108.kbvcsuepem45j5cr@hirez.programming.kicks-ass.net Signed-off-by: Thomas Gleixner --- include/linux/smp.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 8e0cb7a0f836..68123c1fe549 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -120,6 +120,13 @@ extern unsigned int setup_max_cpus; extern void __init setup_nr_cpu_ids(void); extern void __init smp_init(void); +extern int __boot_cpu_id; + +static inline int get_boot_cpu_id(void) +{ + return __boot_cpu_id; +} + #else /* !SMP */ static inline void smp_send_stop(void) { } @@ -158,6 +165,11 @@ static inline void smp_init(void) { up_late_init(); } static inline void smp_init(void) { } #endif +static inline int get_boot_cpu_id(void) +{ + return 0; +} + #endif /* !SMP */ /* -- cgit v1.2.3 From e4e55b47ed9ae2c05ff062601ff6dacbe9dc4775 Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Fri, 24 Mar 2017 20:46:33 +0900 Subject: LSM: Revive security_task_alloc() hook and per "struct task_struct" security blob. 
MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We switched from "struct task_struct"->security to "struct cred"->security in Linux 2.6.29. But not all LSM modules were happy with that change. TOMOYO LSM module is an example which want to use per "struct task_struct" security blob, for TOMOYO's security context is defined based on "struct task_struct" rather than "struct cred". AppArmor LSM module is another example which want to use it, for AppArmor is currently abusing the cred a little bit to store the change_hat and setexeccon info. Although security_task_free() hook was revived in Linux 3.4 because Yama LSM module wanted to release per "struct task_struct" security blob, security_task_alloc() hook and "struct task_struct"->security field were not revived. Nowadays, we are getting proposals of lightweight LSM modules which want to use per "struct task_struct" security blob. We are already allowing multiple concurrent LSM modules (up to one fully armored module which uses "struct cred"->security field or exclusive hooks like security_xfrm_state_pol_flow_match(), plus unlimited number of lightweight modules which do not use "struct cred"->security nor exclusive hooks) as long as they are built into the kernel. But this patch does not implement variable length "struct task_struct"->security field which will become needed when multiple LSM modules want to use "struct task_struct"-> security field. Although it won't be difficult to implement variable length "struct task_struct"->security field, let's think about it after we merged this patch. 
Signed-off-by: Tetsuo Handa Acked-by: John Johansen Acked-by: Serge Hallyn Acked-by: Casey Schaufler Tested-by: Djalal Harouni Acked-by: José Bollo Cc: Paul Moore Cc: Stephen Smalley Cc: Eric Paris Cc: Kees Cook Cc: James Morris Cc: José Bollo Signed-off-by: James Morris --- include/linux/init_task.h | 7 +++++++ include/linux/lsm_hooks.h | 9 ++++++++- include/linux/sched.h | 4 ++++ include/linux/security.h | 7 +++++++ 4 files changed, 26 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 91d9049f0039..926f2f553cc5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -210,6 +210,12 @@ extern struct cred init_cred; # define INIT_TASK_TI(tsk) #endif +#ifdef CONFIG_SECURITY +#define INIT_TASK_SECURITY .security = NULL, +#else +#define INIT_TASK_SECURITY +#endif + /* * INIT_TASK is used to set up the first task table, touch at * your own risk!. Base=0, limit=0x1fffff (=2MB) @@ -288,6 +294,7 @@ extern struct cred init_cred; INIT_VTIME(tsk) \ INIT_NUMA_BALANCING(tsk) \ INIT_KASAN(tsk) \ + INIT_TASK_SECURITY \ } diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index 1aa63335de9e..080f34e66017 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -533,8 +533,13 @@ * manual page for definitions of the @clone_flags. * @clone_flags contains the flags indicating what should be shared. * Return 0 if permission is granted. + * @task_alloc: + * @task task being allocated. + * @clone_flags contains the flags indicating what should be shared. + * Handle allocation of task-related resources. + * Returns a zero on success, negative values on failure. * @task_free: - * @task task being freed + * @task task about to be freed. * Handle release of task-related resources. (Note that this can be called * from interrupt context.) 
* @cred_alloc_blank: @@ -1482,6 +1487,7 @@ union security_list_options { int (*file_open)(struct file *file, const struct cred *cred); int (*task_create)(unsigned long clone_flags); + int (*task_alloc)(struct task_struct *task, unsigned long clone_flags); void (*task_free)(struct task_struct *task); int (*cred_alloc_blank)(struct cred *cred, gfp_t gfp); void (*cred_free)(struct cred *cred); @@ -1748,6 +1754,7 @@ struct security_hook_heads { struct list_head file_receive; struct list_head file_open; struct list_head task_create; + struct list_head task_alloc; struct list_head task_free; struct list_head cred_alloc_blank; struct list_head cred_free; diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..71b8df306bb0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1037,6 +1037,10 @@ struct task_struct { #ifdef CONFIG_THREAD_INFO_IN_TASK /* A live task holds one reference: */ atomic_t stack_refcount; +#endif +#ifdef CONFIG_SECURITY + /* Used by LSM modules for access restriction: */ + void *security; #endif /* CPU-specific state of this task: */ struct thread_struct thread; diff --git a/include/linux/security.h b/include/linux/security.h index 97df7bac5b48..af675b576645 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -308,6 +308,7 @@ int security_file_send_sigiotask(struct task_struct *tsk, int security_file_receive(struct file *file); int security_file_open(struct file *file, const struct cred *cred); int security_task_create(unsigned long clone_flags); +int security_task_alloc(struct task_struct *task, unsigned long clone_flags); void security_task_free(struct task_struct *task); int security_cred_alloc_blank(struct cred *cred, gfp_t gfp); void security_cred_free(struct cred *cred); @@ -861,6 +862,12 @@ static inline int security_task_create(unsigned long clone_flags) return 0; } +static inline int security_task_alloc(struct task_struct *task, + unsigned long clone_flags) +{ + return 0; +} + static 
inline void security_task_free(struct task_struct *task) { } -- cgit v1.2.3 From 591a3d7c09fa08baff48ad86c2347dbd28a52753 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 24 Mar 2017 14:13:05 +0300 Subject: mm: Fix false-positive VM_BUG_ON() in page_cache_{get,add}_speculative() 0day testing by Fengguang Wu triggered this crash while running Trinity: kernel BUG at include/linux/pagemap.h:151! ... CPU: 0 PID: 458 Comm: trinity-c0 Not tainted 4.11.0-rc2-00251-g2947ba0 #1 ... Call Trace: __get_user_pages_fast() get_user_pages_fast() get_futex_key() futex_requeue() do_futex() SyS_futex() do_syscall_64() entry_SYSCALL64_slow_path() It's a VM_BUG_ON() due to a false-negative in_atomic(). We call page_cache_get_speculative() with disabled local interrupts. It should be atomic enough. So let's check for disabled interrupts in the VM_BUG_ON() condition too, to resolve this. ( This got triggered by the conversion of the x86 GUP code to the generic GUP code. ) Reported-by: Fengguang Wu Signed-off-by: Kirill A. Shutemov Cc: Andrew Morton Cc: Aneesh Kumar K.V Cc: Kirill A.
Shutemov Cc: LKP Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/20170324114709.pcytvyb3d6ajux33@black.fi.intel.com Signed-off-by: Ingo Molnar --- include/linux/pagemap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 84943e8057ef..316a19f6b635 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -148,7 +148,7 @@ static inline int page_cache_get_speculative(struct page *page) #ifdef CONFIG_TINY_RCU # ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic()); + VM_BUG_ON(!in_atomic() && !irqs_disabled()); # endif /* * Preempt must be disabled here - we rely on rcu_read_lock doing @@ -186,7 +186,7 @@ static inline int page_cache_add_speculative(struct page *page, int count) #if !defined(CONFIG_SMP) && defined(CONFIG_TREE_RCU) # ifdef CONFIG_PREEMPT_COUNT - VM_BUG_ON(!in_atomic()); + VM_BUG_ON(!in_atomic() && !irqs_disabled()); # endif VM_BUG_ON_PAGE(page_count(page) == 0, page); page_ref_add(page, count); -- cgit v1.2.3 From 011d8261117249eab97bc86a8e1ac7731e03e319 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Mon, 27 Mar 2017 11:33:02 +0200 Subject: RAS: Add a Corrected Errors Collector Introduce a simple data structure for collecting correctable errors along with accessors. More detailed description in the code itself. The error decoding is done with the decoding chain now and mce_first_notifier() gets to see the error first and the CEC decides whether to log it and then the rest of the chain doesn't hear about it - basically the main reason for the CE collector - or to continue running the notifiers. When the CEC hits the action threshold, it will try to soft-offine the page containing the ECC and then the whole decoding chain gets to see the error. 
Signed-off-by: Borislav Petkov Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-edac Link: http://lkml.kernel.org/r/20170327093304.10683-5-bp@alien8.de Signed-off-by: Ingo Molnar --- include/linux/ras.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ras.h b/include/linux/ras.h index 2aceeafd6fe5..ffb147185e8d 100644 --- a/include/linux/ras.h +++ b/include/linux/ras.h @@ -1,14 +1,25 @@ #ifndef __RAS_H__ #define __RAS_H__ +#include + #ifdef CONFIG_DEBUG_FS int ras_userspace_consumers(void); void ras_debugfs_init(void); int ras_add_daemon_trace(void); #else static inline int ras_userspace_consumers(void) { return 0; } -static inline void ras_debugfs_init(void) { return; } +static inline void ras_debugfs_init(void) { } static inline int ras_add_daemon_trace(void) { return 0; } #endif +#ifdef CONFIG_RAS_CEC +void __init cec_init(void); +int __init parse_cec_param(char *str); +int cec_add_elem(u64 pfn); +#else +static inline void __init cec_init(void) { } +static inline int cec_add_elem(u64 pfn) { return -ENODEV; } #endif + +#endif /* __RAS_H__ */ -- cgit v1.2.3 From 4f9ab0c1570800002e77515888600ca2e3dce4a9 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Fri, 24 Mar 2017 09:47:31 +0100 Subject: mfd: cpcap: Add missing include dependencies This fixes compilation for files, that try to include the cpcap header in alphabetically sorted #include lists. Acked-by: Pavel Machek Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel Signed-off-by: Lee Jones --- include/linux/mfd/motorola-cpcap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/motorola-cpcap.h b/include/linux/mfd/motorola-cpcap.h index b4031c2b2214..53758a7d7c32 100644 --- a/include/linux/mfd/motorola-cpcap.h +++ b/include/linux/mfd/motorola-cpcap.h @@ -14,6 +14,9 @@ * published by the Free Software Foundation. 
*/ +#include +#include + #define CPCAP_VENDOR_ST 0 #define CPCAP_VENDOR_TI 1 -- cgit v1.2.3 From 07814246dd5530860ef758fd9b2b5f2e26472aa2 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 17:13:30 +0100 Subject: USB: serial: allow subdrivers to modify port-endpoint mapping Allow subdrivers to modify the port-endpoint mapping by passing the endpoint descriptors to calc_num_ports. The callback can now also be used to verify that the required endpoints exists and abort probing otherwise. This will allow us to get rid of a few hacks in subdrivers that are already modifying the port-endpoint mapping (or aborting probe due to missing endpoints), but only after the port structures have been setup. Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index f1b8a8493762..da528818cfd8 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -181,6 +181,17 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) serial->private = data; } +struct usb_serial_endpoints { + unsigned char num_bulk_in; + unsigned char num_bulk_out; + unsigned char num_interrupt_in; + unsigned char num_interrupt_out; + struct usb_endpoint_descriptor *bulk_in[MAX_NUM_PORTS]; + struct usb_endpoint_descriptor *bulk_out[MAX_NUM_PORTS]; + struct usb_endpoint_descriptor *interrupt_in[MAX_NUM_PORTS]; + struct usb_endpoint_descriptor *interrupt_out[MAX_NUM_PORTS]; +}; + /** * usb_serial_driver - describes a usb serial driver * @description: pointer to a string that describes this driver. 
This string @@ -196,8 +207,9 @@ static inline void usb_set_serial_data(struct usb_serial *serial, void *data) * (0 = end-point size) * @bulk_out_size: bytes to allocate for bulk-out buffer (0 = end-point size) * @calc_num_ports: pointer to a function to determine how many ports this - * device has dynamically. It will be called after the probe() - * callback is called, but before attach() + * device has dynamically. It can also be used to verify the number of + * endpoints or to modify the port-endpoint mapping. It will be called + * after the probe() callback is called, but before attach(). * @probe: pointer to the driver's probe function. * This will be called when the device is inserted into the system, * but before the device has been fully initialized by the usb_serial @@ -249,7 +261,8 @@ struct usb_serial_driver { int (*probe)(struct usb_serial *serial, const struct usb_device_id *id); int (*attach)(struct usb_serial *serial); - int (*calc_num_ports) (struct usb_serial *serial); + int (*calc_num_ports)(struct usb_serial *serial, + struct usb_serial_endpoints *epds); void (*disconnect)(struct usb_serial *serial); void (*release)(struct usb_serial *serial); -- cgit v1.2.3 From a794499b261b8487a984783ccc864975e1bcc7bf Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Thu, 16 Mar 2017 17:13:32 +0100 Subject: USB: serial: add calc_num_ports callback to generic driver Add a calc_num_ports callback to the generic driver and verify that the device has the required endpoints there instead of in core. Note that the generic driver num_ports field was never used. 
Signed-off-by: Johan Hovold --- include/linux/usb/serial.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index da528818cfd8..e2f0ab07eea5 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -379,7 +379,6 @@ extern void usb_serial_handle_dcd_change(struct usb_serial_port *usb_port, extern int usb_serial_bus_register(struct usb_serial_driver *device); extern void usb_serial_bus_deregister(struct usb_serial_driver *device); -extern struct usb_serial_driver usb_serial_generic_device; extern struct bus_type usb_serial_bus_type; extern struct tty_driver *usb_serial_tty_driver; -- cgit v1.2.3 From e753b2b50dc3c6582e9d5971555693db41a6d821 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Wed, 1 Feb 2017 19:01:18 +0200 Subject: net/mlx5: Add helper to initialize a flow steering actions struct instance There are bunch of places in the code where the intermediate struct that keeps the elements related to flow actions is initialized with the same default values. Put that into a small DECLARE type helper. This patch doesn't change any functionality. Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 949b24b6c479..5eea1ba2e593 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -136,6 +136,10 @@ struct mlx5_flow_act { u32 encap_id; }; +#define MLX5_DECLARE_FLOW_ACT(name) \ + struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ + MLX5_FS_DEFAULT_FLOW_TAG, 0} + /* Single destination per rule. * Group ID is implied by the match criteria. 
*/ @@ -156,5 +160,4 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); - #endif -- cgit v1.2.3 From 2a69cb9ff7caac00f3bf7c865964228dd2a0c415 Mon Sep 17 00:00:00 2001 From: Or Gerlitz Date: Thu, 19 Jan 2017 19:31:25 +0200 Subject: net/mlx5: Introduce modify header structures, commands and steering action definitions Add the definitions related to creation/deletion of a modify header context and the modify header steering action which are used for HW packet header modify (re-write) as part of steering. Add as well the modify header id into two intermediate structs and set it to the FTE. Note that as the push/pop vlan steering actions are emulated by the ewitch management code, we're not breaking any compatibility while changing their values to make room for the modify header action which is not emulated and whose value is part of the FW API. The new bit values for the emulated actions are at the end of the possible range. Signed-off-by: Or Gerlitz Reviewed-by: Hadar Hen Zion Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 3 +- include/linux/mlx5/mlx5_ifc.h | 113 +++++++++++++++++++++++++++++++++++++++++- 2 files changed, 113 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 5eea1ba2e593..ae91a4bda1a3 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -134,11 +134,12 @@ struct mlx5_flow_act { u32 action; u32 flow_tag; u32 encap_id; + u32 modify_id; }; #define MLX5_DECLARE_FLOW_ACT(name) \ struct mlx5_flow_act name = {MLX5_FLOW_CONTEXT_ACTION_FWD_DEST,\ - MLX5_FS_DEFAULT_FLOW_TAG, 0} + MLX5_FS_DEFAULT_FLOW_TAG, 0, 0} /* Single destination per rule. * Group ID is implied by the match criteria. 
diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 838242697541..56bc842b0620 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -227,6 +227,8 @@ enum { MLX5_CMD_OP_MODIFY_FLOW_TABLE = 0x93c, MLX5_CMD_OP_ALLOC_ENCAP_HEADER = 0x93d, MLX5_CMD_OP_DEALLOC_ENCAP_HEADER = 0x93e, + MLX5_CMD_OP_ALLOC_MODIFY_HEADER_CONTEXT = 0x940, + MLX5_CMD_OP_DEALLOC_MODIFY_HEADER_CONTEXT = 0x941, MLX5_CMD_OP_MAX }; @@ -302,7 +304,8 @@ struct mlx5_ifc_flow_table_prop_layout_bits { u8 reserved_at_20[0x2]; u8 log_max_ft_size[0x6]; - u8 reserved_at_28[0x10]; + u8 log_max_modify_header_context[0x8]; + u8 max_modify_header_actions[0x8]; u8 max_ft_level[0x8]; u8 reserved_at_40[0x20]; @@ -2190,6 +2193,7 @@ enum { MLX5_FLOW_CONTEXT_ACTION_COUNT = 0x8, MLX5_FLOW_CONTEXT_ACTION_ENCAP = 0x10, MLX5_FLOW_CONTEXT_ACTION_DECAP = 0x20, + MLX5_FLOW_CONTEXT_ACTION_MOD_HDR = 0x40, }; struct mlx5_ifc_flow_context_bits { @@ -2211,7 +2215,9 @@ struct mlx5_ifc_flow_context_bits { u8 encap_id[0x20]; - u8 reserved_at_e0[0x120]; + u8 modify_header_id[0x20]; + + u8 reserved_at_100[0x100]; struct mlx5_ifc_fte_match_param_bits match_value; @@ -4534,6 +4540,109 @@ struct mlx5_ifc_dealloc_encap_header_in_bits { u8 reserved_60[0x20]; }; +struct mlx5_ifc_set_action_in_bits { + u8 action_type[0x4]; + u8 field[0xc]; + u8 reserved_at_10[0x3]; + u8 offset[0x5]; + u8 reserved_at_18[0x3]; + u8 length[0x5]; + + u8 data[0x20]; +}; + +struct mlx5_ifc_add_action_in_bits { + u8 action_type[0x4]; + u8 field[0xc]; + u8 reserved_at_10[0x10]; + + u8 data[0x20]; +}; + +union mlx5_ifc_set_action_in_add_action_in_auto_bits { + struct mlx5_ifc_set_action_in_bits set_action_in; + struct mlx5_ifc_add_action_in_bits add_action_in; + u8 reserved_at_0[0x40]; +}; + +enum { + MLX5_ACTION_TYPE_SET = 0x1, + MLX5_ACTION_TYPE_ADD = 0x2, +}; + +enum { + MLX5_ACTION_IN_FIELD_OUT_SMAC_47_16 = 0x1, + MLX5_ACTION_IN_FIELD_OUT_SMAC_15_0 = 0x2, + MLX5_ACTION_IN_FIELD_OUT_ETHERTYPE = 0x3, + 
MLX5_ACTION_IN_FIELD_OUT_DMAC_47_16 = 0x4, + MLX5_ACTION_IN_FIELD_OUT_DMAC_15_0 = 0x5, + MLX5_ACTION_IN_FIELD_OUT_IP_DSCP = 0x6, + MLX5_ACTION_IN_FIELD_OUT_TCP_FLAGS = 0x7, + MLX5_ACTION_IN_FIELD_OUT_TCP_SPORT = 0x8, + MLX5_ACTION_IN_FIELD_OUT_TCP_DPORT = 0x9, + MLX5_ACTION_IN_FIELD_OUT_IP_TTL = 0xa, + MLX5_ACTION_IN_FIELD_OUT_UDP_SPORT = 0xb, + MLX5_ACTION_IN_FIELD_OUT_UDP_DPORT = 0xc, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_127_96 = 0xd, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_95_64 = 0xe, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_63_32 = 0xf, + MLX5_ACTION_IN_FIELD_OUT_SIPV6_31_0 = 0x10, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_127_96 = 0x11, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_95_64 = 0x12, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_63_32 = 0x13, + MLX5_ACTION_IN_FIELD_OUT_DIPV6_31_0 = 0x14, + MLX5_ACTION_IN_FIELD_OUT_SIPV4 = 0x15, + MLX5_ACTION_IN_FIELD_OUT_DIPV4 = 0x16, +}; + +struct mlx5_ifc_alloc_modify_header_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 modify_header_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + +struct mlx5_ifc_alloc_modify_header_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 reserved_at_40[0x20]; + + u8 table_type[0x8]; + u8 reserved_at_68[0x10]; + u8 num_of_actions[0x8]; + + union mlx5_ifc_set_action_in_add_action_in_auto_bits actions[0]; +}; + +struct mlx5_ifc_dealloc_modify_header_context_out_bits { + u8 status[0x8]; + u8 reserved_at_8[0x18]; + + u8 syndrome[0x20]; + + u8 reserved_at_40[0x40]; +}; + +struct mlx5_ifc_dealloc_modify_header_context_in_bits { + u8 opcode[0x10]; + u8 reserved_at_10[0x10]; + + u8 reserved_at_20[0x10]; + u8 op_mod[0x10]; + + u8 modify_header_id[0x20]; + + u8 reserved_at_60[0x20]; +}; + struct mlx5_ifc_query_dct_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; -- cgit v1.2.3 From 9e234eeafbe17e85908584392f249f0b329b8e1b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Mar 2017 10:51:41 -0700 Subject: blk-throttle: add a 
simple idle detection A cgroup gets assigned a low limit, but the cgroup could never dispatch enough IO to cross the low limit. In such case, the queue state machine will remain in LIMIT_LOW state and all other cgroups will be throttled according to low limit. This is unfair for other cgroups. We should treat the cgroup idle and upgrade the state machine to lower state. We also have a downgrade logic. If the state machine upgrades because of cgroup idle (real idle), the state machine will downgrade as soon as the cgroup is below its low limit. This isn't what we want. A more complicated case is cgroup isn't idle when queue is in LIMIT_LOW. But when queue gets upgraded to lower state, other cgroups could dispatch more IO and this cgroup can't dispatch enough IO, so the cgroup is below its low limit and looks like idle (fake idle). In this case, the queue should downgrade soon. The key to determine if we should do downgrade is to detect if cgroup is truly idle. Unfortunately it's very hard to determine if a cgroup is really idle. This patch uses the 'think time check' idea from CFQ for the purpose. Please note, the idea doesn't work for all workloads. For example, a workload with io depth 8 has disk utilization 100%, hence think time is 0, i.e., not idle. But the workload can run higher bandwidth with io depth 16. Compared to io depth 16, the io depth 8 workload is idle. We use the idea to roughly determine if a cgroup is idle. We treat a cgroup idle if its think time is above a threshold (by default 1ms for SSD and 100ms for HD). The idea is think time above the threshold will start to harm performance. HD is much slower so a longer think time is ok. The patch (and the later patches) uses 'unsigned long' to track time. We convert 'ns' to 'us' with 'ns >> 10'. This is fast but loses precision, which should not be a big deal.
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 270119a501fb..07a9e9607909 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -58,6 +58,9 @@ struct bio { */ struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; +#ifdef CONFIG_BLK_DEV_THROTTLING_LOW + void *bi_cg_private; +#endif #endif union { #if defined(CONFIG_BLK_DEV_INTEGRITY) -- cgit v1.2.3 From 88eeca495ba7de749ff253376ec6be19bb05368d Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Mar 2017 15:19:41 -0700 Subject: block: track request size in blk_issue_stat Currently there is no way to know the request size when the request is finished. Next patch will need this info. We could add extra field to record the size, but blk_issue_stat has enough space to record it, so this patch just overloads blk_issue_stat. With this, we will have 49bits to track time, which still is very long time. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 07a9e9607909..3ad567347671 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -287,7 +287,7 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie) } struct blk_issue_stat { - u64 time; + u64 stat; }; struct blk_rq_stat { -- cgit v1.2.3 From b9147dd1bae2b15d6931ecd42f8606c775fecbc9 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Mon, 27 Mar 2017 15:19:42 -0700 Subject: blk-throttle: add a mechanism to estimate IO latency User configures latency target, but the latency threshold for each request size isn't fixed. For a SSD, the IO latency highly depends on request size. 
To calculate latency threshold, we sample some data, eg, average latency for request size 4k, 8k, 16k, 32k .. 1M. The latency threshold of each request size will be the sample latency (I'll call it base latency) plus latency target. For example, the base latency for request size 4k is 80us and user configures latency target 60us. The 4k latency threshold will be 80 + 60 = 140us. To sample data, we calculate the order base 2 of rounded up IO sectors. If the IO size is bigger than 1M, it will be accounted as 1M. Since the calculation does round up, the base latency will be slightly smaller than actual value. Also if there isn't any IO dispatched for a specific IO size, we will use the base latency of smaller IO size for this IO size. But we shouldn't sample data at any time. The base latency is supposed to be latency where disk isn't congested, because we use latency threshold to schedule IOs between cgroups. If disk is congested, the latency is higher, using it for scheduling is meaningless. Hence we only do the sampling when block throttling is in the LOW limit, with assumption disk isn't congested in such state. If the assumption isn't true, eg, low limit is too high, calculated latency threshold will be higher. Hard disk is completely different. Latency depends on spindle seek instead of request size. Currently this feature is SSD only, we probably can use a fixed threshold like 4ms for hard disk though. 
Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3ad567347671..67bcf8a5326e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -17,6 +17,10 @@ struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); +struct blk_issue_stat { + u64 stat; +}; + /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) @@ -60,6 +64,7 @@ struct bio { struct cgroup_subsys_state *bi_css; #ifdef CONFIG_BLK_DEV_THROTTLING_LOW void *bi_cg_private; + struct blk_issue_stat bi_issue_stat; #endif #endif union { @@ -286,10 +291,6 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie) return (cookie & BLK_QC_T_INTERNAL) != 0; } -struct blk_issue_stat { - u64 stat; -}; - struct blk_rq_stat { s64 mean; u64 min; -- cgit v1.2.3 From e8bb4673596ea28fab287dbc417e8100d798cd40 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Mon, 27 Mar 2017 07:31:03 +0200 Subject: dmaengine: pl330: remove pdata based initialization This driver is now used only on platforms which support device tree, so it is safe to remove legacy platform data based initialization code. Signed-off-by: Marek Szyprowski Reviewed-by: Ulf Hansson Acked-by: Arnd Bergmann For plat-samsung: Acked-by: Krzysztof Kozlowski Signed-off-by: Vinod Koul --- include/linux/amba/pl330.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) delete mode 100644 include/linux/amba/pl330.h (limited to 'include/linux') diff --git a/include/linux/amba/pl330.h b/include/linux/amba/pl330.h deleted file mode 100644 index fe93758e8403..000000000000 --- a/include/linux/amba/pl330.h +++ /dev/null @@ -1,35 +0,0 @@ -/* linux/include/linux/amba/pl330.h - * - * Copyright (C) 2010 Samsung Electronics Co. Ltd. 
- * Jaswinder Singh - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - */ - -#ifndef __AMBA_PL330_H_ -#define __AMBA_PL330_H_ - -#include - -struct dma_pl330_platdata { - /* - * Number of valid peripherals connected to DMAC. - * This may be different from the value read from - * CR0, as the PL330 implementation might have 'holes' - * in the peri list or the peri could also be reached - * from another DMAC which the platform prefers. - */ - u8 nr_valid_peri; - /* Array of valid peripherals */ - u8 *peri_id; - /* Operational capabilities */ - dma_cap_mask_t cap_mask; - /* Bytes to allocate for MC buffer */ - unsigned mcbuf_sz; -}; - -extern bool pl330_filter(struct dma_chan *chan, void *param); -#endif /* __AMBA_PL330_H_ */ -- cgit v1.2.3 From dbc049eee73004db996cc8f63754f8dd5f86d0f7 Mon Sep 17 00:00:00 2001 From: Anup Patel Date: Wed, 15 Mar 2017 12:10:00 +0530 Subject: mailbox: Add driver for Broadcom FlexRM ring manager Some of the Broadcom iProc SoCs have FlexRM ring manager which provides a ring-based programming interface to various offload engines (e.g. RAID, Crypto, etc). This patch adds a common mailbox driver for Broadcom FlexRM ring manager which can be shared by various offload engine drivers (implemented as mailbox clients). 
Reviewed-by: Ray Jui Reviewed-by: Scott Branden Reviewed-by: Pramod KUMAR Signed-off-by: Anup Patel Signed-off-by: Jassi Brar --- include/linux/mailbox/brcm-message.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mailbox/brcm-message.h b/include/linux/mailbox/brcm-message.h index 6b55c938b401..c20b4843fc2d 100644 --- a/include/linux/mailbox/brcm-message.h +++ b/include/linux/mailbox/brcm-message.h @@ -16,6 +16,7 @@ enum brcm_message_type { BRCM_MESSAGE_UNKNOWN = 0, + BRCM_MESSAGE_BATCH, BRCM_MESSAGE_SPU, BRCM_MESSAGE_SBA, BRCM_MESSAGE_MAX, @@ -23,23 +24,28 @@ enum brcm_message_type { struct brcm_sba_command { u64 cmd; + u64 *cmd_dma; + dma_addr_t cmd_dma_addr; #define BRCM_SBA_CMD_TYPE_A BIT(0) #define BRCM_SBA_CMD_TYPE_B BIT(1) #define BRCM_SBA_CMD_TYPE_C BIT(2) #define BRCM_SBA_CMD_HAS_RESP BIT(3) #define BRCM_SBA_CMD_HAS_OUTPUT BIT(4) u64 flags; - dma_addr_t input; - size_t input_len; dma_addr_t resp; size_t resp_len; - dma_addr_t output; - size_t output_len; + dma_addr_t data; + size_t data_len; }; struct brcm_message { enum brcm_message_type type; union { + struct { + struct brcm_message *msgs; + unsigned int msgs_queued; + unsigned int msgs_count; + } batch; struct { struct scatterlist *src; struct scatterlist *dst; -- cgit v1.2.3 From db68ce10c4f0a27c1ff9fa0e789e5c41f8c4ea63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Mar 2017 21:08:07 -0400 Subject: new helper: uaccess_kernel() Signed-off-by: Al Viro --- include/linux/uaccess.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index b786ca2419b4..9c3ae8706e9d 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -7,6 +7,8 @@ #define VERIFY_READ 0 #define VERIFY_WRITE 1 +#define uaccess_kernel() segment_eq(get_fs(), KERNEL_DS) + #include static __always_inline void pagefault_disabled_inc(void) -- cgit v1.2.3 From 
a2c680c6ce386e9ca6cdf362e8b01789126c9bf7 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 14 Mar 2017 11:18:03 -0400 Subject: firmware/qcom: add qcom_scm_restore_sec_cfg() Signed-off-by: Rob Clark Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index d32f6f1a5225..22017f5d17e0 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -40,6 +40,7 @@ extern int qcom_scm_pas_shutdown(u32 peripheral); extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); extern int qcom_scm_set_remote_state(u32 state, u32 id); +extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -67,5 +68,6 @@ static inline void qcom_scm_cpu_power_down(u32 flags) {} static inline u32 qcom_scm_get_version(void) { return 0; } static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } +static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } #endif #endif -- cgit v1.2.3 From b182cc4d597a6e73ff04ee1b7fb4f1a28f56ae3d Mon Sep 17 00:00:00 2001 From: Stanimir Varbanov Date: Tue, 14 Mar 2017 11:18:04 -0400 Subject: firmware: qcom_scm: add two scm calls for iommu secure page table Those two new SCM calls are needed from qcom-iommu driver in order to initialize secure iommu page table. 
Signed-off-by: Stanimir Varbanov Signed-off-by: Rob Clark Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 22017f5d17e0..e5380471c2cd 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -41,6 +41,8 @@ extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); extern int qcom_scm_set_remote_state(u32 state, u32 id); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); +extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); +extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -69,5 +71,7 @@ static inline u32 qcom_scm_get_version(void) { return 0; } static inline u32 qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } +static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { return -ENODEV; } +static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } #endif #endif -- cgit v1.2.3 From 7fc6b87a9ff537e7df32b1278118ce9c5bcd6788 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 9 Mar 2017 00:05:31 -0800 Subject: blkcg: allocate struct blkcg_gq outside request queue spinlock blkg_conf_prep() currently calls blkg_lookup_create() while holding request queue spinlock. This means allocating memory for struct blkcg_gq has to be made non-blocking. 
This causes occasional -ENOMEM failures in call paths like below: pcpu_alloc+0x68f/0x710 __alloc_percpu_gfp+0xd/0x10 __percpu_counter_init+0x55/0xc0 cfq_pd_alloc+0x3b2/0x4e0 blkg_alloc+0x187/0x230 blkg_create+0x489/0x670 blkg_lookup_create+0x9a/0x230 blkg_conf_prep+0x1fb/0x240 __cfqg_set_weight_device.isra.105+0x5c/0x180 cfq_set_weight_on_dfl+0x69/0xc0 cgroup_file_write+0x39/0x1c0 kernfs_fop_write+0x13f/0x1d0 __vfs_write+0x23/0x120 vfs_write+0xc2/0x1f0 SyS_write+0x44/0xb0 entry_SYSCALL_64_fastpath+0x18/0xad In the code path above, percpu allocator cannot call vmalloc() due to queue spinlock. A failure in this call path gives grief to tools which are trying to configure io weights. We see occasional failures happen shortly after reboots even when system is not under any memory pressure. Machines with a lot of cpus are more vulnerable to this condition. Update blkg_create() function to temporarily drop the rcu and queue locks when it is allowed by gfp mask. Suggested-by: Tejun Heo Signed-off-by: Tahsin Erdogan Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 01b62e7bac74..955903a8f6cb 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -172,7 +172,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q); + struct request_queue *q, gfp_t gfp, + const struct blkcg_policy *pol); int blkcg_init_queue(struct request_queue *q); void blkcg_drain_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); @@ -694,7 +695,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { 
spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); + blkg = blkg_lookup_create(blkcg, q, GFP_NOWAIT | __GFP_NOWARN, + NULL); if (IS_ERR(blkg)) blkg = NULL; spin_unlock_irq(q->queue_lock); -- cgit v1.2.3 From dfa672fbc0d9e83ff0dc1a75f1f5d0e59a30706b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 28 Mar 2017 10:52:16 +0300 Subject: ACPI / property: Add possiblity to retrieve parent firmware node Sometimes it is useful to be able to navigate firmware node hierarchy upwards toward parent nodes. ACPI device nodes are pretty much already supported because ACPICA provides acpi_get_parent(). ACPI data nodes, however, are all below the same parent ACPI device. Their hierarchy is created by "linking" each other using references in the value field. Add parent pointer to the parent data node while we create them so it is easy to navigate the hierarchy backwards. We use this parent pointer in a new function acpi_node_get_parent() that is able to extract parent of both ACPI firmware node types. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773..e74e8bdbb6af 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -999,6 +999,7 @@ int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, struct fwnode_handle *acpi_get_next_subnode(struct device *dev, struct fwnode_handle *subnode); +struct fwnode_handle *acpi_node_get_parent(struct fwnode_handle *fwnode); struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, @@ -1121,6 +1122,12 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, return NULL; } +static inline struct fwnode_handle * +acpi_node_get_parent(struct fwnode_handle *fwnode) +{ + return NULL; +} + #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ -- cgit v1.2.3 From afaf26fd8458be29949ae5a52c65a464a1b0cbb6 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 28 Mar 2017 10:52:17 +0300 Subject: device property: Add fwnode_get_parent() Now that ACPI has support for returning parent firmware node for both types of nodes we can expose this to others as well. This adds a new function fwnode_get_parent() that can be used for DT and ACPI nodes to retrieve the parent firmware node. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 64e3a9c6d95f..ab0a8160cef6 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -70,6 +70,8 @@ int fwnode_property_read_string(struct fwnode_handle *fwnode, int fwnode_property_match_string(struct fwnode_handle *fwnode, const char *propname, const char *string); +struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode); + struct fwnode_handle *device_get_next_child_node(struct device *dev, struct fwnode_handle *child); -- cgit v1.2.3 From 34055190b19d7c634caf738c8ca195cad06550cd Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 28 Mar 2017 10:52:18 +0300 Subject: ACPI / property: Add fwnode_get_next_child_node() The ACPI _DSD hierarchical data extension makes it possible to have hierarchies deeper than one level in similar way than DT allows. These "subsubnodes" have not been accessible because device property implementation only provides device_get_next_child_node() that is limited to direct descendants of a device. We need this ability in order support things like remote endpoints currently supported in DT with of_graph_* APIs. Modify acpi_get_next_subnode() to accept fwnode handle instead and update callers accordingly. Also add a new function fwnode_get_next_child_node() that works directly with fwnodes and modify device_get_next_child_node() to call it directly. While there add a macro fwnode_for_each_child_node() analogous to the current device_for_each_child_node() but it works with fwnodes instead of devices. Link: http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.pdf Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/acpi.h | 8 ++++---- include/linux/property.h | 6 ++++++ 2 files changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index e74e8bdbb6af..4eb1f5941ede 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -997,8 +997,8 @@ int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); -struct fwnode_handle *acpi_get_next_subnode(struct device *dev, - struct fwnode_handle *subnode); +struct fwnode_handle *acpi_get_next_subnode(struct fwnode_handle *fwnode, + struct fwnode_handle *child); struct fwnode_handle *acpi_node_get_parent(struct fwnode_handle *fwnode); struct acpi_probe_entry; @@ -1116,8 +1116,8 @@ static inline int acpi_dev_prop_read(struct acpi_device *adev, return -ENXIO; } -static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, - struct fwnode_handle *subnode) +static inline struct fwnode_handle * +acpi_get_next_subnode(struct fwnode_handle *fwnode, struct fwnode_handle *child) { return NULL; } diff --git a/include/linux/property.h b/include/linux/property.h index ab0a8160cef6..f4786a8655f1 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -71,6 +71,12 @@ int fwnode_property_match_string(struct fwnode_handle *fwnode, const char *propname, const char *string); struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode, + struct fwnode_handle *child); + +#define fwnode_for_each_child_node(fwnode, child) \ + for (child = fwnode_get_next_child_node(fwnode, NULL); child; \ + child = fwnode_get_next_child_node(fwnode, child)) struct fwnode_handle *device_get_next_child_node(struct device *dev, struct fwnode_handle *child); -- cgit v1.2.3 From 21ea73f54c6d77f35381c79870160496c9e78b60 
Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 28 Mar 2017 10:52:19 +0300 Subject: device property: Add fwnode_get_named_child_node() Since now we have means to enumerate all children of any fwnode even in ACPI we can implement fwnode_get_named_child_node(). This is similar to device_get_named_child_node() with the exception that it can be called on any fwnode handle. Make device_get_named_child_node() call directly this new function. This is useful in cases where we need to be able to find child nodes which are not direct descendants of the parent device. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index f4786a8655f1..514b19559fbe 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -85,6 +85,8 @@ struct fwnode_handle *device_get_next_child_node(struct device *dev, for (child = device_get_next_child_node(dev, NULL); child; \ child = device_get_next_child_node(dev, child)) +struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode, + const char *childname); struct fwnode_handle *device_get_named_child_node(struct device *dev, const char *childname); -- cgit v1.2.3 From 79389a83bc3888a900191e3990cda5c76f2ca1ec Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 28 Mar 2017 10:52:20 +0300 Subject: ACPI / property: Add support for remote endpoints DT has had concept of remote endpoints for some time already. It makes it possible to reference another firmware node through a property called remote-endpoint. This is already used by some subsystems like v4l2 for parsing hardware properties related to camera. This patch adds ACPI support for remote endpoints utilizing _DSD hierarchical data extensions. Signed-off-by: Mika Westerberg Signed-off-by: Sakari Ailus Signed-off-by: Rafael J.
Wysocki --- include/linux/acpi.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4eb1f5941ede..add8a96e1977 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1001,6 +1001,13 @@ struct fwnode_handle *acpi_get_next_subnode(struct fwnode_handle *fwnode, struct fwnode_handle *child); struct fwnode_handle *acpi_node_get_parent(struct fwnode_handle *fwnode); +struct fwnode_handle *acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle *prev); +int acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle **remote, + struct fwnode_handle **port, + struct fwnode_handle **endpoint); + struct acpi_probe_entry; typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, struct acpi_probe_entry *); @@ -1128,6 +1135,22 @@ acpi_node_get_parent(struct fwnode_handle *fwnode) return NULL; } +static inline struct fwnode_handle * +acpi_graph_get_next_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle *prev) +{ + return ERR_PTR(-ENXIO); +} + +static inline int +acpi_graph_get_remote_endpoint(struct fwnode_handle *fwnode, + struct fwnode_handle **remote, + struct fwnode_handle **port, + struct fwnode_handle **endpoint) +{ + return -ENXIO; +} + #define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ static const void * __acpi_table_##name[] \ __attribute__((unused)) \ -- cgit v1.2.3 From 07bb80d40b0e6a43aafb422296d33baed255569a Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Tue, 28 Mar 2017 10:52:21 +0300 Subject: device property: Add support for remote endpoints This follows DT implementation of of_graph_* APIs but we call them fwnode_graph_* instead. For DT nodes the existing of_graph_* implementation will be used. For ACPI we use the new ACPI graph implementation instead. 
Signed-off-by: Mika Westerberg Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 514b19559fbe..8d7809c2c42d 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -268,4 +268,13 @@ int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); +struct fwnode_handle *fwnode_graph_get_next_endpoint( + struct fwnode_handle *fwnode, struct fwnode_handle *prev); +struct fwnode_handle *fwnode_graph_get_remote_port_parent( + struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_graph_get_remote_port( + struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_graph_get_remote_endpoint( + struct fwnode_handle *fwnode); + #endif /* _LINUX_PROPERTY_H_ */ -- cgit v1.2.3 From e7887c284969a23a98fe1aff2f631c5ccdcd1757 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Mar 2017 10:52:22 +0300 Subject: device property: Add fwnode_handle_get() fwnode_handle_get() is used to obtain a reference to a fwnode_handle container. In this case this is OF specific struct device_node. This complements fwnode_handle_put() which is already implemented. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 8d7809c2c42d..0ae7d209f6c2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -90,6 +90,7 @@ struct fwnode_handle *fwnode_get_named_child_node(struct fwnode_handle *fwnode, struct fwnode_handle *device_get_named_child_node(struct device *dev, const char *childname); +void fwnode_handle_get(struct fwnode_handle *fwnode); void fwnode_handle_put(struct fwnode_handle *fwnode); unsigned int device_get_child_node_count(struct device *dev); -- cgit v1.2.3 From 67831837e0b192fe0b8ee8b5e502d95ad2c497c0 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Mar 2017 10:52:23 +0300 Subject: of: Add of_fwnode_handle() to convert device nodes to fwnode_handle of_fwnode_handle() returns a struct fwnode_handle of the struct device_node. This may be used on the fwnode property API. Use a macro instead of a function in order to support const and non-const arguments. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/of.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 21e6323de0f3..e5d4225fda35 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -159,6 +159,8 @@ static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) container_of(fwnode, struct device_node, fwnode) : NULL; } +#define of_fwnode_handle(node) (&(node)->fwnode) + static inline bool of_have_populated_dt(void) { return of_root != NULL; @@ -602,6 +604,8 @@ static inline struct device_node *of_find_node_with_property( return NULL; } +#define of_fwnode_handle(node) NULL + static inline bool of_have_populated_dt(void) { return false; -- cgit v1.2.3 From e44bb0cbdc88686c21e2175a990b40bf6db5d005 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Mar 2017 10:52:24 +0300 Subject: device property: Make dev_fwnode() public The function to obtain a fwnode related to a struct device is useful for drivers that use the fwnode property API: it allows not being aware of the underlying firmware implementation. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 0ae7d209f6c2..6e20a12a2eec 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -33,6 +33,8 @@ enum dev_dma_attr { DEV_DMA_COHERENT, }; +struct fwnode_handle *dev_fwnode(struct device *dev); + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); -- cgit v1.2.3 From 2bd5452d46df46d99b869b59a1532647e2981d75 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Mar 2017 10:52:25 +0300 Subject: device property: Add support for fwnode endpoints Similar to OF endpoints, endpoint type nodes can be also supported on ACPI. In order to make it possible for drivers to ignore the matter, add a type for fwnode_endpoint and a function to parse them. On ACPI, find the child node index instead of relying on the "endpoint" property. Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/fwnode.h | 12 ++++++++++++ include/linux/property.h | 3 +++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 8bd28ce6d76e..3dff2398a5f0 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -27,4 +27,16 @@ struct fwnode_handle { struct fwnode_handle *secondary; }; +/** + * struct fwnode_endpoint - Fwnode graph endpoint + * @port: Port number + * @id: Endpoint id + * @local_fwnode: reference to the related fwnode + */ +struct fwnode_endpoint { + unsigned int port; + unsigned int id; + const struct fwnode_handle *local_fwnode; +}; + #endif diff --git a/include/linux/property.h b/include/linux/property.h index 6e20a12a2eec..3a4e43599e01 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -280,4 +280,7 @@ struct fwnode_handle *fwnode_graph_get_remote_port( struct fwnode_handle *fwnode_graph_get_remote_endpoint( struct fwnode_handle *fwnode); +int fwnode_graph_parse_endpoint(struct fwnode_handle *fwnode, + struct fwnode_endpoint *endpoint); + #endif /* _LINUX_PROPERTY_H_ */ -- cgit v1.2.3 From 233872585de1cf26c3c3da5859ffb3aba45bd486 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 28 Mar 2017 10:52:26 +0300 Subject: device property: Add fwnode_get_next_parent() In order to differentiate the functionality between dropping a reference to the node (or not) for the benefit of OF, introduce fwnode_get_next_parent(). Signed-off-by: Sakari Ailus Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. 
Wysocki --- include/linux/property.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 3a4e43599e01..2f482616a2f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -73,6 +73,7 @@ int fwnode_property_match_string(struct fwnode_handle *fwnode, const char *propname, const char *string); struct fwnode_handle *fwnode_get_parent(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_get_next_parent(struct fwnode_handle *fwnode); struct fwnode_handle *fwnode_get_next_child_node(struct fwnode_handle *fwnode, struct fwnode_handle *child); -- cgit v1.2.3 From ffaa42e8a40b7f1041e36b022cd28b7c45e2b564 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 20 Mar 2017 11:19:21 +0100 Subject: PM / Domains: Enable users of genpd to specify always on PM domains The current way to implement an always on PM domain consists of returning -EBUSY from the ->power_off() callback. This is a bit different compared to using the always on genpd governor, which prevents the PM domain from being powered off via runtime suspend, but not via system suspend. The approach to return -EBUSY from the ->power_off() callback to support always on PM domains in genpd is suboptimal. That is because it requires genpd to follow the regular execution path of the power off sequence, which ends by invoking the ->power_off() callback. To enable genpd to early abort the power off sequence for always on PM domains, it needs static information about these configurations. Therefore let's add a new genpd configuration flag, GENPD_FLAG_ALWAYS_ON. Users of the new GENPD_FLAG_ALWAYS_ON flag, are by genpd required to make sure the PM domain is powered on before calling pm_genpd_init(). Moreover, users don't need to implement the ->power_off() callback, as genpd doesn't ever invoke it. 
Signed-off-by: Ulf Hansson Reviewed-by: Viresh Kumar Reviewed-by: Geert Uytterhoeven Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 5339ed5bd6f9..9b6abe632587 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -20,6 +20,7 @@ /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ #define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ +#define GENPD_FLAG_ALWAYS_ON (1U << 2) /* PM domain is always powered on */ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ -- cgit v1.2.3 From d597580d373774b1bdab84b3d26ff0b55162b916 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 20 Mar 2017 21:56:06 -0400 Subject: generic ...copy_..._user primitives provide raw_copy_..._user() and select ARCH_HAS_RAW_COPY_USER to use those. Signed-off-by: Al Viro --- include/linux/uaccess.h | 187 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 187 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 9c3ae8706e9d..5f76bc995d96 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -3,6 +3,7 @@ #include #include +#include #define VERIFY_READ 0 #define VERIFY_WRITE 1 @@ -11,6 +12,192 @@ #include +#ifdef CONFIG_ARCH_HAS_RAW_COPY_USER +/* + * Architectures should provide two primitives (raw_copy_{to,from}_user()) + * select ARCH_HAS_RAW_COPY_FROM_USER and get rid of their private instances + * of copy_{to,from}_user() and __copy_{to,from}_user{,_inatomic}(). Once + * all of them switch, this part of linux/uaccess.h will become unconditional. + * + * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and + * return the amount left to copy. 
They should assume that access_ok() has + * already been checked (and succeeded); they should *not* zero-pad anything. + * No KASAN or object size checks either - those belong here. + * + * Both of these functions should attempt to copy size bytes starting at from + * into the area starting at to. They must not fetch or store anything + * outside of those areas. Return value must be between 0 (everything + * copied successfully) and size (nothing copied). + * + * If raw_copy_{to,from}_user(to, from, size) returns N, size - N bytes starting + * at to must become equal to the bytes fetched from the corresponding area + * starting at from. All data past to + size - N must be left unmodified. + * + * If copying succeeds, the return value must be 0. If some data cannot be + * fetched, it is permitted to copy less than had been fetched; the only + * hard requirement is that not storing anything at all (i.e. returning size) + * should happen only when nothing could be copied. In other words, you don't + * have to squeeze as much as possible - it is allowed, but not necessary. + * + * For raw_copy_from_user() to always points to kernel memory and no faults + * on store should happen. Interpretation of from is affected by set_fs(). + * For raw_copy_to_user() it's the other way round. + * + * Both can be inlined - it's up to architectures whether it wants to bother + * with that. They should not be used directly; they are used to implement + * the 6 functions (copy_{to,from}_user(), __copy_{to,from}_user_inatomic()) + * that are used instead. Out of those, __... ones are inlined. Plain + * copy_{to,from}_user() might or might not be inlined. If you want them + * inlined, have asm/uaccess.h define INLINE_COPY_{TO,FROM}_USER. + * + * NOTE: only copy_from_user() zero-pads the destination in case of short copy. + * Neither __copy_from_user() nor __copy_from_user_inatomic() zero anything + * at all; their callers absolutely must check the return value. 
+ * + * Biarch ones should also provide raw_copy_in_user() - similar to the above, + * but both source and destination are __user pointers (affected by set_fs() + * as usual) and both source and destination can trigger faults. + */ + +static __always_inline unsigned long +__copy_from_user_inatomic(void *to, const void __user *from, unsigned long n) +{ + kasan_check_write(to, n); + check_object_size(to, n, false); + return raw_copy_from_user(to, from, n); +} + +static __always_inline unsigned long +__copy_from_user(void *to, const void __user *from, unsigned long n) +{ + might_fault(); + kasan_check_write(to, n); + check_object_size(to, n, false); + return raw_copy_from_user(to, from, n); +} + +/** + * __copy_to_user_inatomic: - Copy a block of data into user space, with less checking. + * @to: Destination address, in user space. + * @from: Source address, in kernel space. + * @n: Number of bytes to copy. + * + * Context: User context only. + * + * Copy data from kernel space to user space. Caller must check + * the specified block with access_ok() before calling this function. + * The caller should also make sure he pins the user space address + * so that we don't result in page fault and sleep. 
+ */ +static __always_inline unsigned long +__copy_to_user_inatomic(void __user *to, const void *from, unsigned long n) +{ + kasan_check_read(from, n); + check_object_size(from, n, true); + return raw_copy_to_user(to, from, n); +} + +static __always_inline unsigned long +__copy_to_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + kasan_check_read(from, n); + check_object_size(from, n, true); + return raw_copy_to_user(to, from, n); +} + +#ifdef INLINE_COPY_FROM_USER +static inline unsigned long +_copy_from_user(void *to, const void __user *from, unsigned long n) +{ + unsigned long res = n; + if (likely(access_ok(VERIFY_READ, from, n))) + res = raw_copy_from_user(to, from, n); + if (unlikely(res)) + memset(to + (n - res), 0, res); + return res; +} +#else +extern unsigned long +_copy_from_user(void *, const void __user *, unsigned long); +#endif + +#ifdef INLINE_COPY_TO_USER +static inline unsigned long +_copy_to_user(void __user *to, const void *from, unsigned long n) +{ + if (access_ok(VERIFY_WRITE, to, n)) + n = raw_copy_to_user(to, from, n); + return n; +} +#else +extern unsigned long +_copy_to_user(void __user *, const void *, unsigned long); +#endif + +extern void __compiletime_error("usercopy buffer size is too small") +__bad_copy_user(void); + +static inline void copy_user_overflow(int size, unsigned long count) +{ + WARN(1, "Buffer overflow detected (%d < %lu)!\n", size, count); +} + +static __always_inline unsigned long __must_check +copy_from_user(void *to, const void __user *from, unsigned long n) +{ + int sz = __compiletime_object_size(to); + + might_fault(); + kasan_check_write(to, n); + + if (likely(sz < 0 || sz >= n)) { + check_object_size(to, n, false); + n = _copy_from_user(to, from, n); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); + + return n; +} + +static __always_inline unsigned long __must_check +copy_to_user(void __user *to, const void *from, unsigned long n) +{ + 
int sz = __compiletime_object_size(from); + + kasan_check_read(from, n); + might_fault(); + + if (likely(sz < 0 || sz >= n)) { + check_object_size(from, n, true); + n = _copy_to_user(to, from, n); + } else if (!__builtin_constant_p(n)) + copy_user_overflow(sz, n); + else + __bad_copy_user(); + + return n; +} +#ifdef CONFIG_COMPAT +static __always_inline unsigned long __must_check +__copy_in_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + return raw_copy_in_user(to, from, n); +} +static __always_inline unsigned long __must_check +copy_in_user(void __user *to, const void *from, unsigned long n) +{ + might_fault(); + if (access_ok(VERIFY_WRITE, to, n) && access_ok(VERIFY_READ, from, n)) + n = raw_copy_in_user(to, from, n); + return n; +} +#endif +#endif + static __always_inline void pagefault_disabled_inc(void) { current->pagefault_disabled++; -- cgit v1.2.3 From 3f763453e6f27d82fa0ac58f8e1ac4094c1fb1f8 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 25 Mar 2017 18:47:28 -0400 Subject: kill __copy_from_user_nocache() Signed-off-by: Al Viro --- include/linux/uaccess.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 5f76bc995d96..7fc2104b88bc 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -261,12 +261,6 @@ static inline unsigned long __copy_from_user_inatomic_nocache(void *to, return __copy_from_user_inatomic(to, from, n); } -static inline unsigned long __copy_from_user_nocache(void *to, - const void __user *from, unsigned long n) -{ - return __copy_from_user(to, from, n); -} - #endif /* ARCH_HAS_NOCACHE_UACCESS */ /* -- cgit v1.2.3 From 5052de8deff5619a9b7071f00084fd0264b58e17 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 27 Mar 2017 22:26:33 -0700 Subject: soc: qcom: smd: Transition client drivers from smd to rpmsg By moving these client drivers to use RPMSG instead of the direct SMD API we can reuse them ontop 
of the newly added GLINK wire-protocol support found in the 820 and 835 Qualcomm platforms. As the new (RPMSG-based) and old SMD implementations are mutually exclusive we have to change all client drivers in one commit, to make sure we have a working system before and after this transition. Acked-by: Andy Gross Acked-by: Kalle Valo Acked-by: Marcel Holtmann Signed-off-by: Bjorn Andersson Signed-off-by: David S. Miller --- include/linux/soc/qcom/wcnss_ctrl.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/wcnss_ctrl.h b/include/linux/soc/qcom/wcnss_ctrl.h index eab64976a73b..a4dd4d7c711d 100644 --- a/include/linux/soc/qcom/wcnss_ctrl.h +++ b/include/linux/soc/qcom/wcnss_ctrl.h @@ -1,16 +1,19 @@ #ifndef __WCNSS_CTRL_H__ #define __WCNSS_CTRL_H__ -#include +#include #if IS_ENABLED(CONFIG_QCOM_WCNSS_CTRL) -struct qcom_smd_channel *qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb); +struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, const char *name, + rpmsg_rx_cb_t cb, void *priv); #else -static inline struct qcom_smd_channel* -qcom_wcnss_open_channel(void *wcnss, const char *name, qcom_smd_cb_t cb) +static struct rpmsg_endpoint *qcom_wcnss_open_channel(void *wcnss, + const char *name, + rpmsg_rx_cb_t cb, + void *priv) { WARN_ON(1); return ERR_PTR(-ENXIO); -- cgit v1.2.3 From 395a48053af6c5e0f0217b610dcb7225ea3e3e42 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 27 Mar 2017 22:26:34 -0700 Subject: soc: qcom: smd: Remove standalone driver Remove the standalone SMD implementation as we have transitioned the client drivers to use the RPMSG based one. Also remove all dependencies on QCOM_SMD from Kconfig files, in order to keep them selectable in the absence of the removed symbol. Acked-by: Andy Gross Signed-off-by: Bjorn Andersson Signed-off-by: David S. 
Miller --- include/linux/rpmsg/qcom_smd.h | 2 +- include/linux/soc/qcom/smd.h | 139 ----------------------------------------- 2 files changed, 1 insertion(+), 140 deletions(-) delete mode 100644 include/linux/soc/qcom/smd.h (limited to 'include/linux') diff --git a/include/linux/rpmsg/qcom_smd.h b/include/linux/rpmsg/qcom_smd.h index 8ec8b6439b25..f27917e0a101 100644 --- a/include/linux/rpmsg/qcom_smd.h +++ b/include/linux/rpmsg/qcom_smd.h @@ -6,7 +6,7 @@ struct qcom_smd_edge; -#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD) || IS_ENABLED(CONFIG_QCOM_SMD) +#if IS_ENABLED(CONFIG_RPMSG_QCOM_SMD) struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, struct device_node *node); diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h deleted file mode 100644 index f148e0ffbec7..000000000000 --- a/include/linux/soc/qcom/smd.h +++ /dev/null @@ -1,139 +0,0 @@ -#ifndef __QCOM_SMD_H__ -#define __QCOM_SMD_H__ - -#include -#include - -struct qcom_smd; -struct qcom_smd_channel; -struct qcom_smd_lookup; - -/** - * struct qcom_smd_id - struct used for matching a smd device - * @name: name of the channel - */ -struct qcom_smd_id { - char name[20]; -}; - -/** - * struct qcom_smd_device - smd device struct - * @dev: the device struct - * @channel: handle to the smd channel for this device - */ -struct qcom_smd_device { - struct device dev; - struct qcom_smd_channel *channel; -}; - -typedef int (*qcom_smd_cb_t)(struct qcom_smd_channel *, const void *, size_t); - -/** - * struct qcom_smd_driver - smd driver struct - * @driver: underlying device driver - * @smd_match_table: static channel match table - * @probe: invoked when the smd channel is found - * @remove: invoked when the smd channel is closed - * @callback: invoked when an inbound message is received on the channel, - * should return 0 on success or -EBUSY if the data cannot be - * consumed at this time - */ -struct qcom_smd_driver { - struct device_driver driver; - const struct qcom_smd_id *smd_match_table; 
- - int (*probe)(struct qcom_smd_device *dev); - void (*remove)(struct qcom_smd_device *dev); - qcom_smd_cb_t callback; -}; - -#if IS_ENABLED(CONFIG_QCOM_SMD) - -int qcom_smd_driver_register(struct qcom_smd_driver *drv); -void qcom_smd_driver_unregister(struct qcom_smd_driver *drv); - -struct qcom_smd_channel *qcom_smd_open_channel(struct qcom_smd_channel *channel, - const char *name, - qcom_smd_cb_t cb); -void qcom_smd_close_channel(struct qcom_smd_channel *channel); -void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel); -void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data); -int qcom_smd_send(struct qcom_smd_channel *channel, const void *data, int len); - - -struct qcom_smd_edge *qcom_smd_register_edge(struct device *parent, - struct device_node *node); -int qcom_smd_unregister_edge(struct qcom_smd_edge *edge); - -#else - -static inline int qcom_smd_driver_register(struct qcom_smd_driver *drv) -{ - return -ENXIO; -} - -static inline void qcom_smd_driver_unregister(struct qcom_smd_driver *drv) -{ - /* This shouldn't be possible */ - WARN_ON(1); -} - -static inline struct qcom_smd_channel * -qcom_smd_open_channel(struct qcom_smd_channel *channel, - const char *name, - qcom_smd_cb_t cb) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return NULL; -} - -static inline void qcom_smd_close_channel(struct qcom_smd_channel *channel) -{ - /* This shouldn't be possible */ - WARN_ON(1); -} - -static inline void *qcom_smd_get_drvdata(struct qcom_smd_channel *channel) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return NULL; -} - -static inline void qcom_smd_set_drvdata(struct qcom_smd_channel *channel, void *data) -{ - /* This shouldn't be possible */ - WARN_ON(1); -} - -static inline int qcom_smd_send(struct qcom_smd_channel *channel, - const void *data, int len) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return -ENXIO; -} - -static inline struct qcom_smd_edge * -qcom_smd_register_edge(struct device *parent, - struct 
device_node *node) -{ - return ERR_PTR(-ENXIO); -} - -static inline int qcom_smd_unregister_edge(struct qcom_smd_edge *edge) -{ - /* This shouldn't be possible */ - WARN_ON(1); - return -ENXIO; -} - -#endif - -#define module_qcom_smd_driver(__smd_driver) \ - module_driver(__smd_driver, qcom_smd_driver_register, \ - qcom_smd_driver_unregister) - - -#endif -- cgit v1.2.3 From 3b0228656dcb07a1c9fc81e8516475c2d7c4300e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 28 Mar 2017 14:28:02 -0700 Subject: net: devinet: Refactor inet_netconf_notify_devconf to take event Refactor inet_netconf_notify_devconf to take the event as an input arg. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index ee971f335a8b..a2e9d6ea1349 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -153,8 +153,8 @@ struct in_ifaddr { int register_inetaddr_notifier(struct notifier_block *nb); int unregister_inetaddr_notifier(struct notifier_block *nb); -void inet_netconf_notify_devconf(struct net *net, int type, int ifindex, - struct ipv4_devconf *devconf); +void inet_netconf_notify_devconf(struct net *net, int event, int type, + int ifindex, struct ipv4_devconf *devconf); struct net_device *__ip_dev_find(struct net *net, __be32 addr, bool devref); static inline struct net_device *ip_dev_find(struct net *net, __be32 addr) -- cgit v1.2.3 From c6e970a04bdceb7ef1fdbac6be3bd4cd0a0a02bd Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 28 Mar 2017 23:45:06 +0200 Subject: net: break include loop netdevice.h, dsa.h, devlink.h There is an include loop between netdevice.h, dsa.h, devlink.h because of NETDEV_ALIGN, making it impossible to use devlink structures in dsa.h. 
Break this loop by taking dsa.h out of netdevice.h, add a forward declaration of dsa_switch_tree and netdev_set_default_ethtool_ops() function, which is what netdevice.h requires. No longer having dsa.h in netdevice.h means the includes in dsa.h no longer get included. This breaks a few other files which depend on these includes. Add these directly in the affected file. Signed-off-by: Andrew Lunn Reviewed-by: Florian Fainelli Signed-off-by: David S. Miller --- include/linux/netdevice.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b7365b587818..cc07c3be2705 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -41,7 +41,6 @@ #include #include -#include #ifdef CONFIG_DCB #include #endif @@ -57,6 +56,8 @@ struct netpoll_info; struct device; struct phy_device; +struct dsa_switch_tree; + /* 802.11 specific */ struct wireless_dev; /* 802.15.4 specific */ @@ -2004,15 +2005,6 @@ void dev_net_set(struct net_device *dev, struct net *net) write_pnet(&dev->nd_net, net); } -static inline bool netdev_uses_dsa(struct net_device *dev) -{ -#if IS_ENABLED(CONFIG_NET_DSA) - if (dev->dsa_ptr != NULL) - return dsa_uses_tagged_protocol(dev->dsa_ptr); -#endif - return false; -} - /** * netdev_priv - access network device private data * @dev: network device -- cgit v1.2.3 From ed77d6bcafd75d247cf3c6ad685aa221cda1b8ba Mon Sep 17 00:00:00 2001 From: Emiliano Ingrassia Date: Tue, 28 Mar 2017 09:49:29 +0200 Subject: spi: dynamically allocated message initialization Invoke the proper function while initializing a dynamically allocated spi_message to avoid NULL pointer dereference during resources deallocation.
Signed-off-by: Emiliano Ingrassia Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 75c6bd0ac605..3b0070695375 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -891,7 +891,7 @@ static inline struct spi_message *spi_message_alloc(unsigned ntrans, gfp_t flags unsigned i; struct spi_transfer *t = (struct spi_transfer *)(m + 1); - INIT_LIST_HEAD(&m->transfers); + spi_message_init_no_memset(m); for (i = 0; i < ntrans; i++, t++) spi_message_add_tail(t, m); } -- cgit v1.2.3 From fd086045559d90cd7854818b4c60a7119eda6231 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 27 Mar 2017 16:54:12 -0700 Subject: regulator: core: Limit propagation of parent voltage count and list Commit 26988efe11b1 ("regulator: core: Allow to get voltage count and list from parent") introduces the propagation of the parent voltage count and list for regulators that don't provide this information themselves. The goal is to support simple switch regulators, however as a side effect normal continuous regulators can leak details of their supplies and provide consumers with inconsistent information. Limit the propagation of the voltage count and list to switch regulators. 
Fixes: 26988efe11b1 ("regulator: core: Allow to get voltage count and list from parent") Signed-off-by: Matthias Kaehlcke Reviewed-by: Javier Martinez Canillas Tested-by: Javier Martinez Canillas Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index dac8e7b16bc6..4cb1c9be6073 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -429,6 +429,8 @@ struct regulator_dev { struct regulator_enable_gpio *ena_pin; unsigned int ena_gpio_state:1; + unsigned int is_switch:1; + /* time when this regulator was disabled last time */ unsigned long last_off_jiffy; }; -- cgit v1.2.3 From 1671d522cdd9933dee7dddfcf9f62c561283824a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 27 Mar 2017 20:06:57 +0800 Subject: block: rename blk_mq_freeze_queue_start() As the .q_usage_counter is used by both legacy and mq path, we need to block new I/O if queue becomes dead in blk_queue_enter(). So rename it and we can use this function in both paths. 
Reviewed-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Ming Lei Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5b3e201c8d4f..ea2e9dcd3aef 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -243,7 +243,7 @@ void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_unfreeze_queue(struct request_queue *q); -void blk_mq_freeze_queue_start(struct request_queue *q); +void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -- cgit v1.2.3 From 334335d2f7a077a5ff561d86b0ad43bedd83ca05 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Tue, 28 Mar 2017 16:12:15 -0700 Subject: block: warn if sharing request queue across gendisks Now that the remaining drivers have been converted to one request queue per gendisk, let's warn if a request queue gets registered more than once. This will catch future drivers which might do it inadvertently or any old drivers that I may have missed. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1a7dc42a8918..a2dc6b390d48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -617,6 +617,7 @@ struct request_queue { #define QUEUE_FLAG_STATS 27 /* track rq completion times */ #define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ #define QUEUE_FLAG_POLL_STATS 29 /* collecting stats for hybrid polling */ +#define QUEUE_FLAG_REGISTERED 30 /* queue has been registered to a disk */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From d708f0d5026f48081debdd1c5b0a5636455a9589 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 29 Mar 2017 11:25:48 -0600 Subject: Revert "blkcg: allocate struct blkcg_gq outside request queue spinlock" I inadvertently applied the v5 version of this patch, whereas the agreed upon version was v5. Revert this one so we can apply the right one. This reverts commit 7fc6b87a9ff537e7df32b1278118ce9c5bcd6788. 
--- include/linux/blk-cgroup.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 955903a8f6cb..01b62e7bac74 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -172,8 +172,7 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, - struct request_queue *q, gfp_t gfp, - const struct blkcg_policy *pol); + struct request_queue *q); int blkcg_init_queue(struct request_queue *q); void blkcg_drain_queue(struct request_queue *q); void blkcg_exit_queue(struct request_queue *q); @@ -695,8 +694,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(q->queue_lock); - blkg = blkg_lookup_create(blkcg, q, GFP_NOWAIT | __GFP_NOWARN, - NULL); + blkg = blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; spin_unlock_irq(q->queue_lock); -- cgit v1.2.3 From b2e33536c010513e07e92ca914fcc11108d5eef5 Mon Sep 17 00:00:00 2001 From: sayli karnik Date: Thu, 30 Mar 2017 02:01:16 +0530 Subject: Documentation: Add flexible-arrays.rst to the documentation tree Add flexible-arrays.rst to Documentation/core-api. Add kernel-doc comments to allow referencing. Signed-off-by: sayli karnik Signed-off-by: Jonathan Corbet --- include/linux/flex_array.h | 67 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/flex_array.h b/include/linux/flex_array.h index b6efb0c64408..11366b3ff0b4 100644 --- a/include/linux/flex_array.h +++ b/include/linux/flex_array.h @@ -61,16 +61,83 @@ struct flex_array { FLEX_ARRAY_ELEMENTS_PER_PART(__element_size)); \ } +/** + * flex_array_alloc() - Creates a flexible array. 
+ * @element_size: individual object size. + * @total: maximum number of objects which can be stored. + * @flags: GFP flags + * + * Return: Returns an object of structure flex_array. + */ struct flex_array *flex_array_alloc(int element_size, unsigned int total, gfp_t flags); + +/** + * flex_array_prealloc() - Ensures that memory for the elements indexed in the + * range defined by start and nr_elements has been allocated. + * @fa: array to allocate memory to. + * @start: start address + * @nr_elements: number of elements to be allocated. + * @flags: GFP flags + * + */ int flex_array_prealloc(struct flex_array *fa, unsigned int start, unsigned int nr_elements, gfp_t flags); + +/** + * flex_array_free() - Removes all elements of a flexible array. + * @fa: array to be freed. + */ void flex_array_free(struct flex_array *fa); + +/** + * flex_array_free_parts() - Removes all elements of a flexible array, but + * leaves the array itself in place. + * @fa: array to be emptied. + */ void flex_array_free_parts(struct flex_array *fa); + +/** + * flex_array_put() - Stores data into a flexible array. + * @fa: array where element is to be stored. + * @element_nr: position to copy, must be less than the maximum specified when + * the array was created. + * @src: data source to be copied into the array. + * @flags: GFP flags + * + * Return: Returns zero on success, a negative error code otherwise. + */ int flex_array_put(struct flex_array *fa, unsigned int element_nr, void *src, gfp_t flags); + +/** + * flex_array_clear() - Clears an individual element in the array, sets the + * given element to FLEX_ARRAY_FREE. + * @element_nr: element position to clear. + * @fa: array to which element to be cleared belongs. + * + * Return: Returns zero on success, -EINVAL otherwise. + */ int flex_array_clear(struct flex_array *fa, unsigned int element_nr); + +/** + * flex_array_get() - Retrieves data into a flexible array. + * + * @element_nr: Element position to retrieve data from. 
+ * @fa: array from which data is to be retrieved. + * + * Return: Returns a pointer to the data element, or NULL if that + * particular element has never been allocated. + */ void *flex_array_get(struct flex_array *fa, unsigned int element_nr); + +/** + * flex_array_shrink() - Reduces the allocated size of an array. + * @fa: array to shrink. + * + * Return: Returns number of pages of memory actually freed. + * + */ int flex_array_shrink(struct flex_array *fa); #define flex_array_put_ptr(fa, nr, src, gfp) \ -- cgit v1.2.3 From d3881e5015421a578bc328136471fcf1d02ac389 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Feb 2017 14:32:33 -0500 Subject: PCI: Export PCI device config accessors Replace the inline PCI device config read and write accessors with exported functions. This is preparing for these functions to make use of private data. Tested-by: Krishna Dhulipala Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Wei Zhang --- include/linux/pci.h | 32 ++++++-------------------------- 1 file changed, 6 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..d705f3088ff9 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -940,32 +940,12 @@ int pci_generic_config_write32(struct pci_bus *bus, unsigned int devfn, struct pci_ops *pci_bus_set_ops(struct pci_bus *bus, struct pci_ops *ops); -static inline int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val) -{ - return pci_bus_read_config_byte(dev->bus, dev->devfn, where, val); -} -static inline int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val) -{ - return pci_bus_read_config_word(dev->bus, dev->devfn, where, val); -} -static inline int pci_read_config_dword(const struct pci_dev *dev, int where, - u32 *val) -{ - return pci_bus_read_config_dword(dev->bus, dev->devfn, where, val); -} -static inline int pci_write_config_byte(const struct 
pci_dev *dev, int where, u8 val) -{ - return pci_bus_write_config_byte(dev->bus, dev->devfn, where, val); -} -static inline int pci_write_config_word(const struct pci_dev *dev, int where, u16 val) -{ - return pci_bus_write_config_word(dev->bus, dev->devfn, where, val); -} -static inline int pci_write_config_dword(const struct pci_dev *dev, int where, - u32 val) -{ - return pci_bus_write_config_dword(dev->bus, dev->devfn, where, val); -} +int pci_read_config_byte(const struct pci_dev *dev, int where, u8 *val); +int pci_read_config_word(const struct pci_dev *dev, int where, u16 *val); +int pci_read_config_dword(const struct pci_dev *dev, int where, u32 *val); +int pci_write_config_byte(const struct pci_dev *dev, int where, u8 val); +int pci_write_config_word(const struct pci_dev *dev, int where, u16 val); +int pci_write_config_dword(const struct pci_dev *dev, int where, u32 val); int pcie_capability_read_word(struct pci_dev *dev, int pos, u16 *val); int pcie_capability_read_dword(struct pci_dev *dev, int pos, u32 *val); -- cgit v1.2.3 From 89ee9f7680031d7df91a1a27abac69e034c2e892 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 29 Mar 2017 22:48:59 -0500 Subject: PCI: Add device disconnected state Add a new state to pci_dev to be set when it is unexpectedly disconnected. The PCI driver tear down functions can observe this new device state so they may skip operations that will fail. The pciehp and pcie-dpc drivers are aware when the link is down, so these set the flag when their handlers detect the device is disconnected. 
Tested-by: Krishna Dhulipala Signed-off-by: Keith Busch Signed-off-by: Bjorn Helgaas Reviewed-by: Christoph Hellwig Reviewed-by: Wei Zhang --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index d705f3088ff9..2887933329a2 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -396,6 +396,8 @@ struct pci_dev { phys_addr_t rom; /* Physical address of ROM if it's not from the BAR */ size_t romlen; /* Length of ROM if it's not from the BAR */ char *driver_override; /* Driver name to force a match */ + + unsigned long priv_flags; /* Private flags for the pci driver */ }; static inline struct pci_dev *pci_physfn(struct pci_dev *dev) -- cgit v1.2.3 From 44fe84459faf1a7781595b7c64cd36daf2f2827d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 27 Mar 2017 13:54:38 +0200 Subject: locking/atomic: Fix atomic_try_cmpxchg() semantics Dmitry noted that the new atomic_try_cmpxchg() primitive is broken when the old pointer doesn't point to the local stack. He writes: "Consider a classical lock-free stack push: node->next = atomic_read(&head); do { } while (!atomic_try_cmpxchg(&head, &node->next, node)); This code is broken with the current implementation, the problem is with unconditional update of *__po. In case of success it writes the same value back into *__po, but in case of cmpxchg success we might have lose ownership of some memory locations and potentially over what __po has pointed to. The same holds for the re-read of *__po. " He also points out that this makes it surprisingly different from the similar C/C++ atomic operation. 
After investigating the code-gen differences caused by this patch; and a number of alternatives (Linus dislikes this interface lots), we arrived at these results (size x86_64-defconfig/vmlinux): GCC-6.3.0: 10735757 cmpxchg 10726413 try_cmpxchg 10730509 try_cmpxchg + patch 10730445 try_cmpxchg-linus GCC-7 (20170327): 10709514 cmpxchg 10704266 try_cmpxchg 10704266 try_cmpxchg + patch 10704394 try_cmpxchg-linus From this we see that the patch has the advantage of better code-gen on GCC-7 and keeps the interface roughly consistent with the C language variant. Reported-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Fixes: a9ebf306f52c ("locking/atomic: Introduce atomic_try_cmpxchg()") Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index aae5953817d6..c56be7410130 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -428,9 +428,11 @@ #define __atomic_try_cmpxchg(type, _p, _po, _n) \ ({ \ typeof(_po) __po = (_po); \ - typeof(*(_po)) __o = *__po; \ - *__po = atomic_cmpxchg##type((_p), __o, (_n)); \ - (*__po == __o); \ + typeof(*(_po)) __r, __o = *__po; \ + __r = atomic_cmpxchg##type((_p), __o, (_n)); \ + if (unlikely(__r != __o)) \ + *__po = __r; \ + likely(__r == __o); \ }) #define atomic_try_cmpxchg(_p, _po, _n) __atomic_try_cmpxchg(, _p, _po, _n) @@ -1022,9 +1024,11 @@ static inline int atomic_dec_if_positive(atomic_t *v) #define __atomic64_try_cmpxchg(type, _p, _po, _n) \ ({ \ typeof(_po) __po = (_po); \ - typeof(*(_po)) __o = *__po; \ - *__po = atomic64_cmpxchg##type((_p), __o, (_n)); \ - (*__po == __o); \ + typeof(*(_po)) __r, __o = *__po; \ + __r = atomic64_cmpxchg##type((_p), __o, (_n)); \ + if (unlikely(__r != __o)) \ + *__po = __r; \ + likely(__r 
== __o); \ }) #define atomic64_try_cmpxchg(_p, _po, _n) __atomic64_try_cmpxchg(, _p, _po, _n) -- cgit v1.2.3 From 19d436268dde95389c616bb3819da73f0a8b28a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 25 Feb 2017 08:56:53 +0100 Subject: debug: Add _ONCE() logic to report_bug() Josh suggested moving the _ONCE logic inside the trap handler, using a bit in the bug_entry::flags field, avoiding the need for the extra variable. Sadly this only works for WARN_ON_ONCE(), since the others have printk() statements prior to triggering the trap. Still, this saves a fair amount of text and some data: text data filename 10682460 4530992 defconfig-build/vmlinux.orig 10665111 4530096 defconfig-build/vmlinux.patched Suggested-by: Josh Poimboeuf Signed-off-by: Peter Zijlstra (Intel) Cc: Andy Lutomirski Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/bug.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index 5828489309bb..687b557fc5eb 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -105,7 +105,7 @@ static inline int is_warning_bug(const struct bug_entry *bug) return bug->flags & BUGFLAG_WARNING; } -const struct bug_entry *find_bug(unsigned long bugaddr); +struct bug_entry *find_bug(unsigned long bugaddr); enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); -- cgit v1.2.3 From cf25f904ef75aa7c25097eb4981bbc634bf5ff9e Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Fri, 24 Feb 2017 02:48:21 -0600 Subject: x86/events/amd/iommu: Add IOMMU-specific hw_perf_event struct MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Current AMD IOMMU perf PMU inappropriately uses the hardware struct inside the union in struct hw_perf_event, extra_reg in particular. 
Instead, introduce an AMD IOMMU-specific struct with required parameters to be programmed into the IOMMU performance counter control register. Update the pasid field from 16 to 20 bits while at it. Signed-off-by: Suravee Suthikulpanit [ Fixup macros, shorten get_next_avail_iommu_bnk_cntr() local vars, massage commit message. ] Signed-off-by: Borislav Petkov Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Jörg Rödel Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: iommu@lists.linux-foundation.org Link: http://lkml.kernel.org/r/1487926102-13073-10-git-send-email-Suravee.Suthikulpanit@amd.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b6e75c9d4791..24a635887f28 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -165,6 +165,13 @@ struct hw_perf_event { struct list_head bp_list; }; #endif + struct { /* amd_iommu */ + u8 iommu_bank; + u8 iommu_cntr; + u16 padding; + u64 conf; + u64 conf1; + }; }; /* * If the event is a per task event, this will point to the task in -- cgit v1.2.3 From 959d973e9890150342df76160d966ab1270208df Mon Sep 17 00:00:00 2001 From: Xiaolei Yu Date: Sat, 25 Mar 2017 14:04:58 +0800 Subject: HID: add two missing usages for digitizer They are part of HUTRR34 for multi-touch digitizers: 0x0E Device configuration CA 16.7 0x23 Device settings CL 16.7 Signed-off-by: Xiaolei Yu Signed-off-by: Jiri Kosina --- include/linux/hid.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 28f38e2b8f30..536f11fd21d5 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -268,6 +268,8 @@ struct hid_item { #define HID_CP_APPLICATIONLAUNCHBUTTONS 0x000c0180 #define 
HID_CP_GENERICGUIAPPLICATIONCONTROLS 0x000c0200 +#define HID_DG_DEVICECONFIG 0x000d000e +#define HID_DG_DEVICESETTINGS 0x000d0023 #define HID_DG_CONFIDENCE 0x000d0047 #define HID_DG_WIDTH 0x000d0048 #define HID_DG_HEIGHT 0x000d0049 -- cgit v1.2.3 From ae7c18380495ac5c14a614fdb6c452c3bf9148ac Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 7 Mar 2017 20:40:05 +0800 Subject: ACPI: platform-msi: retrieve devid from IORT For devices connecting to an ITS, the devices need to identify themselves through a devid; this devid is represented in the IORT table in named component node [1] for platform devices, so this patch adds code that scans the IORT table to retrieve the devices devid. Add an IORT interface to collect ITS devices devid to carry out platform devices MSI mappings with IORT tables. [1]: https://static.docs.arm.com/den0049/b/DEN0049B_IO_Remapping_Table.pdf Signed-off-by: Hanjun Guo [lorenzo.pieralisi@arm.com: rewrote commit log/dropped ITS changes] Signed-off-by: Lorenzo Pieralisi Tested-by: Ming Lei Tested-by: Wei Xu Tested-by: Sinan Kaya Cc: Marc Zyngier Cc: Lorenzo Pieralisi Cc: Tomasz Nowicki Cc: Thomas Gleixner --- include/linux/acpi_iort.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 77e08099e554..fd8b9698e1d1 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -34,6 +34,7 @@ void acpi_iort_init(void); bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); +int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ void iort_set_dma_mask(struct device *dev); const struct iommu_ops *iort_iommu_configure(struct device *dev); -- cgit v1.2.3 From d4f54a186667ffd19eac8e3f48c51d940a9b9784 Mon Sep 17 00:00:00 2001 From: Hanjun Guo Date: Tue, 7 Mar 2017 20:40:06 +0800 Subject: ACPI: platform: setup MSI domain for ACPI
based platform device By allowing platform MSI domain to be created on ACPI platforms, a platform device MSI domain can be set-up when it is probed. In order to do that, the MSI domain the platform device connects to should be retrieved, so the iort_get_platform_device_domain() is introduced to retrieve the domain from the IORT kernel layer. With the domain retrieved, we need a proper way to set the domain to platform device. Given that some platform devices (irqchips) require the MSI irqdomain to be their interrupt parent domain, the MSI irqdomain should be determined before platform device is probed but after the platform device is allocated which means that the code setting up the MSI irqdomain, ie acpi_configure_pmsi_domain() should be called in acpi_platform_notify() (that is triggered after adding a device but before the respective driver is probed) for the platform MSI domain code set-up path to work properly. Acked-by: Rafael J. Wysocki [for glue.c] Signed-off-by: Hanjun Guo [lorenzo.pieralisi@arm.com: rewrote commit log] Signed-off-by: Lorenzo Pieralisi Tested-by: Ming Lei Tested-by: Wei Xu Tested-by: Sinan Kaya Cc: Marc Zyngier Cc: Lorenzo Pieralisi Cc: Tomasz Nowicki --- include/linux/acpi_iort.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index fd8b9698e1d1..26e25d85eb3e 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -34,6 +34,7 @@ void acpi_iort_init(void); bool iort_node_match(u8 type); u32 iort_msi_map_rid(struct device *dev, u32 req_id); struct irq_domain *iort_get_device_domain(struct device *dev, u32 req_id); +void acpi_configure_pmsi_domain(struct device *dev); int iort_pmsi_get_dev_id(struct device *dev, u32 *dev_id); /* IOMMU interface */ void iort_set_dma_mask(struct device *dev); @@ -46,6 +47,7 @@ static inline u32 iort_msi_map_rid(struct device *dev, u32 req_id) static inline struct irq_domain *iort_get_device_domain(struct 
device *dev, u32 req_id) { return NULL; } +static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ static inline void iort_set_dma_mask(struct device *dev) { } static inline -- cgit v1.2.3 From 8531e283bee66050734fb0e89d53e85fd5ce24a4 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 10 Mar 2017 21:23:45 +0100 Subject: PCI: Recognize Thunderbolt devices Detect on probe whether a PCI device is part of a Thunderbolt controller. Intel uses a Vendor-Specific Extended Capability (VSEC) with ID 0x1234 on such devices. Detect presence of this VSEC and cache it in a newly added is_thunderbolt bit in struct pci_dev. Also, add a helper to check whether a given PCI device is situated on a Thunderbolt daisy chain (i.e., below a PCI device with is_thunderbolt set). The necessity arises from the following: * If an external Thunderbolt GPU is connected to a dual GPU laptop, that GPU is currently registered with vga_switcheroo even though it can neither drive the laptop's panel nor be powered off by the platform. To vga_switcheroo it will appear as if two discrete GPUs are present. As a result, when the external GPU is runtime suspended, vga_switcheroo will cut power to the internal discrete GPU which may not be runtime suspended at all at this moment. The solution is to not register external GPUs with vga_switcheroo, which necessitates a way to recognize if they're on a Thunderbolt daisy chain. * Dual GPU MacBook Pros introduced 2011+ can no longer switch external DisplayPort ports between GPUs. (They're no longer just used for DP but have become combined DP/Thunderbolt ports.) The driver to switch the ports, drivers/platform/x86/apple-gmux.c, needs to detect presence of a Thunderbolt controller and, if found, keep external ports permanently switched to the discrete GPU. v2: Make kerneldoc for pci_is_thunderbolt_attached() more precise, drop portion of commit message pertaining to separate series. 
(Bjorn Helgaas) Cc: Andreas Noever Cc: Michael Jamet Cc: Tomas Winkler Cc: Amir Levy Acked-by: Bjorn Helgaas Signed-off-by: Lukas Wunner Link: http://patchwork.freedesktop.org/patch/msgid/0ab165a4a35c0b60f29d4c306c653ead14fcd8f9.1489145162.git.lukas@wunner.de --- include/linux/pci.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..5948cfdc984e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -358,6 +358,7 @@ struct pci_dev { unsigned int is_virtfn:1; unsigned int reset_fn:1; unsigned int is_hotplug_bridge:1; + unsigned int is_thunderbolt:1; /* Thunderbolt controller */ unsigned int __aer_firmware_first_valid:1; unsigned int __aer_firmware_first:1; unsigned int broken_intx_masking:1; @@ -2160,6 +2161,28 @@ static inline bool pci_ari_enabled(struct pci_bus *bus) return bus->self && bus->self->ari_enabled; } +/** + * pci_is_thunderbolt_attached - whether device is on a Thunderbolt daisy chain + * @pdev: PCI device to check + * + * Walk upwards from @pdev and check for each encountered bridge if it's part + * of a Thunderbolt controller. Reaching the host bridge means @pdev is not + * Thunderbolt-attached. (But rather soldered to the mainboard usually.) + */ +static inline bool pci_is_thunderbolt_attached(struct pci_dev *pdev) +{ + struct pci_dev *parent = pdev; + + if (pdev->is_thunderbolt) + return true; + + while ((parent = pci_upstream_bridge(parent))) + if (parent->is_thunderbolt) + return true; + + return false; +} + /* provide the legacy pci_dma_* API */ #include -- cgit v1.2.3 From a3caf7440dedd2399f90f27ff11ac390bf03e6c4 Mon Sep 17 00:00:00 2001 From: Vidyullatha Kanchanapally Date: Fri, 31 Mar 2017 00:22:34 +0300 Subject: cfg80211: Add support for FILS shared key authentication offload Enhance nl80211 and cfg80211 connect request and response APIs to support FILS shared key authentication offload. 
The new nl80211 attributes can be used to provide additional information to the driver to establish a FILS connection. Also enhance the set/del PMKSA to allow support for adding and deleting PMKSA based on FILS cache identifier. Add a new feature flag that drivers can use to advertize support for FILS shared key authentication and association in station mode when using their own SME. Signed-off-by: Vidyullatha Kanchanapally Signed-off-by: Jouni Malinen Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 22bf0676d928..294fa6273a62 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1723,6 +1723,9 @@ enum ieee80211_statuscode { WLAN_STATUS_REJECT_DSE_BAND = 96, WLAN_STATUS_DENIED_WITH_SUGGESTED_BAND_AND_CHANNEL = 99, WLAN_STATUS_DENIED_DUE_TO_SPECTRUM_MANAGEMENT = 103, + /* 802.11ai */ + WLAN_STATUS_FILS_AUTHENTICATION_FAILURE = 108, + WLAN_STATUS_UNKNOWN_AUTHENTICATION_SERVER = 109, }; @@ -2104,6 +2107,12 @@ enum ieee80211_key_len { #define FILS_NONCE_LEN 16 #define FILS_MAX_KEK_LEN 64 +#define FILS_ERP_MAX_USERNAME_LEN 16 +#define FILS_ERP_MAX_REALM_LEN 253 +#define FILS_ERP_MAX_RRK_LEN 64 + +#define PMK_MAX_LEN 48 + /* Public action codes */ enum ieee80211_pub_actioncode { WLAN_PUB_ACTION_EXT_CHANSW_ANN = 4, @@ -2355,6 +2364,10 @@ enum ieee80211_sa_query_action { #define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) #define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) #define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) +#define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) +#define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) +#define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From f7048b15900f36fe21398fba94777b8aab3b376d Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Fri, 24 Mar 2017 
10:57:59 +0530 Subject: tty: serial_core: Add name field to uart_port struct Introduce a field to store name of uart_port that can be used to easily identify UART port instances on a system that has more than one UART instance. The name is of the form ttyXN (e.g. ttyS0, ttyAMA0, ...) where N is the number of that particular UART instance. This field will be useful when printing debug info for a particular port or when registering IRQs with a unique IRQ name. Port name is populated during uart_add_one_port(). Signed-off-by: Vignesh R Reviewed-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 58484fb35cc8..60530678c633 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -247,6 +247,7 @@ struct uart_port { unsigned char suspended; unsigned char irq_wake; unsigned char unused[2]; + char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; -- cgit v1.2.3 From 7ed98e0168bd23d8ea3294e95254cc5b4000c948 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Mar 2017 10:46:14 +0000 Subject: drivers/perf: arm_pmu: manage interrupts per-cpu When requesting or freeing interrupts, we use platform_get_irq() to find relevant irqs, backing this up with additional information in an optional irq_affinity table. This means that our irq request and free paths are tied to a platform_device, and our request path must jump through a number of hoops in order to determine the required affinity of each interrupt. Given that the affinity must be static, we can compute the affinity once up-front at probe time, simplifying the irq request and free paths.
By recording interrupts in a per-cpu data structure, we simplify a few paths, and permit a subsequent rework of the request and free paths. Signed-off-by: Mark Rutland [will: rename local nr_irqs variable to avoid conflict with global] Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 8462da266089..05a3eb447fc8 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -75,6 +75,8 @@ struct pmu_hw_events { * already have to allocate this struct per cpu. */ struct arm_pmu *percpu_pmu; + + int irq; }; enum armpmu_attr_groups { @@ -88,7 +90,6 @@ struct arm_pmu { struct pmu pmu; cpumask_t active_irqs; cpumask_t supported_cpus; - int *irq_affinity; char *name; irqreturn_t (*handle_irq)(int irq_num, void *dev); void (*enable)(struct perf_event *event); -- cgit v1.2.3 From c09adab01e4aeecfa3dfae0946409844400c5901 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Fri, 10 Mar 2017 10:46:15 +0000 Subject: drivers/perf: arm_pmu: split irq request from enable For historical reasons, we lazily request and free interrupts in the arm pmu driver. This requires us to refcount use of the pmu (by way of counting the active events) in order to request/free interrupts at the correct times, which complicates the driver somewhat. The existing logic is flawed, as it only considers currently online CPUs when requesting, freeing, or managing the affinity of interrupts. Intervening hotplug events can result in erroneous IRQ affinity, online CPUs for which interrupts have not been requested, or offline CPUs whose interrupts are still requested. To fix this, this patch splits the requesting of interrupts from any per-cpu management (i.e. per-cpu enable/disable, and configuration of cpu affinity). We now request all interrupts up-front at probe time (and never free them, since we never unregister PMUs). 
The management of affinity, and per-cpu enable/disable now happens in our cpu hotplug callback, ensuring it occurs consistently. This means that we must now invoke the CPU hotplug callback at boot time in order to configure IRQs, and since the callback also resets the PMU hardware, we can remove the duplicate reset in the probe path. This rework renders our event refcounting unnecessary, so this is removed. Signed-off-by: Mark Rutland [will: make armpmu_get_cpu_irq static] Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 05a3eb447fc8..44f43fcf2524 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -105,12 +105,8 @@ struct arm_pmu { void (*start)(struct arm_pmu *); void (*stop)(struct arm_pmu *); void (*reset)(void *); - int (*request_irq)(struct arm_pmu *, irq_handler_t handler); - void (*free_irq)(struct arm_pmu *); int (*map_event)(struct perf_event *event); int num_events; - atomic_t active_events; - struct mutex reserve_mutex; u64 max_period; bool secure_access; /* 32-bit ARM only */ #define ARMV8_PMUV3_MAX_COMMON_EVENTS 0x40 -- cgit v1.2.3 From 1cf1cae963c2e6032aebe1637e995bc2f5d330f4 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 30 Mar 2017 21:45:38 -0700 Subject: bpf: introduce BPF_PROG_TEST_RUN command development and testing of networking bpf programs is quite cumbersome. Despite availability of user space bpf interpreters the kernel is the ultimate authority and execution environment. Current test frameworks for TC include creation of netns, veth, qdiscs and use of various packet generators just to test functionality of a bpf program. 
XDP testing is even more complicated, since qemu needs to be started with gro/gso disabled and precise queue configuration, transferring of xdp program from host into guest, attaching to virtio/eth0 and generating traffic from the host while capturing the results from the guest. Moreover analyzing performance bottlenecks in XDP program is impossible in virtio environment, since cost of running the program is tiny comparing to the overhead of virtio packet processing, so performance testing can only be done on physical nic with another server generating traffic. Furthermore ongoing changes to user space control plane of production applications cannot be run on the test servers leaving bpf programs stubbed out for testing. Last but not least, the upstream llvm changes are validated by the bpf backend testsuite which has no ability to test the code generated. To improve this situation introduce BPF_PROG_TEST_RUN command to test and performance benchmark bpf programs. Joint work with Daniel Borkmann. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2ae39a3e9ead..bbb513da5075 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -169,6 +169,8 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog); + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); }; struct bpf_prog_type_list { @@ -233,6 +235,11 @@ typedef unsigned long (*bpf_ctx_copy_t)(void *dst, const void *src, u64 bpf_event_output(struct bpf_map *map, u64 flags, void *meta, u64 meta_size, void *ctx, u64 ctx_size, bpf_ctx_copy_t ctx_copy); +int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -- cgit v1.2.3 From 3d8417d79e0da6a47ff29932ef80486be78af56e Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Fri, 31 Mar 2017 11:47:39 +0200 Subject: udp: use sk_protocol instead of pcflag to detect udplite sockets In the udp_sock struct, the 'forward_deficit' and 'pcflag' fields share the same cacheline. While the first is dirtied by udp_recvmsg, the latter is read, possibly several times, by the bottom half processing to discriminate between udp and udplite sockets. With this patch, sk->sk_protocol is used to check if the socket is really an udplite one, avoiding some cache misses per packet and improving the performance under udp_flood with small packets by up to 10%. Signed-off-by: Paolo Abeni Signed-off-by: David S.
Miller --- include/linux/udp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/udp.h b/include/linux/udp.h index c0f530809d1f..6cb4061a720d 100644 --- a/include/linux/udp.h +++ b/include/linux/udp.h @@ -115,6 +115,6 @@ static inline bool udp_get_no_check6_rx(struct sock *sk) #define udp_portaddr_for_each_entry_rcu(__sk, list) \ hlist_for_each_entry_rcu(__sk, list, __sk_common.skc_portaddr_node) -#define IS_UDPLITE(__sk) (udp_sk(__sk)->pcflag) +#define IS_UDPLITE(__sk) (__sk->sk_protocol == IPPROTO_UDPLITE) #endif /* _LINUX_UDP_H */ -- cgit v1.2.3 From bf17aa36c0f199f5b254262e77eaefda7da0f50b Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Wed, 1 Mar 2017 18:22:01 -0800 Subject: nvme: Correct NVMF enum values to match NVMe-oF rev 1.0 The enum values for QPTYPE, PRTYPE and CMS are off by 1 from the values defined in figure 42 of the NVM Express over Fabrics 1.0: http://www.nvmexpress.org/wp-content/uploads/NVMe_over_Fabrics_1_0_Gold_20160605-1.pdf Fix our enums to match the final spec. 
Signed-off-by: Roland Dreier Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg --- include/linux/nvme.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c43d435d4225..9061780b141f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -64,26 +64,26 @@ enum { * RDMA_QPTYPE field */ enum { - NVMF_RDMA_QPTYPE_CONNECTED = 0, /* Reliable Connected */ - NVMF_RDMA_QPTYPE_DATAGRAM = 1, /* Reliable Datagram */ + NVMF_RDMA_QPTYPE_CONNECTED = 1, /* Reliable Connected */ + NVMF_RDMA_QPTYPE_DATAGRAM = 2, /* Reliable Datagram */ }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS * RDMA_QPTYPE field */ enum { - NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 0, /* No Provider Specified */ - NVMF_RDMA_PRTYPE_IB = 1, /* InfiniBand */ - NVMF_RDMA_PRTYPE_ROCE = 2, /* InfiniBand RoCE */ - NVMF_RDMA_PRTYPE_ROCEV2 = 3, /* InfiniBand RoCEV2 */ - NVMF_RDMA_PRTYPE_IWARP = 4, /* IWARP */ + NVMF_RDMA_PRTYPE_NOT_SPECIFIED = 1, /* No Provider Specified */ + NVMF_RDMA_PRTYPE_IB = 2, /* InfiniBand */ + NVMF_RDMA_PRTYPE_ROCE = 3, /* InfiniBand RoCE */ + NVMF_RDMA_PRTYPE_ROCEV2 = 4, /* InfiniBand RoCEV2 */ + NVMF_RDMA_PRTYPE_IWARP = 5, /* IWARP */ }; /* RDMA Connection Management Service Type codes for Discovery Log Page * entry TSAS RDMA_CMS field */ enum { - NVMF_RDMA_CMS_RDMA_CM = 0, /* Sockets based enpoint addressing */ + NVMF_RDMA_CMS_RDMA_CM = 1, /* Sockets based endpoint addressing */ }; #define NVMF_AQ_DEPTH 32 -- cgit v1.2.3 From 5ba6bcbc335771c37d05b88cbfcad5441b57130b Mon Sep 17 00:00:00 2001 From: Jean Delvare Date: Tue, 7 Mar 2017 06:15:15 -0800 Subject: hwmon: Constify str parameter of hwmon_ops->read_string The read_string callback is supposed to retrieve a pointer to a constant string. 
Signed-off-by: Jean Delvare Reviewed-by: Peter Huewe Signed-off-by: Guenter Roeck --- include/linux/hwmon.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hwmon.h b/include/linux/hwmon.h index 88b673749121..ceb751987c40 100644 --- a/include/linux/hwmon.h +++ b/include/linux/hwmon.h @@ -337,7 +337,7 @@ struct hwmon_ops { int (*read)(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long *val); int (*read_string)(struct device *dev, enum hwmon_sensor_types type, - u32 attr, int channel, char **str); + u32 attr, int channel, const char **str); int (*write)(struct device *dev, enum hwmon_sensor_types type, u32 attr, int channel, long val); }; -- cgit v1.2.3 From 27c0e3748e41ca79171ffa3e97415a20af6facd0 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Feb 2017 18:42:24 -0500 Subject: [iov_iter] new primitive: iov_iter_revert() opposite to iov_iter_advance(); the caller is responsible for never using it to move back past the initial position.
Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/uio.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 804e34c6f981..f2d36a3d3005 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -39,7 +39,10 @@ struct iov_iter { }; union { unsigned long nr_segs; - int idx; + struct { + int idx; + int start_idx; + }; }; }; @@ -81,6 +84,7 @@ unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); +void iov_iter_revert(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, -- cgit v1.2.3 From 0e056eb5530da802c07f080d6bbd43c50e799efd Mon Sep 17 00:00:00 2001 From: "mchehab@s-opensource.com" Date: Thu, 30 Mar 2017 17:11:36 -0300 Subject: kernel-api.rst: fix a series of errors when parsing C files ./lib/string.c:134: WARNING: Inline emphasis start-string without end-string. ./mm/filemap.c:522: WARNING: Inline interpreted text or phrase reference start-string without end-string. ./mm/filemap.c:1283: ERROR: Unexpected indentation. ./mm/filemap.c:3003: WARNING: Inline interpreted text or phrase reference start-string without end-string. ./mm/vmalloc.c:1544: WARNING: Inline emphasis start-string without end-string. ./mm/page_alloc.c:4245: ERROR: Unexpected indentation. ./ipc/util.c:676: ERROR: Unexpected indentation. ./drivers/pci/irq.c:35: WARNING: Block quote ends without a blank line; unexpected unindent. ./security/security.c:109: ERROR: Unexpected indentation. ./security/security.c:110: WARNING: Definition list ends without a blank line; unexpected unindent. 
./block/genhd.c:275: WARNING: Inline strong start-string without end-string. ./block/genhd.c:283: WARNING: Inline strong start-string without end-string. ./include/linux/clk.h:134: WARNING: Inline emphasis start-string without end-string. ./include/linux/clk.h:134: WARNING: Inline emphasis start-string without end-string. ./ipc/util.c:477: ERROR: Unknown target name: "s". Signed-off-by: Mauro Carvalho Chehab Acked-by: Bjorn Helgaas Signed-off-by: Jonathan Corbet --- include/linux/clk.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk.h b/include/linux/clk.h index e9d36b3e49de..024cd07870d0 100644 --- a/include/linux/clk.h +++ b/include/linux/clk.h @@ -132,8 +132,8 @@ int clk_get_phase(struct clk *clk); * @q: clk compared against p * * Returns true if the two struct clk pointers both point to the same hardware - * clock node. Put differently, returns true if struct clk *p and struct clk *q - * share the same struct clk_core object. + * clock node. Put differently, returns true if @p and @q + * share the same &struct clk_core object. * * Returns false otherwise. Note that two NULL clks are treated as matching. */ -- cgit v1.2.3 From fff292914d3a2f1efd05ca71c2ba72a3c663201e Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 31 Mar 2017 15:20:48 +0300 Subject: security, keys: convert key.usage from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Acked-by: David Howells Signed-off-by: James Morris --- include/linux/key.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index e45212f2777e..9d9fac583dd3 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -23,6 +23,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include @@ -135,7 +136,7 @@ static inline bool is_key_possessed(const key_ref_t key_ref) * - Kerberos TGTs and tickets */ struct key { - atomic_t usage; /* number of references */ + refcount_t usage; /* number of references */ key_serial_t serial; /* key serial number */ union { struct list_head graveyard_link; @@ -242,7 +243,7 @@ extern void key_put(struct key *key); static inline struct key *__key_get(struct key *key) { - atomic_inc(&key->usage); + refcount_inc(&key->usage); return key; } -- cgit v1.2.3 From 3209f68b3ca4667069923a325c88b21131bfdf9f Mon Sep 17 00:00:00 2001 From: David Howells Date: Fri, 31 Mar 2017 18:32:17 +0100 Subject: statx: Include a mask for stx_attributes in struct statx Include a mask in struct stat to indicate which bits of stx_attributes the filesystem actually supports. This would also be useful if we add another system call that allows you to do a 'bulk attribute set' and pass in a statx struct with the masks appropriately set to say what you want to set. 
Signed-off-by: David Howells Signed-off-by: Al Viro --- include/linux/stat.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stat.h b/include/linux/stat.h index c76e524fb34b..64b6b3aece21 100644 --- a/include/linux/stat.h +++ b/include/linux/stat.h @@ -26,6 +26,7 @@ struct kstat { unsigned int nlink; uint32_t blksize; /* Preferred I/O size */ u64 attributes; + u64 attributes_mask; #define KSTAT_ATTR_FS_IOC_FLAGS \ (STATX_ATTR_COMPRESSED | \ STATX_ATTR_IMMUTABLE | \ -- cgit v1.2.3 From 2475a2b6c877a0c8d1ca42c3f2b30f8ce518ac0b Mon Sep 17 00:00:00 2001 From: Alistair Popple Date: Mon, 3 Apr 2017 19:51:42 +1000 Subject: drivers/of/base.c: Add of_property_read_u64_index There is of_property_read_u32_index but no u64 variant. This patch adds one similar to the u32 version for u64. Signed-off-by: Alistair Popple Acked-by: Rob Herring Signed-off-by: Michael Ellerman --- include/linux/of.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 21e6323de0f3..d08788daae5c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -292,6 +292,9 @@ extern int of_property_count_elems_of_size(const struct device_node *np, extern int of_property_read_u32_index(const struct device_node *np, const char *propname, u32 index, u32 *out_value); +extern int of_property_read_u64_index(const struct device_node *np, + const char *propname, + u32 index, u64 *out_value); extern int of_property_read_variable_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz_min, size_t sz_max); -- cgit v1.2.3 From 469ff8f7d46d75b36de68a0411a2ce80109ad00b Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Mon, 25 Apr 2016 11:30:39 -0700 Subject: KEYS: Use a typedef for restrict_link function pointers This pointer type needs to be returned from a lookup function, and without a typedef the syntax gets cumbersome. 
Signed-off-by: Mat Martineau --- include/linux/key.h | 16 +++++++--------- 1 file changed, 7 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 9d9fac583dd3..3bb327043869 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -127,6 +127,10 @@ static inline bool is_key_possessed(const key_ref_t key_ref) return (unsigned long) key_ref & 1UL; } +typedef int (*key_restrict_link_func_t)(struct key *keyring, + const struct key_type *type, + const union key_payload *payload); + /*****************************************************************************/ /* * authentication token / access credential / keyring @@ -215,9 +219,7 @@ struct key { * overrides this, allowing the kernel to add extra keys without * restriction. */ - int (*restrict_link)(struct key *keyring, - const struct key_type *type, - const union key_payload *payload); + key_restrict_link_func_t restrict_link; }; extern struct key *key_alloc(struct key_type *type, @@ -226,9 +228,7 @@ extern struct key *key_alloc(struct key_type *type, const struct cred *cred, key_perm_t perm, unsigned long flags, - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *)); + key_restrict_link_func_t restrict_link); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ @@ -304,9 +304,7 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid const struct cred *cred, key_perm_t perm, unsigned long flags, - int (*restrict_link)(struct key *, - const struct key_type *, - const union key_payload *), + key_restrict_link_func_t restrict_link, struct key *dest); extern int restrict_link_reject(struct key *keyring, -- cgit v1.2.3 From aaf66c883813f0078e3dafe7d20d1461321ac14f Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Tue, 30 Aug 2016 11:33:13 -0700 Subject: KEYS: Split role of the keyring pointer for keyring restrict functions The first argument to the 
restrict_link_func_t functions was a keyring pointer. These functions are called by the key subsystem with this argument set to the destination keyring, but restrict_link_by_signature expects a pointer to the relevant trusted keyring. Restrict functions may need something other than a single struct key pointer to allow or reject key linkage, so the data used to make that decision (such as the trust keyring) is moved to a new, fourth argument. The first argument is now always the destination keyring. Signed-off-by: Mat Martineau --- include/linux/key.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 3bb327043869..c59d1008c4fc 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -127,9 +127,10 @@ static inline bool is_key_possessed(const key_ref_t key_ref) return (unsigned long) key_ref & 1UL; } -typedef int (*key_restrict_link_func_t)(struct key *keyring, +typedef int (*key_restrict_link_func_t)(struct key *dest_keyring, const struct key_type *type, - const union key_payload *payload); + const union key_payload *payload, + struct key *restriction_key); /*****************************************************************************/ /* @@ -309,7 +310,8 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid extern int restrict_link_reject(struct key *keyring, const struct key_type *type, - const union key_payload *payload); + const union key_payload *payload, + struct key *restriction_key); extern int keyring_clear(struct key *keyring); -- cgit v1.2.3 From e9cc0f689a7c0c9be6fed6861b3a3f49ad0e7a52 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Mon, 27 Jun 2016 16:10:59 -0700 Subject: KEYS: Add a key restriction struct Key link restrictions require restriction-specific data as well as a restriction-specific function pointer. 
As a first step toward replacing the restrict_link pointer in struct key, define a more general key_restriction structure that captures the required function, key, and key type pointers. Key type modules should not be pinned on account of this key type pointer because the pointer will be cleared by the garbage collector if the key type is unregistered. Signed-off-by: Mat Martineau --- include/linux/key.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index c59d1008c4fc..a06649f3223d 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -132,6 +132,12 @@ typedef int (*key_restrict_link_func_t)(struct key *dest_keyring, const union key_payload *payload, struct key *restriction_key); +struct key_restriction { + key_restrict_link_func_t check; + struct key *key; + struct key_type *keytype; +}; + /*****************************************************************************/ /* * authentication token / access credential / keyring -- cgit v1.2.3 From 3071f13d75f627ed8648535815a0506d50cbc6ed Mon Sep 17 00:00:00 2001 From: Agustin Vega-Frias Date: Fri, 31 Mar 2017 14:13:43 -0400 Subject: perf: qcom: Add L3 cache PMU driver This adds a new dynamic PMU to the Perf Events framework to program and control the L3 cache PMUs in some Qualcomm Technologies SOCs. The driver supports a distributed cache architecture where the overall cache for a socket is comprised of multiple slices each with its own PMU. Access to each individual PMU is provided even though all CPUs share all the slices. User space needs to aggregate to individual counts to provide a global picture. 
The driver exports formatting and event information to sysfs so it can be used by the perf user space tools with the syntaxes: perf stat -a -e l3cache_0_0/read-miss/ perf stat -a -e l3cache_0_0/event=0x21/ Acked-by: Mark Rutland Signed-off-by: Agustin Vega-Frias [will: fixed sparse issues] Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 62d240e962f0..cfcfab37d9c4 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -137,6 +137,7 @@ enum cpuhp_state { CPUHP_AP_PERF_ARM_CCN_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, + CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, CPUHP_AP_WORKQUEUE_ONLINE, CPUHP_AP_RCUTREE_ONLINE, CPUHP_AP_ONLINE_DYN, -- cgit v1.2.3 From b80f0f6c9ed3958ff4002b6135f43a1ef312a610 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 3 Apr 2017 12:57:35 -0400 Subject: ftrace: Have init/main.c call ftrace directly to free init memory Relying on free_reserved_area() to call ftrace to free init memory proved to not be sufficient. The issue is that on x86, when debug_pagealloc is enabled, the init memory is not freed, but simply set as not present. Since ftrace was uninformed of this, starting function tracing still tries to update pages that are not present according to the page tables, causing ftrace to bug, as well as killing the kernel itself. Instead of relying on free_reserved_area(), have init/main.c call ftrace directly just before it frees the init memory. Then it needs to use __init_begin and __init_end to know where the init memory location is. Looking at all archs (and testing what I can), it appears that this should work for each of them. 
Reported-by: kernel test robot Reported-by: Fengguang Wu Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 0276a2c487e6..ef7123219f14 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -147,9 +147,9 @@ struct ftrace_ops_hash { struct mutex regex_lock; }; -void ftrace_free_mem(void *start, void *end); +void ftrace_free_init_mem(void); #else -static inline void ftrace_free_mem(void *start, void *end) { } +static inline void ftrace_free_init_mem(void) { } #endif /* @@ -266,7 +266,7 @@ static inline int ftrace_nr_registered_ops(void) } static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } -static inline void ftrace_free_mem(void *start, void *end) { } +static inline void ftrace_free_init_mem(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER -- cgit v1.2.3 From 877c57d0d0cac2c8fc661f708d8ee3fa7aa8d28b Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Fri, 24 Mar 2017 11:45:49 +0200 Subject: tpm_crb: request and relinquish locality 0 This commit adds support for requesting and relinquishing locality 0 in tpm_crb for the course of command transmission. In order to achieve this, two new callbacks are added to struct tpm_class_ops: - request_locality - relinquish_locality With CRB interface you first set either requestAccess or relinquish bit from TPM_LOC_CTRL_x register and then wait for locAssigned and tpmRegValidSts bits to be set in the TPM_LOC_STATE_x register. The reason why we are doing this is to make sure that the driver will work properly with Intel TXT that uses locality 2. There's no explicit guarantee that it would relinquish this locality. In a more general sense this commit enables tpm_crb to be a well behaving citizen in a multi locality environment.
Signed-off-by: Jarkko Sakkinen Reviewed-by: Jerry Snitselaar Tested-by: Jerry Snitselaar --- include/linux/tpm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index da158f06e0b2..5a090f5ab335 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -48,7 +48,8 @@ struct tpm_class_ops { u8 (*status) (struct tpm_chip *chip); bool (*update_timeouts)(struct tpm_chip *chip, unsigned long *timeout_cap); - + int (*request_locality)(struct tpm_chip *chip, int loc); + void (*relinquish_locality)(struct tpm_chip *chip, int loc); }; #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) -- cgit v1.2.3 From f65fd1aa4f9881d5540192d11f7b8ed2fec936db Mon Sep 17 00:00:00 2001 From: Sasha Neftin Date: Mon, 3 Apr 2017 16:02:50 -0500 Subject: PCI: Avoid FLR for Intel 82579 NICs Per Intel Specification Update 335553-002 (see link below), some 82579 network adapters advertise a Function Level Reset (FLR) capability, but they can hang when an FLR is triggered. To reproduce the problem, attach the device to a VM, then detach and try to attach again. Add a quirk to prevent the use of FLR on these devices. 
[bhelgaas: changelog, comments] Link: http://www.intel.com/content/dam/www/public/us/en/documents/specification-updates/82579lm-82579v-gigabit-network-connection-spec-update.pdf Signed-off-by: Sasha Neftin Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..22cad2c66d59 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -178,6 +178,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), /* Get VPD from function 0 VPD */ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), + /* Do not use FLR even if device advertises PCI_AF_CAP */ + PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From c8b5d129ee293bcf972e7279ac996bb8a138505c Mon Sep 17 00:00:00 2001 From: Greg Ungerer Date: Mon, 3 Apr 2017 15:50:03 +1000 Subject: net: usbnet: support 64bit stats MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add support for the net stats64 counters to the usbnet core. With that in place put the hooks into every usbnet driver to use it. This is a straightforward addition of 64bit counters for RX and TX packet and byte counts. It is done in the same style as for the other net drivers that support stats64. Note that the other stats fields remain as 32bit sized values (error counts, etc). The motivation to add this is that it is not particularly difficult to get the RX and TX byte counts to wrap on 32bit platforms. Signed-off-by: Greg Ungerer Acked-by: Bjørn Mork Signed-off-by: David S.
Miller --- include/linux/usb/usbnet.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index e2b56917450f..7dffa5624ea6 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -64,6 +64,8 @@ struct usbnet { struct usb_anchor deferred; struct tasklet_struct bh; + struct pcpu_sw_netstats __percpu *stats64; + struct work_struct kevent; unsigned long flags; # define EVENT_TX_HALT 0 @@ -278,5 +280,7 @@ extern int usbnet_status_start(struct usbnet *dev, gfp_t mem_flags); extern void usbnet_status_stop(struct usbnet *dev); extern void usbnet_update_max_qlen(struct usbnet *dev); +extern void usbnet_get_stats64(struct net_device *dev, + struct rtnl_link_stats64 *stats); #endif /* __LINUX_USB_USBNET_H */ -- cgit v1.2.3 From f9dc4d1f0d6f75c102ee13c0a939d9ae880a3c1e Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Mon, 3 Apr 2017 12:21:13 +0300 Subject: qed: Manage with less memory regions for RoCE It's possible some configurations would prevent driver from utilizing all the Memory Regions due to a lack of ILT lines. In such a case, calculate how many memory regions would have to be dropped due to limit, and manage without those. Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 8e0065c52857..625f80f08f91 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -263,7 +263,6 @@ struct qed_rdma_pf_params { * the doorbell BAR). 
*/ u32 min_dpis; /* number of requested DPIs */ - u32 num_mrs; /* number of requested memory regions */ u32 num_qps; /* number of requested Queue Pairs */ u32 num_srqs; /* number of requested SRQ */ u8 roce_edpm_mode; /* see QED_ROCE_EDPM_MODE_ENABLE */ -- cgit v1.2.3 From 815429b39d94c64f6d05eed9e7c1a9bdfdd5bd70 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Wed, 29 Mar 2017 19:30:17 +0900 Subject: extcon: Add new extcon_register_notifier_all() to monitor all external connectors The extcon core already provides the extcon_register_notifier() function in order to register the notifier block which is used to monitor the state change for the specific external connector such as EXTCON_USB, EXTCON_USB_HOST and so on. The extcon consumer uses this function. The extcon consumer might need to monitor all supported external connectors from the extcon device. In this case, the extcon consumer should have a notifier_block structure for each external connector. This patch adds the new extcon_register_notifier_all() function so that the extcon consumer is able to monitor the state change of all supported external connectors by using only one notifier_block structure.
- List of new added functions: int extcon_register_notifier_all(struct extcon_dev *edev, struct notifier_block *nb); int extcon_unregister_notifier_all(struct extcon_dev *edev, struct notifier_block *nb); int devm_extcon_register_notifier_all(struct device *dev, struct extcon_dev *edev, struct notifier_block *nb); void devm_extcon_unregister_notifier_all(struct device *dev, struct extcon_dev *edev, struct notifier_block *nb); Suggested-by: Hans de Goede Signed-off-by: Chanwoo Choi Tested-by: Hans de Goede Acked-by: Hans de Goede --- include/linux/extcon.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 7010fb01a81a..7e206a9f88db 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -236,11 +236,11 @@ extern int extcon_set_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); /* - * Following APIs are to monitor every action of a notifier. - * Registrar gets notified for every external port of a connection device. - * Probably this could be used to debug an action of notifier; however, - * we do not recommend to use this for normal 'notifiee' device drivers who - * want to be notified by a specific external port of the notifier. + * Following APIs are to monitor the status change of the external connectors. + * extcon_register_notifier(*edev, id, *nb) : Register a notifier block + * for specific external connector of the extcon. + * extcon_register_notifier_all(*edev, *nb) : Register a notifier block + * for all supported external connectors of the extcon. 
*/ extern int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); @@ -253,6 +253,17 @@ extern void devm_extcon_unregister_notifier(struct device *dev, struct extcon_dev *edev, unsigned int id, struct notifier_block *nb); +extern int extcon_register_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb); +extern int extcon_unregister_notifier_all(struct extcon_dev *edev, + struct notifier_block *nb); +extern int devm_extcon_register_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb); +extern void devm_extcon_unregister_notifier_all(struct device *dev, + struct extcon_dev *edev, + struct notifier_block *nb); + /* * Following API get the extcon device from devicetree. * This function use phandle of devicetree to get extcon device directly. -- cgit v1.2.3 From e96a7705e7d3fef96aec9b590c63b2f6f7d2ba22 Mon Sep 17 00:00:00 2001 From: Xunlei Pang Date: Thu, 23 Mar 2017 15:56:08 +0100 Subject: sched/rtmutex/deadline: Fix a PI crash for deadline tasks A crash happened while I was playing with deadline PI rtmutex. BUG: unable to handle kernel NULL pointer dereference at 0000000000000018 IP: [] rt_mutex_get_top_task+0x1f/0x30 PGD 232a75067 PUD 230947067 PMD 0 Oops: 0000 [#1] SMP CPU: 1 PID: 10994 Comm: a.out Not tainted Call Trace: [] enqueue_task+0x2c/0x80 [] activate_task+0x23/0x30 [] pull_dl_task+0x1d5/0x260 [] pre_schedule_dl+0x16/0x20 [] __schedule+0xd3/0x900 [] schedule+0x29/0x70 [] __rt_mutex_slowlock+0x4b/0xc0 [] rt_mutex_slowlock+0xd1/0x190 [] rt_mutex_timed_lock+0x53/0x60 [] futex_lock_pi.isra.18+0x28c/0x390 [] do_futex+0x190/0x5b0 [] SyS_futex+0x80/0x180 This is because rt_mutex_enqueue_pi() and rt_mutex_dequeue_pi() are only protected by pi_lock when operating pi waiters, while rt_mutex_get_top_task(), will access them with rq lock held but not holding pi_lock. 
In order to tackle it, we introduce new "pi_top_task" pointer cached in task_struct, and add new rt_mutex_update_top_task() to update its value, it can be called by rt_mutex_setprio() which held both owner's pi_lock and rq lock. Thus "pi_top_task" can be safely accessed by enqueue_task_dl() under rq lock. Originally-From: Peter Zijlstra Signed-off-by: Xunlei Pang Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Reviewed-by: Thomas Gleixner Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: bristot@redhat.com Link: http://lkml.kernel.org/r/20170323150216.157682758@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 1 + include/linux/sched.h | 2 ++ include/linux/sched/rt.h | 1 + 3 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 91d9049f0039..2c487e0879d5 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -181,6 +181,7 @@ extern struct cred init_cred; #ifdef CONFIG_RT_MUTEXES # define INIT_RT_MUTEXES(tsk) \ .pi_waiters = RB_ROOT, \ + .pi_top_task = NULL, \ .pi_waiters_leftmost = NULL, #else # define INIT_RT_MUTEXES(tsk) diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..1ea2eee7bc4f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -775,6 +775,8 @@ struct task_struct { /* PI waiters blocked on a rt_mutex held by this task: */ struct rb_root pi_waiters; struct rb_node *pi_waiters_leftmost; + /* Updated under owner's pi_lock and rq lock */ + struct task_struct *pi_top_task; /* Deadlock detection and priority inheritance handling: */ struct rt_mutex_waiter *pi_blocked_on; #endif diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 3bd668414f61..10ee7eeb0ee2 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -21,6 +21,7 @@ static inline int rt_task(struct task_struct *p) extern int 
rt_mutex_getprio(struct task_struct *p); extern void rt_mutex_setprio(struct task_struct *p, int prio); extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); +extern void rt_mutex_update_top_task(struct task_struct *p); extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) -- cgit v1.2.3 From acd58620e415aee4a43a808d7d2fd87259ee0001 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 23 Mar 2017 15:56:11 +0100 Subject: sched/rtmutex: Refactor rt_mutex_setprio() With the introduction of SCHED_DEADLINE the whole notion that priority is a single number is gone, therefore the @prio argument to rt_mutex_setprio() doesn't make sense anymore. So rework the code to pass a pi_task instead. Note this also fixes a problem with pi_top_task caching; previously we would not set the pointer (call rt_mutex_update_top_task) if the priority didn't change, this could lead to a stale pointer. As for the XXX, I think it's fine to use pi_task->prio, because if it differs from waiter->prio, a PI chain update is imminent.
Signed-off-by: Peter Zijlstra (Intel) Cc: juri.lelli@arm.com Cc: bigeasy@linutronix.de Cc: xlpang@redhat.com Cc: rostedt@goodmis.org Cc: mathieu.desnoyers@efficios.com Cc: jdesfossez@efficios.com Cc: bristot@redhat.com Link: http://lkml.kernel.org/r/20170323150216.303827095@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/sched/rt.h | 24 ++++++++---------------- 1 file changed, 8 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index 10ee7eeb0ee2..f93329aba31a 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -18,28 +18,20 @@ static inline int rt_task(struct task_struct *p) } #ifdef CONFIG_RT_MUTEXES -extern int rt_mutex_getprio(struct task_struct *p); -extern void rt_mutex_setprio(struct task_struct *p, int prio); -extern int rt_mutex_get_effective_prio(struct task_struct *task, int newprio); -extern void rt_mutex_update_top_task(struct task_struct *p); -extern struct task_struct *rt_mutex_get_top_task(struct task_struct *task); +/* + * Must hold either p->pi_lock or task_rq(p)->lock. 
+ */ +static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *p) +{ + return p->pi_top_task; +} +extern void rt_mutex_setprio(struct task_struct *p, struct task_struct *pi_task); extern void rt_mutex_adjust_pi(struct task_struct *p); static inline bool tsk_is_pi_blocked(struct task_struct *tsk) { return tsk->pi_blocked_on != NULL; } #else -static inline int rt_mutex_getprio(struct task_struct *p) -{ - return p->normal_prio; -} - -static inline int rt_mutex_get_effective_prio(struct task_struct *task, - int newprio) -{ - return newprio; -} - static inline struct task_struct *rt_mutex_get_top_task(struct task_struct *task) { return NULL; -- cgit v1.2.3 From 6d56111c92d247bb64301029fe88365aa4caf16e Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Tue, 21 Mar 2017 22:05:22 +0100 Subject: KVM: arm/arm64: vgic: Fix GICC_PMR uaccess on GICv3 and clarify ABI As an oversight, for GICv2, we accidentally export the GICC_PMR register in the format of the GICH_VMCR.VMPriMask field in the lower 5 bits of a word, meaning that userspace must always use the lower 5 bits to communicate with the KVM device and must shift the value left by 3 places to obtain the actual priority mask level. Since GICv3 supports the full 8 bits of priority masking in the ICH_VMCR, we have to fix the value we export when emulating a GICv2 on top of a hardware GICv3 and exporting the emulated GICv2 state to userspace. Take the chance to clarify this aspect of the ABI. 
Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/irqchip/arm-gic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index eafc965b3eb8..dc30f3d057eb 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -96,6 +96,9 @@ #define GICH_MISR_EOI (1 << 0) #define GICH_MISR_U (1 << 1) +#define GICV_PMR_PRIORITY_SHIFT 3 +#define GICV_PMR_PRIORITY_MASK (0x1f << GICV_PMR_PRIORITY_SHIFT) + #ifndef __ASSEMBLY__ #include -- cgit v1.2.3 From dabf54dd1c6369160f8d4c793a8613dfb4e7848a Mon Sep 17 00:00:00 2001 From: Yegor Yefremov Date: Fri, 17 Feb 2017 16:52:33 +0100 Subject: can: ti_hecc: Convert TI HECC driver to DT only driver This patch converts TI HECC driver to DT only driver. This results in removing ti_hecc.h containing now obsolete platform data. Former transceiver_switch callback function will be now modelled via regulator API. Signed-off-by: Anton Glukhov Signed-off-by: Yegor Yefremov Signed-off-by: Marc Kleine-Budde --- include/linux/can/platform/ti_hecc.h | 44 ------------------------------------ 1 file changed, 44 deletions(-) delete mode 100644 include/linux/can/platform/ti_hecc.h (limited to 'include/linux') diff --git a/include/linux/can/platform/ti_hecc.h b/include/linux/can/platform/ti_hecc.h deleted file mode 100644 index a52f47ca6c8a..000000000000 --- a/include/linux/can/platform/ti_hecc.h +++ /dev/null @@ -1,44 +0,0 @@ -#ifndef _CAN_PLATFORM_TI_HECC_H -#define _CAN_PLATFORM_TI_HECC_H - -/* - * TI HECC (High End CAN Controller) driver platform header - * - * Copyright (C) 2009 Texas Instruments Incorporated - http://www.ti.com/ - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License as - * published by the Free Software Foundation version 2. 
- * - * This program is distributed as is WITHOUT ANY WARRANTY of any - * kind, whether express or implied; without even the implied warranty - * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -/** - * struct hecc_platform_data - HECC Platform Data - * - * @scc_hecc_offset: mostly 0 - should really never change - * @scc_ram_offset: SCC RAM offset - * @hecc_ram_offset: HECC RAM offset - * @mbx_offset: Mailbox RAM offset - * @int_line: Interrupt line to use - 0 or 1 - * @version: version for future use - * @transceiver_switch: platform specific callback fn for transceiver control - * - * Platform data structure to get all platform specific settings. - * this structure also accounts the fact that the IP may have different - * RAM and mailbox offsets for different SOC's - */ -struct ti_hecc_platform_data { - u32 scc_hecc_offset; - u32 scc_ram_offset; - u32 hecc_ram_offset; - u32 mbx_offset; - u32 int_line; - u32 version; - void (*transceiver_switch) (int); -}; -#endif /* !_CAN_PLATFORM_TI_HECC_H */ -- cgit v1.2.3 From 8e8cda6d737d356054c9eeef642aec0e8ae7e6bc Mon Sep 17 00:00:00 2001 From: Mario Kicherer Date: Tue, 21 Feb 2017 12:19:47 +0100 Subject: can: initial support for network namespaces This patch adds initial support for network namespaces. The changes only enable support in the CAN raw, proc and af_can code. GW and BCM still have their checks that ensure that they are used only from the main namespace. The patch boils down to moving the global structures, i.e. the global filter list and their /proc stats, into a per-namespace structure and passing around the corresponding "struct net" in a lot of different places. 
Changes since v1: - rebased on current HEAD (2bfe01e) - fixed overlong line Signed-off-by: Mario Kicherer Signed-off-by: Marc Kleine-Budde --- include/linux/can/core.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index df08a41d5be5..319a0da827b8 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -45,12 +45,13 @@ struct can_proto { extern int can_proto_register(const struct can_proto *cp); extern void can_proto_unregister(const struct can_proto *cp); -int can_rx_register(struct net_device *dev, canid_t can_id, canid_t mask, +int can_rx_register(struct net *net, struct net_device *dev, + canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data, char *ident, struct sock *sk); -extern void can_rx_unregister(struct net_device *dev, canid_t can_id, - canid_t mask, +extern void can_rx_unregister(struct net *net, struct net_device *dev, + canid_t can_id, canid_t mask, void (*func)(struct sk_buff *, void *), void *data); -- cgit v1.2.3 From 62e24c5775ecb387a3eb33701378ccfa6dbc98ee Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Fri, 5 Feb 2016 13:41:39 +0100 Subject: reset: add exported __reset_control_get, return NULL if optional Rename the internal __reset_control_get/put functions to __reset_control_get/put_internal and add an exported __reset_control_get equivalent to __of_reset_control_get that takes a struct device parameter. This avoids the confusing call to __of_reset_control_get in the non-DT case and fixes the devm_reset_control_get_optional function to return NULL if RESET_CONTROLLER is enabled but dev->of_node == NULL. 
Fixes: bb475230b8e5 ("reset: make optional functions really optional") Reported-by: Andy Shevchenko Tested-by: Andy Shevchenko Cc: Ramiro Oliveira Signed-off-by: Philipp Zabel --- include/linux/reset.h | 22 ++++++++++++++-------- 1 file changed, 14 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/reset.h b/include/linux/reset.h index 96fb139bdd08..13d8681210d5 100644 --- a/include/linux/reset.h +++ b/include/linux/reset.h @@ -15,6 +15,9 @@ int reset_control_status(struct reset_control *rstc); struct reset_control *__of_reset_control_get(struct device_node *node, const char *id, int index, bool shared, bool optional); +struct reset_control *__reset_control_get(struct device *dev, const char *id, + int index, bool shared, + bool optional); void reset_control_put(struct reset_control *rstc); struct reset_control *__devm_reset_control_get(struct device *dev, const char *id, int index, bool shared, @@ -72,6 +75,13 @@ static inline struct reset_control *__of_reset_control_get( return optional ? NULL : ERR_PTR(-ENOTSUPP); } +static inline struct reset_control *__reset_control_get( + struct device *dev, const char *id, + int index, bool shared, bool optional) +{ + return optional ? NULL : ERR_PTR(-ENOTSUPP); +} + static inline struct reset_control *__devm_reset_control_get( struct device *dev, const char *id, int index, bool shared, bool optional) @@ -102,8 +112,7 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) #ifndef CONFIG_RESET_CONTROLLER WARN_ON(1); #endif - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, - false); + return __reset_control_get(dev, id, 0, false, false); } /** @@ -131,22 +140,19 @@ __must_check reset_control_get_exclusive(struct device *dev, const char *id) static inline struct reset_control *reset_control_get_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? 
dev->of_node : NULL, id, 0, true, - false); + return __reset_control_get(dev, id, 0, true, false); } static inline struct reset_control *reset_control_get_optional_exclusive( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, false, - true); + return __reset_control_get(dev, id, 0, false, true); } static inline struct reset_control *reset_control_get_optional_shared( struct device *dev, const char *id) { - return __of_reset_control_get(dev ? dev->of_node : NULL, id, 0, true, - true); + return __reset_control_get(dev, id, 0, true, true); } /** -- cgit v1.2.3 From b1a951fe469eb51646bf2e6d2c5f4a19fe1d4627 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Sun, 5 Feb 2017 21:47:22 +0200 Subject: net/utils: generic inet_pton_with_scope helper Several locations in the stack need to handle ipv4/ipv6 (with scope) and port strings conversion to sockaddr. Add a helper that takes either AF_INET, AF_INET6 or AF_UNSPEC (for wildcard) to centralize this handling. Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/inet.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/inet.h b/include/linux/inet.h index 4cca05c9678e..636ebe87e6f8 100644 --- a/include/linux/inet.h +++ b/include/linux/inet.h @@ -43,6 +43,8 @@ #define _LINUX_INET_H #include +#include +#include /* * These mimic similar macros defined in user-space for inet_ntop(3). 
@@ -54,4 +56,8 @@ extern __be32 in_aton(const char *str); extern int in4_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); extern int in6_pton(const char *src, int srclen, u8 *dst, int delim, const char **end); + +extern int inet_pton_with_scope(struct net *net, unsigned short af, + const char *src, const char *port, struct sockaddr_storage *addr); + #endif /* _LINUX_INET_H */ -- cgit v1.2.3 From 0f222ccce359d21f927d07df2069e7029497b790 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 23 Mar 2017 20:41:22 -0700 Subject: nvme_fc: Sync FC-NVME header with standard Update FC-NVME definitions to match FC-NVME r1.14 (16-020vB) plus change voted in by 2/22 FC-NVME Adhoc (see HOSTID below). Includes the following: - Addition of "status_code" field to ERSP IU - Addition of FC-NVME LS RJT reason_codes and reason_explanations - CreateAssociation payload, HostID field shortened to 16 bytes Signed-off-by: James Smart Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/nvme-fc.h | 68 ++++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 59 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc.h b/include/linux/nvme-fc.h index 4b45226bd604..e997c4a49a88 100644 --- a/include/linux/nvme-fc.h +++ b/include/linux/nvme-fc.h @@ -16,8 +16,7 @@ */ /* - * This file contains definitions relative to FC-NVME r1.11 and a few - * newer items + * This file contains definitions relative to FC-NVME r1.14 (16-020vB). 
*/ #ifndef _NVME_FC_H @@ -47,8 +46,15 @@ struct nvme_fc_cmd_iu { #define NVME_FC_SIZEOF_ZEROS_RSP 12 +enum { + FCNVME_SC_SUCCESS = 0, + FCNVME_SC_INVALID_FIELD = 1, + FCNVME_SC_INVALID_CONNID = 2, +}; + struct nvme_fc_ersp_iu { - __u8 rsvd0[2]; + __u8 status_code; + __u8 rsvd1; __be16 iu_len; __be32 rsn; __be32 xfrd_len; @@ -58,7 +64,7 @@ struct nvme_fc_ersp_iu { }; -/* FC-NVME r1.03/16-119v0 NVME Link Services */ +/* FC-NVME Link Services */ enum { FCNVME_LS_RSVD = 0, FCNVME_LS_RJT = 1, @@ -68,7 +74,7 @@ enum { FCNVME_LS_DISCONNECT = 5, }; -/* FC-NVME r1.03/16-119v0 NVME Link Service Descriptors */ +/* FC-NVME Link Service Descriptors */ enum { FCNVME_LSDESC_RSVD = 0x0, FCNVME_LSDESC_RQST = 0x1, @@ -92,7 +98,6 @@ static inline __be32 fcnvme_lsdesc_len(size_t sz) return cpu_to_be32(sz - (2 * sizeof(u32))); } - struct fcnvme_ls_rqst_w0 { u8 ls_cmd; /* FCNVME_LS_xxx */ u8 zeros[3]; @@ -106,8 +111,53 @@ struct fcnvme_lsdesc_rqst { __be32 rsvd12; }; +/* FC-NVME LS RJT reason_code values */ +enum fcnvme_ls_rjt_reason { + FCNVME_RJT_RC_NONE = 0, + /* no reason - not to be sent */ + + FCNVME_RJT_RC_INVAL = 0x01, + /* invalid NVMe_LS command code */ + + FCNVME_RJT_RC_LOGIC = 0x03, + /* logical error */ + + FCNVME_RJT_RC_UNAB = 0x09, + /* unable to perform command request */ + + FCNVME_RJT_RC_UNSUP = 0x0b, + /* command not supported */ + + FCNVME_RJT_RC_INPROG = 0x0e, + /* command already in progress */ + FCNVME_RJT_RC_INV_ASSOC = 0x40, + /* Invalid Association ID*/ + FCNVME_RJT_RC_INV_CONN = 0x41, + /* Invalid Connection ID*/ + + FCNVME_RJT_RC_VENDOR = 0xff, + /* vendor specific error */ +}; + +/* FC-NVME LS RJT reason_explanation values */ +enum fcnvme_ls_rjt_explan { + FCNVME_RJT_EXP_NONE = 0x00, + /* No additional explanation */ + + FCNVME_RJT_EXP_OXID_RXID = 0x17, + /* invalid OX_ID-RX_ID combination */ + + FCNVME_RJT_EXP_INSUF_RES = 0x29, + /* insufficient resources */ + + FCNVME_RJT_EXP_UNAB_DATA = 0x2a, + /* unable to supply requested data */ + + 
FCNVME_RJT_EXP_INV_LEN = 0x2d, + /* Invalid payload length */ +}; /* FCNVME_LSDESC_RJT */ struct fcnvme_lsdesc_rjt { @@ -119,15 +169,15 @@ struct fcnvme_lsdesc_rjt { * Reject reason and explanaction codes are generic * to ELs's from LS-3. */ - u8 reason_code; - u8 reason_explanation; + u8 reason_code; /* fcnvme_ls_rjt_reason */ + u8 reason_explanation; /* fcnvme_ls_rjt_explan */ u8 vendor; __be32 rsvd12; }; -#define FCNVME_ASSOC_HOSTID_LEN 64 +#define FCNVME_ASSOC_HOSTID_LEN 16 #define FCNVME_ASSOC_HOSTNQN_LEN 256 #define FCNVME_ASSOC_SUBNQN_LEN 256 -- cgit v1.2.3 From 62eeacb0e04f2aff7099a7765f386bb7ba53d5e2 Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 23 Mar 2017 20:41:27 -0700 Subject: nvme_fc: Clean up host fcpio done status handling As Dan Carpenter pointed out: mixing 16-bit nvme status with 32-bit error status from driver. Corrected comment on fcp request struct status field, and converted done routine to explicitly set nvme status codes for nvme status. Signed-off-by: James Smart Reported-by: Dan Carpenter Reviewed-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f21471f7ee40..16eb264980c2 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -137,9 +137,9 @@ enum nvmefc_fcp_datadir { * transferred. Should equal payload_length on success. * @rcv_rsplen: length, in bytes, of the FCP RSP IU received. * @status: Completion status of the FCP operation. must be 0 upon success, - * NVME_SC_FC_xxx value upon failure. Note: this is NOT a - * reflection of the NVME CQE completion status. Only the status - * of the FCP operation at the NVME-FC level. + * negative errno value upon failure (ex: -EIO). Note: this is + * NOT a reflection of the NVME CQE completion status. 
Only the + * status of the FCP operation at the NVME-FC level. */ struct nvmefc_fcp_req { void *cmdaddr; -- cgit v1.2.3 From a5ea7a0fcbd7376b2c9fcb15fe59fec298c9ce9f Mon Sep 17 00:00:00 2001 From: Dave Gerlach Date: Tue, 4 Apr 2017 08:51:29 -0700 Subject: PM / Domains: Add generic data pointer to genpd data struct Add a void *data pointer to struct generic_pm_domain_data. Because this exists for each device associated with a genpd it will allow us to assign per-device data if needed on a platform for control of that specific device. Acked-by: Ulf Hansson Acked-by: Kevin Hilman Signed-off-by: Dave Gerlach Signed-off-by: Santosh Shilimkar --- include/linux/pm_domain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 5339ed5bd6f9..b213d22daefd 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -117,6 +117,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + void *data; }; #ifdef CONFIG_PM_GENERIC_DOMAINS -- cgit v1.2.3 From bf616d21f41174389c6d720ae21bf40f154474c8 Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 4 Apr 2017 16:54:21 +0100 Subject: Annotate module params that specify hardware parameters (eg. ioport) Provided an annotation for module parameters that specify hardware parameters (such as io ports, iomem addresses, irqs, dma channels, fixed dma buffers and other types). This will enable such parameters to be locked down in the core parameter parser for secure boot support. I've also included annotations as to what sort of hardware configuration each module is dealing with for future use. 
Some of these are straightforward (ioport, iomem, irq, dma), but there are also: (1) drivers that switch the semantics of a parameter between ioport and iomem depending on a second parameter, (2) drivers that appear to reserve a CPU memory buffer at a fixed address, (3) other parameters, such as bus types and irq selection bitmasks. For the moment, the hardware configuration type isn't actually stored, though its validity is checked. Signed-off-by: David Howells --- include/linux/moduleparam.h | 65 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 64 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 52666d90ca94..6be1949ebcdf 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -60,9 +60,11 @@ struct kernel_param_ops { * Flags available for kernel_param * * UNSAFE - the parameter is dangerous and setting it will taint the kernel + * HWPARAM - Hardware param not permitted in lockdown mode */ enum { - KERNEL_PARAM_FL_UNSAFE = (1 << 0) + KERNEL_PARAM_FL_UNSAFE = (1 << 0), + KERNEL_PARAM_FL_HWPARAM = (1 << 1), }; struct kernel_param { @@ -451,6 +453,67 @@ extern int param_set_bint(const char *val, const struct kernel_param *kp); perm, -1, 0); \ __MODULE_PARM_TYPE(name, "array of " #type) +enum hwparam_type { + hwparam_ioport, /* Module parameter configures an I/O port */ + hwparam_iomem, /* Module parameter configures an I/O mem address */ + hwparam_ioport_or_iomem, /* Module parameter could be either, depending on other option */ + hwparam_irq, /* Module parameter configures an IRQ */ + hwparam_dma, /* Module parameter configures a DMA channel */ + hwparam_dma_addr, /* Module parameter configures a DMA buffer address */ + hwparam_other, /* Module parameter configures some other value */ +}; + +/** + * module_param_hw_named - A parameter representing a hw parameter + * @name: a valid C identifier which is the parameter name.
+ * @value: the actual lvalue to alter. + * @type: the type of the parameter + * @hwtype: what the value represents (enum hwparam_type) + * @perm: visibility in sysfs. + * + * Usually it's a good idea to have variable names and user-exposed names the + * same, but that's harder if the variable must be non-static or is inside a + * structure. This allows exposure under a different name. + */ +#define module_param_hw_named(name, value, type, hwtype, perm) \ + param_check_##type(name, &(value)); \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + &param_ops_##type, &value, \ + perm, -1, \ + KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0)); \ + __MODULE_PARM_TYPE(name, #type) + +#define module_param_hw(name, type, hwtype, perm) \ + module_param_hw_named(name, name, type, hwtype, perm) + +/** + * module_param_hw_array - A parameter representing an array of hw parameters + * @name: the name of the array variable + * @type: the type, as per module_param() + * @hwtype: what the value represents (enum hwparam_type) + * @nump: optional pointer filled in with the number written + * @perm: visibility in sysfs + * + * Input and output are as comma-separated values. Commas inside values + * don't work properly (eg. an array of charp). + * + * ARRAY_SIZE(@name) is used to determine the number of elements in the + * array, so the definition must be visible.
+ */ +#define module_param_hw_array(name, type, hwtype, nump, perm) \ + param_check_##type(name, &(name)[0]); \ + static const struct kparam_array __param_arr_##name \ + = { .max = ARRAY_SIZE(name), .num = nump, \ + .ops = &param_ops_##type, \ + .elemsize = sizeof(name[0]), .elem = name }; \ + __module_param_call(MODULE_PARAM_PREFIX, name, \ + &param_array_ops, \ + .arr = &__param_arr_##name, \ + perm, -1, \ + KERNEL_PARAM_FL_HWPARAM | (hwparam_##hwtype & 0)); \ + __MODULE_PARM_TYPE(name, "array of " #type) + + extern const struct kernel_param_ops param_array_ops; extern const struct kernel_param_ops param_ops_string; -- cgit v1.2.3 From 65c2e69b3ccaa359032cfc35c4dbb8d235f63e5b Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Mar 2017 19:00:44 +0000 Subject: include: pe.h: allow for use in assembly Some of the definitions in include/linux/pe.h would be useful for the EFI stub headers, where values are currently open-coded. Unfortunately they cannot be used as some structures are also defined in pe.h without !__ASSEMBLY__ guards. This patch moves the structure definitions into an #ifndef __ASSEMBLY__ block, so that the common value definitions can be used from assembly.
Signed-off-by: Mark Rutland Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/pe.h | 174 +++++++++++++++++++++++++++-------------------------- 1 file changed, 89 insertions(+), 85 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h index e170b95e763b..a8a594117df3 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -23,34 +23,6 @@ #define MZ_MAGIC 0x5a4d /* "MZ" */ -struct mz_hdr { - uint16_t magic; /* MZ_MAGIC */ - uint16_t lbsize; /* size of last used block */ - uint16_t blocks; /* pages in file, 0x3 */ - uint16_t relocs; /* relocations */ - uint16_t hdrsize; /* header size in "paragraphs" */ - uint16_t min_extra_pps; /* .bss */ - uint16_t max_extra_pps; /* runtime limit for the arena size */ - uint16_t ss; /* relative stack segment */ - uint16_t sp; /* initial %sp register */ - uint16_t checksum; /* word checksum */ - uint16_t ip; /* initial %ip register */ - uint16_t cs; /* initial %cs relative to load segment */ - uint16_t reloc_table_offset; /* offset of the first relocation */ - uint16_t overlay_num; /* overlay number. set to 0. 
*/ - uint16_t reserved0[4]; /* reserved */ - uint16_t oem_id; /* oem identifier */ - uint16_t oem_info; /* oem specific */ - uint16_t reserved1[10]; /* reserved */ - uint32_t peaddr; /* address of pe header */ - char message[64]; /* message to print */ -}; - -struct mz_reloc { - uint16_t offset; - uint16_t segment; -}; - #define PE_MAGIC 0x00004550 /* "PE\0\0" */ #define PE_OPT_MAGIC_PE32 0x010b #define PE_OPT_MAGIC_PE32_ROM 0x0107 @@ -98,17 +70,6 @@ struct mz_reloc { #define IMAGE_FILE_UP_SYSTEM_ONLY 0x4000 #define IMAGE_FILE_BYTES_REVERSED_HI 0x8000 -struct pe_hdr { - uint32_t magic; /* PE magic */ - uint16_t machine; /* machine type */ - uint16_t sections; /* number of sections */ - uint32_t timestamp; /* time_t */ - uint32_t symbol_table; /* symbol table offset */ - uint32_t symbols; /* number of symbols */ - uint16_t opt_hdr_size; /* size of optional header */ - uint16_t flags; /* flags */ -}; - #define IMAGE_FILE_OPT_ROM_MAGIC 0x107 #define IMAGE_FILE_OPT_PE32_MAGIC 0x10b #define IMAGE_FILE_OPT_PE32_PLUS_MAGIC 0x20b @@ -134,6 +95,93 @@ struct pe_hdr { #define IMAGE_DLLCHARACTERISTICS_WDM_DRIVER 0x2000 #define IMAGE_DLLCHARACTERISTICS_TERMINAL_SERVER_AWARE 0x8000 +/* they actually defined 0x00000000 as well, but I think we'll skip that one. 
*/ +#define IMAGE_SCN_RESERVED_0 0x00000001 +#define IMAGE_SCN_RESERVED_1 0x00000002 +#define IMAGE_SCN_RESERVED_2 0x00000004 +#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ +#define IMAGE_SCN_RESERVED_3 0x00000010 +#define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ +#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ +#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ +#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ +#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ +#define IMAGE_SCN_RESERVED_4 0x00000400 +#define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ +#define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ +#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ +#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ +#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ +/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ +#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for "future" use */ +#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ +#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ +#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ +/* and here they just stuck a 1-byte integer in the middle of a bitfield */ +#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ +#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 +#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 +#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 +#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 +#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 +#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 +#define IMAGE_SCN_ALIGN_128BYTES 0x00800000 +#define IMAGE_SCN_ALIGN_256BYTES 0x00900000 +#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000 +#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000 +#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 +#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 +#define IMAGE_SCN_ALIGN_8192BYTES 
0x00e00000 +#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ +#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ +#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ +#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ +#define IMAGE_SCN_MEM_SHARED 0x10000000 /* can be shared */ +#define IMAGE_SCN_MEM_EXECUTE 0x20000000 /* can be executed as code */ +#define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ +#define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ + +#ifndef __ASSEMBLY__ + +struct mz_hdr { + uint16_t magic; /* MZ_MAGIC */ + uint16_t lbsize; /* size of last used block */ + uint16_t blocks; /* pages in file, 0x3 */ + uint16_t relocs; /* relocations */ + uint16_t hdrsize; /* header size in "paragraphs" */ + uint16_t min_extra_pps; /* .bss */ + uint16_t max_extra_pps; /* runtime limit for the arena size */ + uint16_t ss; /* relative stack segment */ + uint16_t sp; /* initial %sp register */ + uint16_t checksum; /* word checksum */ + uint16_t ip; /* initial %ip register */ + uint16_t cs; /* initial %cs relative to load segment */ + uint16_t reloc_table_offset; /* offset of the first relocation */ + uint16_t overlay_num; /* overlay number. set to 0. 
*/ + uint16_t reserved0[4]; /* reserved */ + uint16_t oem_id; /* oem identifier */ + uint16_t oem_info; /* oem specific */ + uint16_t reserved1[10]; /* reserved */ + uint32_t peaddr; /* address of pe header */ + char message[64]; /* message to print */ +}; + +struct mz_reloc { + uint16_t offset; + uint16_t segment; +}; + +struct pe_hdr { + uint32_t magic; /* PE magic */ + uint16_t machine; /* machine type */ + uint16_t sections; /* number of sections */ + uint32_t timestamp; /* time_t */ + uint32_t symbol_table; /* symbol table offset */ + uint32_t symbols; /* number of symbols */ + uint16_t opt_hdr_size; /* size of optional header */ + uint16_t flags; /* flags */ +}; + /* the fact that pe32 isn't padded where pe32+ is 64-bit means union won't * work right. vomit. */ struct pe32_opt_hdr { @@ -243,52 +291,6 @@ struct section_header { uint32_t flags; }; -/* they actually defined 0x00000000 as well, but I think we'll skip that one. */ -#define IMAGE_SCN_RESERVED_0 0x00000001 -#define IMAGE_SCN_RESERVED_1 0x00000002 -#define IMAGE_SCN_RESERVED_2 0x00000004 -#define IMAGE_SCN_TYPE_NO_PAD 0x00000008 /* don't pad - obsolete */ -#define IMAGE_SCN_RESERVED_3 0x00000010 -#define IMAGE_SCN_CNT_CODE 0x00000020 /* .text */ -#define IMAGE_SCN_CNT_INITIALIZED_DATA 0x00000040 /* .data */ -#define IMAGE_SCN_CNT_UNINITIALIZED_DATA 0x00000080 /* .bss */ -#define IMAGE_SCN_LNK_OTHER 0x00000100 /* reserved */ -#define IMAGE_SCN_LNK_INFO 0x00000200 /* .drectve comments */ -#define IMAGE_SCN_RESERVED_4 0x00000400 -#define IMAGE_SCN_LNK_REMOVE 0x00000800 /* .o only - scn to be rm'd*/ -#define IMAGE_SCN_LNK_COMDAT 0x00001000 /* .o only - COMDAT data */ -#define IMAGE_SCN_RESERVED_5 0x00002000 /* spec omits this */ -#define IMAGE_SCN_RESERVED_6 0x00004000 /* spec omits this */ -#define IMAGE_SCN_GPREL 0x00008000 /* global pointer referenced data */ -/* spec lists 0x20000 twice, I suspect they meant 0x10000 for one of them */ -#define IMAGE_SCN_MEM_PURGEABLE 0x00010000 /* reserved for 
"future" use */ -#define IMAGE_SCN_16BIT 0x00020000 /* reserved for "future" use */ -#define IMAGE_SCN_LOCKED 0x00040000 /* reserved for "future" use */ -#define IMAGE_SCN_PRELOAD 0x00080000 /* reserved for "future" use */ -/* and here they just stuck a 1-byte integer in the middle of a bitfield */ -#define IMAGE_SCN_ALIGN_1BYTES 0x00100000 /* it does what it says on the box */ -#define IMAGE_SCN_ALIGN_2BYTES 0x00200000 -#define IMAGE_SCN_ALIGN_4BYTES 0x00300000 -#define IMAGE_SCN_ALIGN_8BYTES 0x00400000 -#define IMAGE_SCN_ALIGN_16BYTES 0x00500000 -#define IMAGE_SCN_ALIGN_32BYTES 0x00600000 -#define IMAGE_SCN_ALIGN_64BYTES 0x00700000 -#define IMAGE_SCN_ALIGN_128BYTES 0x00800000 -#define IMAGE_SCN_ALIGN_256BYTES 0x00900000 -#define IMAGE_SCN_ALIGN_512BYTES 0x00a00000 -#define IMAGE_SCN_ALIGN_1024BYTES 0x00b00000 -#define IMAGE_SCN_ALIGN_2048BYTES 0x00c00000 -#define IMAGE_SCN_ALIGN_4096BYTES 0x00d00000 -#define IMAGE_SCN_ALIGN_8192BYTES 0x00e00000 -#define IMAGE_SCN_LNK_NRELOC_OVFL 0x01000000 /* extended relocations */ -#define IMAGE_SCN_MEM_DISCARDABLE 0x02000000 /* scn can be discarded */ -#define IMAGE_SCN_MEM_NOT_CACHED 0x04000000 /* cannot be cached */ -#define IMAGE_SCN_MEM_NOT_PAGED 0x08000000 /* not pageable */ -#define IMAGE_SCN_MEM_SHARED 0x10000000 /* can be shared */ -#define IMAGE_SCN_MEM_EXECUTE 0x20000000 /* can be executed as code */ -#define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ -#define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ - enum x64_coff_reloc_type { IMAGE_REL_AMD64_ABSOLUTE = 0, IMAGE_REL_AMD64_ADDR64, @@ -445,4 +447,6 @@ struct win_certificate { uint16_t cert_type; }; +#endif /* !__ASSEMBLY__ */ + #endif /* __LINUX_PE_H */ -- cgit v1.2.3 From 6f5541ba0eed842445a99b411d0f34103bcbbea1 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Thu, 23 Mar 2017 19:00:45 +0000 Subject: include: pe.h: add some missing definitions Add the missing IMAGE_FILE_MACHINE_ARM64 and IMAGE_DEBUG_TYPE_CODEVIEW definitions. 
We'll need them for the arm64 EFI stub... Signed-off-by: Mark Rutland [ardb: add IMAGE_DEBUG_TYPE_CODEVIEW as well] Signed-off-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/pe.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pe.h b/include/linux/pe.h index a8a594117df3..143ce75be5f0 100644 --- a/include/linux/pe.h +++ b/include/linux/pe.h @@ -34,6 +34,7 @@ #define IMAGE_FILE_MACHINE_AMD64 0x8664 #define IMAGE_FILE_MACHINE_ARM 0x01c0 #define IMAGE_FILE_MACHINE_ARMV7 0x01c4 +#define IMAGE_FILE_MACHINE_ARM64 0xaa64 #define IMAGE_FILE_MACHINE_EBC 0x0ebc #define IMAGE_FILE_MACHINE_I386 0x014c #define IMAGE_FILE_MACHINE_IA64 0x0200 @@ -141,6 +142,8 @@ #define IMAGE_SCN_MEM_READ 0x40000000 /* readable */ #define IMAGE_SCN_MEM_WRITE 0x80000000 /* writeable */ +#define IMAGE_DEBUG_TYPE_CODEVIEW 2 + #ifndef __ASSEMBLY__ struct mz_hdr { -- cgit v1.2.3 From 2b6aa412ff23a02ac777ad307249c60a839cfd25 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 31 Aug 2016 16:05:43 -0700 Subject: KEYS: Use structure to capture key restriction function and data Replace struct key's restrict_link function pointer with a pointer to the new struct key_restriction. The structure contains pointers to the restriction function as well as relevant data for evaluating the restriction. The garbage collector checks restrict_link->keytype when key types are unregistered. Restrictions involving a removed key type are converted to use restrict_link_reject so that restrictions cannot be removed by unregistering key types. Signed-off-by: Mat Martineau --- include/linux/key.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index a06649f3223d..d2916363689c 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -217,7 +217,7 @@ struct key { }; /* This is set on a keyring to restrict the addition of a link to a key - * to it. 
If this method isn't provided then it is assumed that the + * to it. If this structure isn't provided then it is assumed that the * keyring is open to any addition. It is ignored for non-keyring * keys. * @@ -226,7 +226,7 @@ struct key { * overrides this, allowing the kernel to add extra keys without * restriction. */ - key_restrict_link_func_t restrict_link; + struct key_restriction *restrict_link; }; extern struct key *key_alloc(struct key_type *type, @@ -235,7 +235,7 @@ extern struct key *key_alloc(struct key_type *type, const struct cred *cred, key_perm_t perm, unsigned long flags, - key_restrict_link_func_t restrict_link); + struct key_restriction *restrict_link); #define KEY_ALLOC_IN_QUOTA 0x0000 /* add to quota, reject if would overrun */ @@ -311,7 +311,7 @@ extern struct key *keyring_alloc(const char *description, kuid_t uid, kgid_t gid const struct cred *cred, key_perm_t perm, unsigned long flags, - key_restrict_link_func_t restrict_link, + struct key_restriction *restrict_link, struct key *dest); extern int restrict_link_reject(struct key *keyring, -- cgit v1.2.3 From efba797b977c99bc6e0c301299272c80fb8b287f Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 6 May 2016 15:38:17 -0700 Subject: KEYS: Add an optional lookup_restriction hook to key_type The restrict_link functions used to validate keys as they are linked to a keyring can be associated with specific key types. Each key type may be loaded (or not) at runtime, so lookup of restrict_link functions needs to be part of the key type implementation to ensure that the requested keys can be examined. 
Signed-off-by: Mat Martineau --- include/linux/key-type.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index eaee981c5558..8496cf64575c 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -147,6 +147,14 @@ struct key_type { */ request_key_actor_t request_key; + /* Look up a keyring access restriction (optional) + * + * - NULL is a valid return value (meaning the requested restriction + * is known but will never block addition of a key) + * - should return -EINVAL if the restriction is unknown + */ + struct key_restriction *(*lookup_restriction)(const char *params); + /* internal fields */ struct list_head link; /* link in types list */ struct lock_class_key lock_class; /* key->sem lock class */ -- cgit v1.2.3 From 6563c91fd645556c7801748f15bc727c77fcd311 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Wed, 1 Mar 2017 16:44:09 -0800 Subject: KEYS: Add KEYCTL_RESTRICT_KEYRING Keyrings recently gained restrict_link capabilities that allow individual keys to be validated prior to linking. This functionality was only available using internal kernel APIs. With the KEYCTL_RESTRICT_KEYRING command existing keyrings can be configured to check the content of keys before they are linked, and then allow or disallow linkage of that key to the keyring. To restrict a keyring, call: keyctl(KEYCTL_RESTRICT_KEYRING, key_serial_t keyring, const char *type, const char *restriction) where 'type' is the name of a registered key type and 'restriction' is a string describing how key linkage is to be restricted. The restriction option syntax is specific to each key type. 
Signed-off-by: Mat Martineau --- include/linux/key.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index d2916363689c..0c9b93b0d1f7 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -219,7 +219,8 @@ struct key { /* This is set on a keyring to restrict the addition of a link to a key * to it. If this structure isn't provided then it is assumed that the * keyring is open to any addition. It is ignored for non-keyring - * keys. + * keys. Only set this value using keyring_restrict(), keyring_alloc(), + * or key_alloc(). * * This is intended for use with rings of trusted keys whereby addition * to the keyring needs to be controlled. KEY_ALLOC_BYPASS_RESTRICTION @@ -328,6 +329,9 @@ extern key_ref_t keyring_search(key_ref_t keyring, extern int keyring_add_key(struct key *keyring, struct key *key); +extern int keyring_restrict(key_ref_t keyring, const char *type, + const char *restriction); + extern struct key *key_lookup(key_serial_t id); static inline key_serial_t key_serial(const struct key *key) -- cgit v1.2.3 From 96dc4f9fb64690fc34410415fd1fc609cf803f61 Mon Sep 17 00:00:00 2001 From: Sahara Date: Thu, 16 Feb 2017 18:29:15 +0000 Subject: usercopy: Move enum for arch_within_stack_frames() This patch moves the arch_within_stack_frames() return value enum up in the header files so that per-architecture implementations can reuse the same return values. 
Signed-off-by: Sahara Signed-off-by: James Morse [kees: adjusted naming and commit log] Signed-off-by: Kees Cook --- include/linux/thread_info.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 58373875e8ee..0dbe41be6181 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -22,6 +22,18 @@ #endif #include + +/* + * For per-arch arch_within_stack_frames() implementations, defined in + * asm/thread_info.h. + */ +enum { + BAD_STACK = -1, + NOT_STACK = 0, + GOOD_FRAME, + GOOD_STACK, +}; + #include #ifdef __KERNEL__ -- cgit v1.2.3 From f1c316a3ab9d24df6022682422fe897492f2c0c8 Mon Sep 17 00:00:00 2001 From: Stephan Mueller Date: Fri, 19 Aug 2016 20:39:09 +0200 Subject: KEYS: add SP800-56A KDF support for DH SP800-56A defines the use of DH with key derivation function based on a counter. The input to the KDF is defined as (DH shared secret || other information). The value for the "other information" is to be provided by the caller. The KDF is implemented using the hash support from the kernel crypto API. The implementation uses the symmetric hash support as the input to the hash operation is usually very small. The caller is allowed to specify the hash name that he wants to use to derive the key material allowing the use of all supported hashes provided with the kernel crypto API. As the KDF implements the proper truncation of the DH shared secret to the requested size, this patch fills the caller buffer up to its size. The patch is tested with a new test added to the keyutils user space code which uses a CAVS test vector testing the compliance with SP800-56A. 
Signed-off-by: Stephan Mueller Signed-off-by: David Howells --- include/linux/compat.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..993c87182e02 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -295,6 +295,13 @@ struct compat_old_sigaction { }; #endif +struct compat_keyctl_kdf_params { + compat_uptr_t hashname; + compat_uptr_t otherinfo; + __u32 otherinfolen; + __u32 __spare[8]; +}; + struct compat_statfs; struct compat_statfs64; struct compat_old_linux_dirent; -- cgit v1.2.3 From 9b3fe6796d7c0e0c2b87243ce0c7f4744c54efad Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Tue, 28 Mar 2017 08:42:49 -0700 Subject: PCI: imx6: Add code to support i.MX7D Add various bits of code needed to support i.MX7D variant of the IP. Signed-off-by: Andrey Smirnov Signed-off-by: Bjorn Helgaas Reviewed-by: Lucas Stach Acked-by: Lee Jones Acked-by: Rob Herring Cc: yurovsky@gmail.com Cc: Mark Rutland Cc: Fabio Estevam Cc: Dong Aisheng Cc: linux-arm-kernel@lists.infradead.org Cc: devicetree@vger.kernel.org --- include/linux/mfd/syscon/imx7-iomuxc-gpr.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h index 4585d6105d68..abbd52466573 100644 --- a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h +++ b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h @@ -44,4 +44,8 @@ #define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI (0x1 << 4) +#define IMX7D_GPR12_PCIE_PHY_REFCLK_SEL BIT(5) + +#define IMX7D_GPR22_PCIE_PHY_PLL_LOCKED BIT(31) + #endif /* __LINUX_IMX7_IOMUXC_GPR_H */ -- cgit v1.2.3 From e7f6ccaab127147c52ac4b624c54ad0059bd08e9 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 28 Mar 2017 12:36:35 +0300 Subject: NFC: pn544: Get rid of platform data Legacy platform data must go away. We are on the safe side here since there are no users of it in the kernel. 
If anyone by any odd reason needs it the GPIO lookup tables and built-in device properties at your service. Signed-off-by: Andy Shevchenko Signed-off-by: Samuel Ortiz --- include/linux/platform_data/pn544.h | 43 ------------------------------------- 1 file changed, 43 deletions(-) delete mode 100644 include/linux/platform_data/pn544.h (limited to 'include/linux') diff --git a/include/linux/platform_data/pn544.h b/include/linux/platform_data/pn544.h deleted file mode 100644 index 5ce1ab983f44..000000000000 --- a/include/linux/platform_data/pn544.h +++ /dev/null @@ -1,43 +0,0 @@ -/* - * Driver include for the PN544 NFC chip. - * - * Copyright (C) Nokia Corporation - * - * Author: Jari Vanhala - * Contact: Matti Aaltoenn - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _PN544_H_ -#define _PN544_H_ - -#include - -enum { - NFC_GPIO_ENABLE, - NFC_GPIO_FW_RESET, - NFC_GPIO_IRQ -}; - -/* board config */ -struct pn544_nfc_platform_data { - int (*request_resources) (struct i2c_client *client); - void (*free_resources) (void); - void (*enable) (int fw); - int (*test) (void); - void (*disable) (void); - int (*get_gpio)(int type); -}; - -#endif /* _PN544_H_ */ -- cgit v1.2.3 From 79557b33cca2fa005235b45ab16b81f95f441bd8 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 7 Mar 2017 12:25:43 +0200 Subject: NFC: st21nfca: Get rid of platform data Legacy platform data must go away. 
We are on the safe side here since there are no users of it in the kernel. If anyone by any odd reason needs it the GPIO lookup tables and built-in device properties at your service. Signed-off-by: Andy Shevchenko Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st21nfca.h | 33 --------------------------------- 1 file changed, 33 deletions(-) delete mode 100644 include/linux/platform_data/st21nfca.h (limited to 'include/linux') diff --git a/include/linux/platform_data/st21nfca.h b/include/linux/platform_data/st21nfca.h deleted file mode 100644 index cc2bdafb0c69..000000000000 --- a/include/linux/platform_data/st21nfca.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * Driver include for the ST21NFCA NFC chip. - * - * Copyright (C) 2014 STMicroelectronics SAS. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, see . - */ - -#ifndef _ST21NFCA_HCI_H_ -#define _ST21NFCA_HCI_H_ - -#include - -#define ST21NFCA_HCI_DRIVER_NAME "st21nfca_hci" - -struct st21nfca_nfc_platform_data { - unsigned int gpio_ena; - unsigned int irq_polarity; - bool is_ese_present; - bool is_uicc_present; -}; - -#endif /* _ST21NFCA_HCI_H_ */ -- cgit v1.2.3 From 6e7300cff1c410dde7ac4354b6a0a8cb0a561e54 Mon Sep 17 00:00:00 2001 From: Bhupesh Sharma Date: Tue, 4 Apr 2017 17:02:41 +0100 Subject: efi/bgrt: Enable ACPI BGRT handling on arm64 Now that the ACPI BGRT handling code has been made generic, we can enable it for arm64. 
Signed-off-by: Bhupesh Sharma [ Updated commit log to reflect that BGRT is only enabled for arm64, and added missing 'return' statement to the dummy acpi_parse_bgrt() function. ] Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Link: http://lkml.kernel.org/r/20170404160245.27812-8-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi-bgrt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi-bgrt.h b/include/linux/efi-bgrt.h index 2fd3993c370b..e6f624b53c3d 100644 --- a/include/linux/efi-bgrt.h +++ b/include/linux/efi-bgrt.h @@ -6,6 +6,7 @@ #ifdef CONFIG_ACPI_BGRT void efi_bgrt_init(struct acpi_table_header *table); +int __init acpi_parse_bgrt(struct acpi_table_header *table); /* The BGRT data itself; only valid if bgrt_image != NULL. */ extern size_t bgrt_image_size; @@ -14,6 +15,10 @@ extern struct acpi_table_bgrt bgrt_tab; #else /* !CONFIG_ACPI_BGRT */ static inline void efi_bgrt_init(struct acpi_table_header *table) {} +static inline int __init acpi_parse_bgrt(struct acpi_table_header *table) +{ + return 0; +} #endif /* !CONFIG_ACPI_BGRT */ -- cgit v1.2.3 From 60f38de7a8d4e816100ceafd1b382df52527bd50 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Apr 2017 17:09:08 +0100 Subject: efi/libstub: Unify command line param parsing Merge the parsing of the command line carried out in arm-stub.c with the handling in efi_parse_options(). Note that this also fixes the missing handling of CONFIG_CMDLINE_FORCE=y, in which case the builtin command line should supersede the one passed by the firmware. 
Signed-off-by: Ard Biesheuvel Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bhe@redhat.com Cc: bhsharma@redhat.com Cc: bp@alien8.de Cc: eugene@hp.com Cc: evgeny.kalugin@intel.com Cc: jhugo@codeaurora.org Cc: leif.lindholm@linaro.org Cc: linux-efi@vger.kernel.org Cc: mark.rutland@arm.com Cc: roy.franz@cavium.com Cc: rruigrok@codeaurora.org Link: http://lkml.kernel.org/r/20170404160910.28115-1-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 94d34e0be24f..e485e87615d1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1471,7 +1471,7 @@ efi_status_t handle_cmdline_files(efi_system_table_t *sys_table_arg, unsigned long *load_addr, unsigned long *load_size); -efi_status_t efi_parse_options(char *cmdline); +efi_status_t efi_parse_options(char const *cmdline); efi_status_t efi_setup_gop(efi_system_table_t *sys_table_arg, struct screen_info *si, efi_guid_t *proto, -- cgit v1.2.3 From eeff7d634f4750306785be709ca444140c29b043 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 4 Apr 2017 17:09:09 +0100 Subject: efi/libstub/arm/arm64: Disable debug prints on 'quiet' cmdline arg The EFI stub currently prints a number of diagnostic messages that do not carry a lot of information. Since these prints are not controlled by 'loglevel' or other command line parameters, and since they appear on the EFI framebuffer as well (if enabled), it would be nice if we could turn them off. So let's add support for the 'quiet' command line parameter in the stub, and disable the non-error prints if it is passed. 
Signed-off-by: Ard Biesheuvel Acked-by: Mark Rutland Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: bhe@redhat.com Cc: bhsharma@redhat.com Cc: bp@alien8.de Cc: eugene@hp.com Cc: evgeny.kalugin@intel.com Cc: jhugo@codeaurora.org Cc: leif.lindholm@linaro.org Cc: linux-efi@vger.kernel.org Cc: roy.franz@cavium.com Cc: rruigrok@codeaurora.org Link: http://lkml.kernel.org/r/20170404160910.28115-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index e485e87615d1..ec36f42a2add 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -1435,9 +1435,6 @@ static inline int efi_runtime_map_copy(void *buf, size_t bufsz) /* prototypes shared between arch specific and generic stub code */ -#define pr_efi(sys_table, msg) efi_printk(sys_table, "EFI stub: "msg) -#define pr_efi_err(sys_table, msg) efi_printk(sys_table, "EFI stub: ERROR: "msg) - void efi_printk(efi_system_table_t *sys_table_arg, char *str); void efi_free(efi_system_table_t *sys_table_arg, unsigned long size, -- cgit v1.2.3 From b2376407f98920c9b0c411948675f58a9640be35 Mon Sep 17 00:00:00 2001 From: Vic Yang Date: Fri, 24 Mar 2017 18:44:01 +0100 Subject: mfd: cros-ec: Fix host command buffer size For SPI, we can get up to 32 additional bytes for response preamble. The current overhead (2 bytes) may cause problems when we try to receive a big response. Update it to 32 bytes. Without this fix we could see a kernel BUG when we receive a big response from the Chrome EC when is connected via SPI. 
Signed-off-by: Vic Yang Tested-by: Enric Balletbo i Serra Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7a01c94496f1..3eef9fb9968a 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -35,10 +35,11 @@ * Max bus-specific overhead incurred by request/responses. * I2C requires 1 additional byte for requests. * I2C requires 2 additional bytes for responses. + * SPI requires up to 32 additional bytes for responses. * */ #define EC_PROTO_VERSION_UNKNOWN 0 #define EC_MAX_REQUEST_OVERHEAD 1 -#define EC_MAX_RESPONSE_OVERHEAD 2 +#define EC_MAX_RESPONSE_OVERHEAD 32 /* * Command interface between EC and AP, for LPC, I2C and SPI interfaces. -- cgit v1.2.3 From def12888c161e6fec0702e5ec9c3962846e3a21d Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 4 Apr 2017 09:23:42 -0400 Subject: rtnl: Add support for netdev event to link messages When netdev events happen, a rtnetlink_event() handler will send messages for every event in its white list. These messages contain current information about a particular device, but they do not include the information about which event just happened. The consumer of the message has to try to infer this information. In some cases (ex: NETDEV_NOTIFY_PEERS), that is not possible. This patch adds a new extension to RTM_NEWLINK message called IFLA_EVENT that would have an encoding of which event triggered this message. This would allow the message consumer to easily determine if it is interested in a particular event or not. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 57e54847b0b9..0459018173cf 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -18,7 +18,8 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned change, gfp_t flags); + unsigned change, unsigned long event, + gfp_t flags); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); -- cgit v1.2.3 From f2fbc9dd78970accd7649e8b87c7f00a0da0cdbc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 5 Apr 2017 08:39:18 -0700 Subject: blk-mq: Remove blk_mq_queue_data.list The block layer core sets blk_mq_queue_data.list but no block drivers read that member. Hence remove it and also the code that is used to set this member. Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ea2e9dcd3aef..bdea90d75274 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -81,7 +81,6 @@ struct blk_mq_tag_set { struct blk_mq_queue_data { struct request *rq; - struct list_head *list; bool last; }; -- cgit v1.2.3 From 0ae797a8ba05a2354db5e81c1d7df04671dd1c25 Mon Sep 17 00:00:00 2001 From: Arto Merilainen Date: Wed, 14 Dec 2016 13:16:13 +0200 Subject: drm/tegra: Add VIC support This patch adds support for Video Image Compositor engine which can be used for 2d operations. 
Signed-off-by: Andrew Chew Signed-off-by: Arto Merilainen Signed-off-by: Mikko Perttunen Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 1ffbf2a8cb99..3d04aa1dc83e 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -26,6 +26,7 @@ enum host1x_class { HOST1X_CLASS_HOST1X = 0x1, HOST1X_CLASS_GR2D = 0x51, HOST1X_CLASS_GR2D_SB = 0x52, + HOST1X_CLASS_VIC = 0x5D, HOST1X_CLASS_GR3D = 0x60, }; -- cgit v1.2.3 From d6c1dc3f52e3a65f35c58433ba57d14c0bad902f Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Tue, 4 Apr 2017 18:59:50 +0530 Subject: regulator: Add settling time for non-linear voltage transition Some regulators (some PWM regulators) have a non-linear (i.e. exponential) voltage transition. In such cases, the settling time for the voltage transition cannot be expressed through the voltage-ramp-delay. Add a new property for non-linear voltage transitions and handle it when getting the voltage settling time. Signed-off-by: Laxman Dewangan Signed-off-by: Mark Brown --- include/linux/regulator/machine.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/machine.h b/include/linux/regulator/machine.h index ad3e5158e586..598a493b3927 100644 --- a/include/linux/regulator/machine.h +++ b/include/linux/regulator/machine.h @@ -108,6 +108,8 @@ struct regulator_state { * @initial_state: Suspend state to set by default. * @initial_mode: Mode to set at startup. * @ramp_delay: Time to settle down after voltage change (unit: uV/us) + * @settling_time: Time to settle down after voltage change when voltage + * change is non-linear (unit: microseconds). * @active_discharge: Enable/disable active discharge. The enum * regulator_active_discharge values are used for * initialisation. 
@@ -149,6 +151,7 @@ struct regulation_constraints { unsigned int initial_mode; unsigned int ramp_delay; + unsigned int settling_time; unsigned int enable_time; unsigned int active_discharge; -- cgit v1.2.3 From 4c546b8a34690ca858e50f2017b8bb6e358365d1 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Mon, 3 Apr 2017 11:23:54 +0900 Subject: memblock: add memblock_clear_nomap() This function, with a combination of memblock_mark_nomap(), will be used in a later kdump patch for arm64 when it temporarily isolates some range of memory from the other memory blocks in order to create a specific kernel mapping at boot time. Signed-off-by: AKASHI Takahiro Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index bdfc65af4152..e82daffcfc44 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -93,6 +93,7 @@ int memblock_mark_hotplug(phys_addr_t base, phys_addr_t size); int memblock_clear_hotplug(phys_addr_t base, phys_addr_t size); int memblock_mark_mirror(phys_addr_t base, phys_addr_t size); int memblock_mark_nomap(phys_addr_t base, phys_addr_t size); +int memblock_clear_nomap(phys_addr_t base, phys_addr_t size); ulong choose_memblock_flags(void); /* Low level functions */ -- cgit v1.2.3 From c9ca9b4e2198a4dbeb83739460d4a7ff9ffed24f Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Mon, 3 Apr 2017 11:23:55 +0900 Subject: memblock: add memblock_cap_memory_range() Add memblock_cap_memory_range() which will remove all the memblock regions except the memory range specified in the arguments. In addition, rework is done on memblock_mem_limit_remove_map() to re-implement it using memblock_cap_memory_range(). This function, like memblock_mem_limit_remove_map(), will not remove memblocks with MEMMAP_NOMAP attribute as they may be mapped and accessed later as "device memory." 
See the commit a571d4eb55d8 ("mm/memblock.c: add new infrastructure to address the mem limit issue"). This function is used, in a succeeding patch in the series of arm64 kdump support, to limit the range of usable memory, or System RAM, on crash dump kernel. (Please note that "mem=" parameter is of little use for this purpose.) Signed-off-by: AKASHI Takahiro Reviewed-by: Will Deacon Acked-by: Catalin Marinas Acked-by: Dennis Chen Cc: linux-mm@kvack.org Cc: Andrew Morton Reviewed-by: Ard Biesheuvel Signed-off-by: Catalin Marinas --- include/linux/memblock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index e82daffcfc44..4ce24a376262 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -336,6 +336,7 @@ phys_addr_t memblock_mem_size(unsigned long limit_pfn); phys_addr_t memblock_start_of_DRAM(void); phys_addr_t memblock_end_of_DRAM(void); void memblock_enforce_memory_limit(phys_addr_t memory_limit); +void memblock_cap_memory_range(phys_addr_t base, phys_addr_t size); void memblock_mem_limit_remove_map(phys_addr_t limit); bool memblock_is_memory(phys_addr_t addr); int memblock_is_map_memory(phys_addr_t addr); -- cgit v1.2.3 From 64c7f1d1572cacadfc0a4ca5a937aeffa486de58 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:18:12 +0200 Subject: block, scsi: move the retries field to struct scsi_request Instead of bloating the generic struct request with it.
Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a2dc6b390d48..ce6f9a6534c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -224,7 +224,6 @@ struct request { unsigned long deadline; struct list_head timeout_list; unsigned int timeout; - int retries; /* * completion callback. -- cgit v1.2.3 From 1dd5198b2df913aec9b77c14529f9ff1b6d33e30 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Apr 2017 12:16:38 -0600 Subject: block: move timeout field in struct request to pack better After commit 64c7f1d1572c, we went from 1 to 2 holes in my test setup. If we move the timeout field a bit, we remove both of those holes and shrink struct request by 8 bytes. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ce6f9a6534c9..3cf241b0814d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -215,6 +215,8 @@ struct request { unsigned short ioprio; + unsigned int timeout; + void *special; /* opaque pointer available for LLD use */ int errors; @@ -223,7 +225,6 @@ struct request { unsigned long deadline; struct list_head timeout_list; - unsigned int timeout; /* * completion callback. -- cgit v1.2.3 From adf5e5168bd51c42332ebaa709351fa6ed65ea73 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 27 Jan 2017 12:22:54 +0000 Subject: iommu: Better document the IOMMU_PRIV flag This is a fairly subtle thing - let's make sure it's described as clearly as possible to avoid potential misunderstandings. 
Signed-off-by: Robin Murphy Signed-off-by: Will Deacon --- include/linux/iommu.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 2e4de0deee53..88ec8c6580d3 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -32,10 +32,13 @@ #define IOMMU_NOEXEC (1 << 3) #define IOMMU_MMIO (1 << 4) /* e.g. things like MSI doorbells */ /* - * This is to make the IOMMU API setup privileged - * mapppings accessible by the master only at higher - * privileged execution level and inaccessible at - * less privileged levels. + * Where the bus hardware includes a privilege level as part of its access type + * markings, and certain devices are capable of issuing transactions marked as + * either 'supervisor' or 'user', the IOMMU_PRIV flag requests that the other + * given permission flags only apply to accesses at the higher privilege level, + * and that unprivileged transactions should have as little access as possible. + * This would usually imply the same permissions as kernel mappings on the CPU, + * if the IOMMU page table format is equivalent. */ #define IOMMU_PRIV (1 << 5) -- cgit v1.2.3 From a7a453f56a1a116027f84ac53b365eb045a0e279 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 28 Mar 2017 15:14:40 +0100 Subject: regulator: helpers: Add regmap set_soft_start helper Add a helper function regulator_set_soft_start_regmap to allow regmap based regulators to easily enable soft start. 
Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index dac8e7b16bc6..1054c033e783 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -292,6 +292,10 @@ enum regulator_type { * set_active_discharge * @active_discharge_reg: Register for control when using regmap * set_active_discharge + * @soft_start_reg: Register for control when using regmap set_soft_start + * @soft_start_mask: Mask for control when using regmap set_soft_start + * @soft_start_val_on: Enabling value for control when using regmap + * set_soft_start * * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator @@ -345,6 +349,9 @@ struct regulator_desc { unsigned int active_discharge_off; unsigned int active_discharge_mask; unsigned int active_discharge_reg; + unsigned int soft_start_reg; + unsigned int soft_start_mask; + unsigned int soft_start_val_on; unsigned int enable_time; @@ -476,6 +483,7 @@ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, unsigned int new_selector); int regulator_set_bypass_regmap(struct regulator_dev *rdev, bool enable); int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable); +int regulator_set_soft_start_regmap(struct regulator_dev *rdev); int regulator_set_active_discharge_regmap(struct regulator_dev *rdev, bool enable); -- cgit v1.2.3 From f7d37bc3cb20828ac43b22cbd40222877ee2c46a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Tue, 28 Mar 2017 15:14:41 +0100 Subject: regulator: helpers: Add regmap set_pull_down helper Add a helper function regulator_set_pull_down_regmap to allow regmap based regulators to easily enable pull down. 
Signed-off-by: Charles Keepax Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 1054c033e783..8a9078dd2a5f 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -296,6 +296,10 @@ enum regulator_type { * @soft_start_mask: Mask for control when using regmap set_soft_start * @soft_start_val_on: Enabling value for control when using regmap * set_soft_start + * @pull_down_reg: Register for control when using regmap set_pull_down + * @pull_down_mask: Mask for control when using regmap set_pull_down + * @pull_down_val_on: Enabling value for control when using regmap + * set_pull_down * * @enable_time: Time taken for initial enable of regulator (in uS). * @off_on_delay: guard time (in uS), before re-enabling a regulator @@ -352,6 +356,9 @@ struct regulator_desc { unsigned int soft_start_reg; unsigned int soft_start_mask; unsigned int soft_start_val_on; + unsigned int pull_down_reg; + unsigned int pull_down_mask; + unsigned int pull_down_val_on; unsigned int enable_time; @@ -484,6 +491,7 @@ int regulator_set_voltage_time_sel(struct regulator_dev *rdev, int regulator_set_bypass_regmap(struct regulator_dev *rdev, bool enable); int regulator_get_bypass_regmap(struct regulator_dev *rdev, bool *enable); int regulator_set_soft_start_regmap(struct regulator_dev *rdev); +int regulator_set_pull_down_regmap(struct regulator_dev *rdev); int regulator_set_active_discharge_regmap(struct regulator_dev *rdev, bool enable); -- cgit v1.2.3 From 113c3075931a334f899008f6c753abe70a3a9323 Mon Sep 17 00:00:00 2001 From: "R. Parameswaran" Date: Wed, 5 Apr 2017 16:50:35 -0700 Subject: New kernel function to get IP overhead on a socket. 
A new function, kernel_sock_ip_overhead(), is provided to calculate the cumulative overhead imposed by the IP Header and IP options, if any, on a socket's payload. The new function returns an overhead of zero for sockets that do not belong to the IPv4 or IPv6 address families. This is used in the L2TP code path to compute the total outer IP overhead on the L2TP tunnel socket when calculating the default MTU for Ethernet pseudowires. Signed-off-by: R. Parameswaran Signed-off-by: David S. Miller --- include/linux/net.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 0620f5e18c96..a42fab24c8af 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -298,6 +298,9 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); +/* Following routine returns the IP overhead imposed by a socket. */ +u32 kernel_sock_ip_overhead(struct sock *sk); + #define MODULE_ALIAS_NETPROTO(proto) \ MODULE_ALIAS("net-pf-" __stringify(proto)) -- cgit v1.2.3 From 08737a3fa30a4c6c10b4c4b682125c7d3c494094 Mon Sep 17 00:00:00 2001 From: "Mintz, Yuval" Date: Thu, 6 Apr 2017 15:58:33 +0300 Subject: qed: Inform qedi the number of possible CQs Now that management firmware is capable of telling us the number of CQs available for a given PF, qed needs to communicate the number to qedi so it would know how many to use. Signed-off-by: Yuval Mintz Signed-off-by: David S.
Miller --- include/linux/qed/qed_iscsi_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_iscsi_if.h b/include/linux/qed/qed_iscsi_if.h index f70bb81b8b6a..3414649133d2 100644 --- a/include/linux/qed/qed_iscsi_if.h +++ b/include/linux/qed/qed_iscsi_if.h @@ -67,6 +67,8 @@ struct qed_dev_iscsi_info { void __iomem *primary_dbq_rq_addr; void __iomem *secondary_bdq_rq_addr; + + u8 num_cqs; }; struct qed_iscsi_id_params { -- cgit v1.2.3 From 102722fc6832a16850c05595b98c9232549d99f3 Mon Sep 17 00:00:00 2001 From: Guy Ergas Date: Mon, 20 Feb 2017 16:18:17 +0200 Subject: net/mlx5e: Add support for RXFCS feature flag Add support for rx-fcs flag from ethtool. In case this flag is set, update all RQs to scatter the FCS data into the packet. Signed-off-by: Guy Ergas Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 56bc842b0620..1993adbd2c82 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -5122,6 +5122,7 @@ struct mlx5_ifc_modify_rq_out_bits { enum { MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_VSD = 1ULL << 1, + MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_SCATTER_FCS = 1ULL << 2, MLX5_MODIFY_RQ_IN_MODIFY_BITMASK_RQ_COUNTER_SET_ID = 1ULL << 3, }; -- cgit v1.2.3 From 6118714275f0a313ecc296a87ed1af32d9691bed Mon Sep 17 00:00:00 2001 From: Tony Lindgren Date: Thu, 30 Mar 2017 09:16:39 -0700 Subject: pinctrl: core: Fix pinctrl_register_and_init() with pinctrl_enable() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Recent pinctrl changes to allow dynamic allocation of pins exposed one more issue with the pinctrl pins claimed early by the controller itself. This caused a regression for IMX6 pinctrl hogs. 
Before enabling the pin controller driver we need to wait until it has been properly initialized, then claim the hogs, and only then enable it. To fix the regression, split the code into pinctrl_claim_hogs() and pinctrl_enable(). And then let's require that pinctrl_enable() is always called by the pin controller driver when ready after calling pinctrl_register_and_init(). Depends-on: 950b0d91dc10 ("pinctrl: core: Fix regression caused by delayed work for hogs") Fixes: df61b366af26 ("pinctrl: core: Use delayed work for hogs") Fixes: e566fc11ea76 ("pinctrl: imx: use generic pinctrl helpers for managing groups") Cc: Haojian Zhuang Cc: Masahiro Yamada Cc: Mika Penttilä Cc: Mika Westerberg Cc: Nishanth Menon Cc: Shawn Guo Cc: Stefan Agner Tested-by: Geert Uytterhoeven Tested-by: Gary Bisson Tested-by: Fabio Estevam Signed-off-by: Tony Lindgren Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinctrl.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinctrl.h b/include/linux/pinctrl/pinctrl.h index 8ce2d87a238b..5e45385c5bdc 100644 --- a/include/linux/pinctrl/pinctrl.h +++ b/include/linux/pinctrl/pinctrl.h @@ -145,8 +145,9 @@ struct pinctrl_desc { extern int pinctrl_register_and_init(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data, struct pinctrl_dev **pctldev); +extern int pinctrl_enable(struct pinctrl_dev *pctldev); -/* Please use pinctrl_register_and_init() instead */ +/* Please use pinctrl_register_and_init() and pinctrl_enable() instead */ extern struct pinctrl_dev *pinctrl_register(struct pinctrl_desc *pctldesc, struct device *dev, void *driver_data); -- cgit v1.2.3 From 4c0facddb7d88c78c8bd977c16faa647f079ccda Mon Sep 17 00:00:00 2001 From: Laxman Dewangan Date: Thu, 6 Apr 2017 19:05:52 +0530 Subject: gpio: core: Decouple open drain/source flag with active low/high Currently, the GPIO interface is said to Open Drain if it is Single Ended and active LOW. 
Similarly, it is said to be Open Source if it is Single Ended and active HIGH. The active HIGH/LOW is used in the interface for setting the pin state to HIGH or LOW when enabling/disabling the interface. In Open Drain interface, pin is set to HIGH by putting pin in high impedance and LOW by driving to the LOW. In Open Source interface, pin is set to HIGH by driving pin to HIGH and set to LOW by putting pin in high impedance. With above, the Open Drain/Source is unrelated to the active LOW/HIGH in interface. There is interface where the enable/disable of interface is either active LOW or HIGH but it is Open Drain type. Hence decouple the Open Drain with Single Ended + Active LOW and Open Source with Single Ended + Active HIGH. Adding different flag for the Open Drain/Open Source which is valid only when Single ended flag is enabled. Signed-off-by: Laxman Dewangan Signed-off-by: Linus Walleij --- include/linux/of_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index 3f87ea5b8bee..1e089d5a182b 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -30,6 +30,7 @@ struct device_node; enum of_gpio_flags { OF_GPIO_ACTIVE_LOW = 0x1, OF_GPIO_SINGLE_ENDED = 0x2, + OF_GPIO_OPEN_DRAIN = 0x4, }; #ifdef CONFIG_OF_GPIO -- cgit v1.2.3 From 404123c2db798027e852480ed9c4accef9f1d9e6 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 29 Mar 2017 19:06:20 +0300 Subject: virtio: allow drivers to validate features Some drivers can't support all features in all configurations. At the moment we blindly set FEATURES_OK and later FAILED. Support this better by adding a callback drivers can use to do some early checks. Signed-off-by: Michael S.
Tsirkin --- include/linux/virtio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 04b0d3f95043..7edfbdb55a99 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -167,6 +167,7 @@ struct virtio_driver { unsigned int feature_table_size; const unsigned int *feature_table_legacy; unsigned int feature_table_size_legacy; + int (*validate)(struct virtio_device *dev); int (*probe)(struct virtio_device *dev); void (*scan)(struct virtio_device *dev); void (*remove)(struct virtio_device *dev); -- cgit v1.2.3 From dedb67c4b4e5fa2e6e149a2ce93e7848aaa9d762 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 28 Mar 2017 22:27:32 +0530 Subject: netfilter: Add nfnl_msg_type() helper function Add and use nfnl_msg_type() function to replace opencoded nfnetlink message type. I suggested this change, Arushi Singhal made an initial patch to address this but was missing several spots. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 1b49209dd5c7..996711d8a7b4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -41,6 +41,11 @@ int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, int flags); +static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) +{ + return subsys << 8 | msg_type; +} + void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING -- cgit v1.2.3 From ad6260da1e23cf937806e42c8490af3ff4530474 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Mon, 27 Mar 2017 14:30:40 +0200 Subject: KVM: x86: drop legacy device assignment Legacy device assignment has been deprecated since 4.2 (released 1.5 years ago). 
VFIO is better and everyone should have switched to it. If they haven't, this should convince them. :) Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d0250744507a..f1339a7756b3 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -877,22 +877,6 @@ void kvm_unregister_irq_ack_notifier(struct kvm *kvm, int kvm_request_irq_source_id(struct kvm *kvm); void kvm_free_irq_source_id(struct kvm *kvm, int irq_source_id); -#ifdef CONFIG_KVM_DEVICE_ASSIGNMENT -int kvm_iommu_map_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -void kvm_iommu_unmap_pages(struct kvm *kvm, struct kvm_memory_slot *slot); -#else -static inline int kvm_iommu_map_pages(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ - return 0; -} - -static inline void kvm_iommu_unmap_pages(struct kvm *kvm, - struct kvm_memory_slot *slot) -{ -} -#endif - /* * search_memslots() and __gfn_to_memslot() are here because they are * used in non-modular code in arch/powerpc/kvm/book3s_hv_rm_mmu.c. -- cgit v1.2.3 From 4b4357e02523ec63ad853f927f5d93a25101a1d2 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Fri, 31 Mar 2017 13:53:23 +0200 Subject: kvm: make KVM_COALESCED_MMIO_PAGE_OFFSET public MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Its value has never changed; we might as well make it part of the ABI instead of using the return value of KVM_CHECK_EXTENSION(KVM_CAP_COALESCED_MMIO). Because PPC does not always make MMIO available, the code has to be made dependent on CONFIG_KVM_MMIO rather than KVM_COALESCED_MMIO_PAGE_OFFSET. 
Signed-off-by: Paolo Bonzini Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f1339a7756b3..7e74ae4d99bb 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -403,7 +403,7 @@ struct kvm { struct kvm_vm_stat stat; struct kvm_arch arch; refcount_t users_count; -#ifdef KVM_COALESCED_MMIO_PAGE_OFFSET +#ifdef CONFIG_KVM_MMIO struct kvm_coalesced_mmio_ring *coalesced_mmio_ring; spinlock_t ring_lock; struct list_head coalesced_zones; -- cgit v1.2.3 From 54d5329d425650fafaf90660a139c771d2d49cae Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 7 Apr 2017 08:52:27 -0600 Subject: blk-mq-sched: fix crash in switch error path In elevator_switch(), if blk_mq_init_sched() fails, we attempt to fall back to the original scheduler. However, at this point, we've already torn down the original scheduler's tags, so this causes a crash. Doing the fallback like the legacy elevator path is much harder for mq, so fix it by just falling back to none, instead. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/elevator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index aebecc4ed088..22d39e8d4de1 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -211,7 +211,7 @@ extern ssize_t elv_iosched_show(struct request_queue *, char *); extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); -extern void elevator_exit(struct elevator_queue *); +extern void elevator_exit(struct request_queue *, struct elevator_queue *); extern int elevator_change(struct request_queue *, const char *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, -- cgit v1.2.3 From dbde775cdbf5e401b8739f30c87d1af12c0028db Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 7 Apr 2017 11:10:44 +1000 Subject: block: simple improvements for bio->flags The comment for the 'flags' field of 'bio' mentions "command" which is no longer stored there, and doesn't mention the bvec pool number, which is. BIO_RESET_BITS is set in such a way that it would need to be updated if new bits were added, which is easy to miss. BVEC_POOL_BITS is larger than needed. The BVEC_POOL_IDX() ranges from 0 to 6, so 3 bits are sufficient. This patch make improvements in each of these areas. Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 67bcf8a5326e..1ebbc289b642 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -33,7 +33,7 @@ struct bio { * top bits REQ_OP. Use * accessors. 
*/ - unsigned short bi_flags; /* status, command, etc */ + unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; struct bvec_iter bi_iter; @@ -110,12 +110,7 @@ struct bio { #define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ #define BIO_THROTTLED 9 /* This bio has already been subjected to * throttling rules. Don't do it again. */ - -/* - * Flags starting here get preserved by bio_reset() - this includes - * BVEC_POOL_IDX() - */ -#define BIO_RESET_BITS 10 +/* See BVEC_POOL_OFFSET below before adding new flags */ /* * We support 6 different bvec pools, the last one is magic in that it @@ -125,13 +120,22 @@ struct bio { #define BVEC_POOL_MAX (BVEC_POOL_NR - 1) /* - * Top 4 bits of bio flags indicate the pool the bvecs came from. We add + * Top 3 bits of bio flags indicate the pool the bvecs came from. We add * 1 to the actual index so that 0 indicates that there are no bvecs to be * freed. */ -#define BVEC_POOL_BITS (4) +#define BVEC_POOL_BITS (3) #define BVEC_POOL_OFFSET (16 - BVEC_POOL_BITS) #define BVEC_POOL_IDX(bio) ((bio)->bi_flags >> BVEC_POOL_OFFSET) +#if (1<< BVEC_POOL_BITS) < (BVEC_POOL_NR+1) +# error "BVEC_POOL_BITS is too small" +#endif + +/* + * Flags starting here get preserved by bio_reset() - this includes + * only BVEC_POOL_IDX() + */ +#define BIO_RESET_BITS BVEC_POOL_OFFSET /* * Operations and flags common to the bio and request structures. -- cgit v1.2.3 From fbbaf700e7b163a0f1704b2d542ee28be11fce21 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 7 Apr 2017 09:40:52 -0600 Subject: block: trace completion of all bios. Currently only dm and md/raid5 bios trigger trace_block_bio_complete(). Now that we have bio_chain() and bio_inc_remaining(), it is not possible, in general, for a driver to know when the bio is really complete. Only bio_endio() knows that. So move the trace_block_bio_complete() call to bio_endio(). Now trace_block_bio_complete() pairs with trace_block_bio_queue(). 
Any bio for which a 'queue' event is traced, will subsequently generate a 'complete' event. There are a few cases where completion tracing is not wanted. 1/ If blk_update_request() has already generated a completion trace event at the 'request' level, there is no point generating one at the bio level too. In this case the bi_sector and bi_size will have changed, so the bio level event would be wrong. 2/ If the bio hasn't actually been queued yet, but is being aborted early, then a trace event could be confusing. Some filesystems call bio_endio() but do not want tracing. 3/ The bio_integrity code interposes itself by replacing bi_end_io, then restoring it and calling bio_endio() again. This would produce two identical trace events if left like that. To handle these, we introduce a flag BIO_TRACE_COMPLETION and only produce the trace event when this is set. We address point 1 above by clearing the flag in blk_update_request(). We address point 2 above by only setting the flag when generic_make_request() is called. We address point 3 above by clearing the flag after generating a completion event. When bio_split() is used on a bio, particularly in blk_queue_split(), there is an extra complication. A new bio is split off the front, and may be handled directly without going through generic_make_request(). The old bio, which has been advanced, is passed to generic_make_request(), so it will trigger a trace event a second time. Probably the best result when a split happens is to see a single 'queue' event for the whole bio, then multiple 'complete' events - one for each component. To achieve this we can: - copy the BIO_TRACE_COMPLETION flag to the new bio in bio_split() - avoid generating a 'queue' event if BIO_TRACE_COMPLETION is already set. This way, the split-off bio won't create a queue event, the original won't either even if it is re-submitted to generic_make_request(), but both will produce completion events, each for their own range.
So if generic_make_request() is called (which generates a QUEUED event), then bio_endio() will create a single COMPLETE event for each range that the bio is split into, unless the driver has explicitly requested it not to. Signed-off-by: NeilBrown Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1ebbc289b642..72aa9519167e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -110,6 +110,8 @@ struct bio { #define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ #define BIO_THROTTLED 9 /* This bio has already been subjected to * throttling rules. Don't do it again. */ +#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion + * of this bio. */ /* See BVEC_POOL_OFFSET below before adding new flags */ /* -- cgit v1.2.3 From 7587a5ae7eef0439f7be31f1b5959af062bbc5ec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 11:16:52 -0700 Subject: blk-mq: Introduce blk_mq_delay_run_hw_queue() Introduce a function that runs a hardware queue unconditionally after a delay. Note: there is already a function that stops and restarts a hardware queue after a delay, namely blk_mq_delay_queue(). This function will be used in the next patch in this series. Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Long Li Cc: K. Y.
Srinivasan Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b296a9006117..9382c5da7a2e 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -51,6 +51,7 @@ struct blk_mq_hw_ctx { atomic_t nr_active; + struct delayed_work delayed_run_work; struct delayed_work delay_work; struct hlist_node cpuhp_dead; @@ -238,6 +239,7 @@ void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); +void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, -- cgit v1.2.3 From 6d8c6c0f97ad8a3517c42b179c1dc8e77397d0e2 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 7 Apr 2017 12:40:09 -0600 Subject: blk-mq: Restart a single queue if tag sets are shared To improve scalability, if hardware queues are shared, restart a single hardware queue in round-robin fashion. Rename blk_mq_sched_restart_queues() to reflect the new semantics. Remove blk_mq_sched_mark_restart_queue() because this function has no callers. Remove flag QUEUE_FLAG_RESTART because this patch removes the code that uses this flag. 
Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5a7da607ca04..7548f332121a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -610,7 +610,6 @@ struct request_queue { #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ #define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_STATS 27 /* track rq completion times */ -#define QUEUE_FLAG_RESTART 28 /* queue needs restart at completion */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ -- cgit v1.2.3 From ee056f98126170ca8b16b9a4a6e20aae7c5c184e Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Wed, 5 Apr 2017 12:01:34 -0700 Subject: blk-mq-sched: provide hooks for initializing hardware queue data Schedulers need to be informed when a hardware queue is added or removed at runtime so they can allocate/free per-hardware queue data. So, replace the blk_mq_sched_init_hctx_data() helper, which only makes sense at init time, with .init_hctx() and .exit_hctx() hooks. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/elevator.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 22d39e8d4de1..b7ec315ee7e7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -93,6 +93,8 @@ struct blk_mq_hw_ctx; struct elevator_mq_ops { int (*init_sched)(struct request_queue *, struct elevator_type *); void (*exit_sched)(struct elevator_queue *); + int (*init_hctx)(struct blk_mq_hw_ctx *, unsigned int); + void (*exit_hctx)(struct blk_mq_hw_ctx *, unsigned int); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); -- cgit v1.2.3 From 5d9854eaea776441b38a9a45b4e6879524c4f48c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Fri, 7 Apr 2017 17:13:17 -0700 Subject: iio: hid-sensor: Store restore poll and hysteresis on S3 This change undoes the change made by 'commit 3bec24747446 ("iio: hid-sensor-trigger: Change get poll value function order to avoid sensor properties losing after resume from S3")' as this breaks some USB/i2c sensor hubs. Instead of relying on HW for restoring poll and hysteresis, driver stores and restores on resume (S3). In this way user space modified settings are not lost for any kind of sensor hub behavior. In this change, whenever user space modifies sampling frequency or hysteresis driver will get the feature value from the hub and store in the per device hid_sensor_common data structure. On resume callback from S3, system will set the feature to sensor hub, if user space ever modified the feature value.
Fixes: 3bec24747446 ("iio: hid-sensor-trigger: Change get poll value function order to avoid sensor properties losing after resume from S3") Reported-by: Ritesh Raj Sarraf Tested-by: Ritesh Raj Sarraf Tested-by: Song, Hongyan Cc: stable@vger.kernel.org Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index 7ef111d3ecc5..f32d7c392c1e 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -231,6 +231,8 @@ struct hid_sensor_common { unsigned usage_id; atomic_t data_ready; atomic_t user_requested_state; + int poll_interval; + int raw_hystersis; struct iio_trigger *trigger; int timestamp_ns_scale; struct hid_sensor_hub_attribute_info poll; -- cgit v1.2.3 From 42d5ec954719917e2b7a9160fe05d2316eece5bf Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Thu, 23 Mar 2017 19:34:27 -0500 Subject: fpga: add config complete timeout Adding timeout for maximum allowed time for FPGA to go to operating mode after a FPGA region has been programmed. Signed-off-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h index e2ef94fd37af..b4ac24c4411d 100644 --- a/include/linux/fpga/fpga-mgr.h +++ b/include/linux/fpga/fpga-mgr.h @@ -77,11 +77,14 @@ enum fpga_mgr_states { * @flags: boolean flags as defined above * @enable_timeout_us: maximum time to enable traffic through bridge (uSec) * @disable_timeout_us: maximum time to disable traffic through bridge (uSec) + * @config_complete_timeout_us: maximum time for FPGA to switch to operating + * status in the write_complete op. 
*/ struct fpga_image_info { u32 flags; u32 enable_timeout_us; u32 disable_timeout_us; + u32 config_complete_timeout_us; }; /** -- cgit v1.2.3 From d201cc17a8a31cc6c4f3944988fe9e2f04b021fb Mon Sep 17 00:00:00 2001 From: Matthew Gerlach Date: Thu, 23 Mar 2017 19:34:28 -0500 Subject: fpga pr ip: Core driver support for Altera Partial Reconfiguration IP. Adding the core functions necessary for a fpga-mgr driver for the Altera Partial IP component. It is intended for these functions to be used by the various bus implementations like the platform bus or the PCIe bus. Signed-off-by: Matthew Gerlach Acked-by: Alan Tull Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/altera-pr-ip-core.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) create mode 100644 include/linux/fpga/altera-pr-ip-core.h (limited to 'include/linux') diff --git a/include/linux/fpga/altera-pr-ip-core.h b/include/linux/fpga/altera-pr-ip-core.h new file mode 100644 index 000000000000..3810a9033f49 --- /dev/null +++ b/include/linux/fpga/altera-pr-ip-core.h @@ -0,0 +1,29 @@ +/* + * Driver for Altera Partial Reconfiguration IP Core + * + * Copyright (C) 2016 Intel Corporation + * + * Based on socfpga-a10.c Copyright (C) 2015-2016 Altera Corporation + * by Alan Tull + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . 
+ */ + +#ifndef _ALT_PR_IP_CORE_H +#define _ALT_PR_IP_CORE_H +#include <linux/io.h> + +int alt_pr_register(struct device *dev, void __iomem *reg_base); +int alt_pr_unregister(struct device *dev); + +#endif /* _ALT_PR_IP_CORE_H */ -- cgit v1.2.3 From 895ce6c877cb0282a7dc2178f3643e6635716a2a Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sat, 11 Mar 2017 08:44:57 +0800 Subject: debugfs: set no_llseek in DEFINE_DEBUGFS_ATTRIBUTE In DEFINE_DEBUGFS_ATTRIBUTE() macro, since we use nonseekable_open() to open, we should use no_llseek() to seek, not generic_file_llseek(). Signed-off-by: Geliang Tang Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 7dff776e6d16..9174b0d28582 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -74,7 +74,7 @@ static const struct file_operations __fops = { \ .release = simple_attr_release, \ .read = debugfs_attr_read, \ .write = debugfs_attr_write, \ - .llseek = generic_file_llseek, \ + .llseek = no_llseek, \ } #if defined(CONFIG_DEBUG_FS) -- cgit v1.2.3 From 171058fb0883247b3a484a542b5dc89753c57cb5 Mon Sep 17 00:00:00 2001 From: Michal Sojka Date: Thu, 16 Mar 2017 14:50:08 +0100 Subject: uio: Allow handling of non page-aligned memory regions Since commit b65502879556 ("uio: we cannot mmap unaligned page contents") addresses and sizes of UIO memory regions must be page-aligned. If the address in the BAR register is not page-aligned (which is the case of the mf264 card), the mentioned commit forces the UIO driver to round the address down to the page size. Then, there is no easy way for user-space to learn the offset of the actual memory region within the page, because the offset seen in /sys/class/uio/uio?/maps/map?/offset is calculated from the rounded address and thus it is always zero. Fix that problem by including the offset in struct uio_mem.
UIO drivers can set this field and userspace can read its value from /sys/class/uio/uio?/maps/map?/offset. The following commits update the uio_mf264 driver to set this new offs field. Drivers for hardware with page-aligned BARs need not to be modified provided that they initialize struct uio_info (which contains uio_mem) with zeros. Signed-off-by: Michal Sojka Signed-off-by: Greg Kroah-Hartman --- include/linux/uio_driver.h | 13 ++++++++----- 1 file changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio_driver.h b/include/linux/uio_driver.h index 32c0e83d6239..3c85c81b0027 100644 --- a/include/linux/uio_driver.h +++ b/include/linux/uio_driver.h @@ -23,11 +23,13 @@ struct uio_map; /** * struct uio_mem - description of a UIO memory region * @name: name of the memory region for identification - * @addr: address of the device's memory (phys_addr is used since - * addr can be logical, virtual, or physical & phys_addr_t - * should always be large enough to handle any of the - * address types) - * @size: size of IO + * @addr: address of the device's memory rounded to page + * size (phys_addr is used since addr can be + * logical, virtual, or physical & phys_addr_t + * should always be large enough to handle any of + * the address types) + * @offs: offset of device memory within the page + * @size: size of IO (multiple of page size) * @memtype: type of memory addr points to * @internal_addr: ioremap-ped version of addr, for driver internal use * @map: for use by the UIO core only. @@ -35,6 +37,7 @@ struct uio_map; struct uio_mem { const char *name; phys_addr_t addr; + unsigned long offs; resource_size_t size; int memtype; void __iomem *internal_addr; -- cgit v1.2.3 From 808a8b73772c6ac7d999c0508d2f757831cd83ca Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Wed, 5 Apr 2017 11:06:30 +0200 Subject: iio: adc: sun4i-gpadc-iio: add support for A33 thermal sensor This adds support for the Allwinner A33 thermal sensor. 
Unlike the A10, A13 and A31, the Allwinner A33 only has one channel which is dedicated to the thermal sensor. Moreover, its thermal sensor does not generate interruptions, thus we only need to directly read the register storing the temperature value. The MFD used by the A10, A13 and A31, was created to avoid breaking the DT binding, but since the nodes for the ADC weren't there for the A33, it is not needed. Though the A33 does not have an internal ADC, it has a thermal sensor which shares the same registers with GPADC of the already supported SoCs and almost the same bits, for the same purpose (thermal sensor). The thermal sensor behaves exactly the same (except the presence of interrupts or not) on the different SoCs. Signed-off-by: Quentin Schulz Acked-by: Lee Jones Signed-off-by: Jonathan Cameron --- include/linux/mfd/sun4i-gpadc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/sun4i-gpadc.h b/include/linux/mfd/sun4i-gpadc.h index 509e736d27fb..139872c2e0fe 100644 --- a/include/linux/mfd/sun4i-gpadc.h +++ b/include/linux/mfd/sun4i-gpadc.h @@ -38,6 +38,10 @@ #define SUN6I_GPADC_CTRL1_ADC_CHAN_SELECT(x) (GENMASK(3, 0) & BIT(x)) #define SUN6I_GPADC_CTRL1_ADC_CHAN_MASK GENMASK(3, 0) +/* TP_CTRL1 bits for sun8i SoCs */ +#define SUN8I_GPADC_CTRL1_CHOP_TEMP_EN BIT(8) +#define SUN8I_GPADC_CTRL1_GPADC_CALI_EN BIT(7) + #define SUN4I_GPADC_CTRL2 0x08 #define SUN4I_GPADC_CTRL2_TP_SENSITIVE_ADJUST(x) ((GENMASK(3, 0) & (x)) << 28) -- cgit v1.2.3 From 6fe729c4bdae41b4c5a5ff21312f021a48c69399 Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Tue, 4 Apr 2017 07:22:33 -0700 Subject: serdev: Add serdev_device_write subroutine Add serdev_device_write() a blocking call allowing to transfer arbitrary amount of data (potentially exceeding amount that serdev_device_write_buf can process in a single call) To support that, also add serdev_device_write_wakeup().
Drivers wanting to use full extent of serdev_device_write functionality are expected to provide serdev_device_write_wakeup() as a sole handler of .write_wakeup event or call it as a part of driver's custom .write_wakeup code. Because serdev_device_write() subroutine is a superset of serdev_device_write_buf() the patch re-implements the latter in terms of the former. For drivers wanting to just use serdev_device_write_buf() .write_wakeup handler is optional. Cc: cphealy@gmail.com Cc: Guenter Roeck Cc: linux-serial@vger.kernel.org Cc: linux-kernel@vger.kernel.org Reviewed-by: Rob Herring Reviewed-by: Andy Shevchenko Signed-off-by: Andrey Smirnov Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 5176cdc2057f..0beaff886992 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -39,12 +39,16 @@ struct serdev_device_ops { * @nr: Device number on serdev bus. * @ctrl: serdev controller managing this device. * @ops: Device operations.
+ * @write_comp Completion used by serdev_device_write() internally + * @write_lock Lock to serialize access when writing data */ struct serdev_device { struct device dev; int nr; struct serdev_controller *ctrl; const struct serdev_device_ops *ops; + struct completion write_comp; + struct mutex write_lock; }; static inline struct serdev_device *to_serdev_device(struct device *d) @@ -186,7 +190,8 @@ int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); -int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); +void serdev_device_write_wakeup(struct serdev_device *); +int serdev_device_write(struct serdev_device *, const unsigned char *, size_t, unsigned long); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); @@ -223,7 +228,8 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev return 0; } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} -static inline int serdev_device_write_buf(struct serdev_device *sdev, const unsigned char *buf, size_t count) +static inline int serdev_device_write(struct serdev_device *sdev, const unsigned char *buf, + size_t count, unsigned long timeout) { return -ENODEV; } @@ -259,4 +265,11 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port, static inline void serdev_tty_port_unregister(struct tty_port *port) {} #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ +static inline int serdev_device_write_buf(struct serdev_device *serdev, + const unsigned char *data, + size_t count) +{ + return serdev_device_write(serdev, data, count, 0); +} + #endif /*_LINUX_SERDEV_H */ -- cgit v1.2.3 From 2e94d5ae5da1d2e798045a53b5e234a42b090908 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko 
Date: Fri, 31 Mar 2017 21:35:17 +0300 Subject: serial: core: constify struct uart_port {name} field Don't allow modifications of port name. It's serial core's business only. Signed-off-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 60530678c633..64d892f1e5cd 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -247,7 +247,7 @@ struct uart_port { unsigned char suspended; unsigned char irq_wake; unsigned char unused[2]; - char *name; /* port name */ + const char *name; /* port name */ struct attribute_group *attr_group; /* port specific attributes */ const struct attribute_group **tty_groups; /* all attributes (serial core use only) */ struct serial_rs485 rs485; -- cgit v1.2.3 From 1d62ac13634840e02f9b20df9d8e21204f9ab8b8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:21:00 +0200 Subject: block: renumber REQ_OP_WRITE_ZEROES Make life easy for implementations that needs to send a data buffer to the device (e.g. SCSI) by numbering it as a data out command. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 72aa9519167e..c5bae0a669d1 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,7 +174,7 @@ enum req_opf { /* write the same sector many times */ REQ_OP_WRITE_SAME = 7, /* write the zero filled sector many times */ - REQ_OP_WRITE_ZEROES = 8, + REQ_OP_WRITE_ZEROES = 9, /* SCSI passthrough using struct scsi_request */ REQ_OP_SCSI_IN = 32, -- cgit v1.2.3 From ac62d6208a7977107a47be4eb8566d6e5034b5f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:21:05 +0200 Subject: dm: support REQ_OP_WRITE_ZEROES Copy & paste from the REQ_OP_WRITE_SAME code. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7e6903866fd..3829bee2302a 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -254,6 +254,12 @@ struct dm_target { */ unsigned num_write_same_bios; + /* + * The number of WRITE ZEROES bios that will be submitted to the target. + * The bio number can be accessed with dm_bio_get_target_bio_nr. + */ + unsigned num_write_zeroes_bios; + /* * The minimum number of extra bytes allocated in each io for the * target to use. -- cgit v1.2.3 From ee472d835c264a4cb77f8cf878603e1e40f3559e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:21:08 +0200 Subject: block: add a flags argument to (__)blkdev_issue_zeroout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Turn the existing discard flag into a new BLKDEV_ZERO_UNMAP flag with similar semantics, but without referring to discard.
Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d76bebbc632e..bd60f4401c9d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1336,23 +1336,27 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return bqt->tag_index[tag]; } +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); +extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, + sector_t nr_sects, gfp_t gfp_mask, struct page *page); #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ #define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */ -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, int flags, struct bio **biop); -extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, struct page *page); + +#define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ + extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, - bool discard); + unsigned flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, bool discard); + sector_t nr_sects, gfp_t gfp_mask, unsigned flags); + static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { @@ -1366,7 +1370,7 @@ static inline int sb_issue_zeroout(struct 
super_block *sb, sector_t block, return blkdev_issue_zeroout(sb->s_bdev, block << (sb->s_blocksize_bits - 9), nr_blocks << (sb->s_blocksize_bits - 9), - gfp_mask, true); + gfp_mask, 0); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From d928be9f853b9755692d7e9aed402c1809a88e56 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:21:09 +0200 Subject: block: add a REQ_NOUNMAP flag for REQ_OP_WRITE_ZEROES If this flag is set logical provisioning capable device should release space for the zeroed blocks if possible, if it is not set devices should keep the blocks anchored. Also remove an out of sync kerneldoc comment for a static function that would have become even more out of date with this change. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c5bae0a669d1..61339bc44400 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -201,6 +201,10 @@ enum req_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ + + /* command specific flags for REQ_OP_WRITE_ZEROES: */ + __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_NR_BITS, /* stops here */ }; @@ -218,6 +222,8 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) +#define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) + #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From cb365b9675fda026caba4cb5df83292cb7c0811a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:21:10 +0200 Subject: block: add a new BLKDEV_ZERO_NOFALLBACK flag This avoids fallbacks to explicit
zeroing in (__)blkdev_issue_zeroout if the caller doesn't want them. Also clean up the convoluted check for the return condition that this new flag is added to. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bd60f4401c9d..21a30f011674 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1350,6 +1350,7 @@ extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct bio **biop); #define BLKDEV_ZERO_NOUNMAP (1 << 0) /* do not free blocks */ +#define BLKDEV_ZERO_NOFALLBACK (1 << 1) /* don't write explicit zeroes */ extern int __blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct bio **biop, -- cgit v1.2.3 From 48920ff2a5a940cd07d12cc79e4a2c75f1185aee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 5 Apr 2017 19:21:23 +0200 Subject: block: remove the discard_zeroes_data flag Now that we use the proper REQ_OP_WRITE_ZEROES operation everywhere we can kill this hack. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 15 --------------- include/linux/device-mapper.h | 5 ----- 2 files changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 21a30f011674..ec993573e0a8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -339,7 +339,6 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; unsigned char cluster; - unsigned char discard_zeroes_data; unsigned char raid_partial_stripes_expensive; enum blk_zoned_model zoned; }; @@ -1341,7 +1340,6 @@ extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); #define BLKDEV_DISCARD_SECURE (1 << 0) /* issue a secure erase */ -#define BLKDEV_DISCARD_ZERO (1 << 1) /* must reliably zero data */ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); @@ -1541,19 +1539,6 @@ static inline int bdev_discard_alignment(struct block_device *bdev) return q->limits.discard_alignment; } -static inline unsigned int queue_discard_zeroes_data(struct request_queue *q) -{ - if (q->limits.max_discard_sectors && q->limits.discard_zeroes_data == 1) - return 1; - - return 0; -} - -static inline unsigned int bdev_discard_zeroes_data(struct block_device *bdev) -{ - return queue_discard_zeroes_data(bdev_get_queue(bdev)); -} - static inline unsigned int bdev_write_same(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 3829bee2302a..c7ea33e38fb9 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -296,11 +296,6 @@ struct dm_target { * on max_io_len boundary. */ bool split_discard_bios:1; - - /* - * Set if this target does not return zeroes on discarded blocks. 
- */ - bool discard_zeroes_data_unsupported:1; }; /* Each target can link one of these into the table */ -- cgit v1.2.3 From 7f564528a480084e2318cd48caba7aef4a54a77f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Sat, 8 Apr 2017 20:36:24 +0200 Subject: skbuff: Extend gso_type to unsigned int. All available gso_type flags are currently in use, so extend gso_type from 'unsigned short' to 'unsigned int' to be able to add further flags. We reorder the struct skb_shared_info to use two bytes of the four byte hole before dataref. All fields before dataref are cleared, i.e. four bytes more than before the change. The remaining two byte hole is moved to the beginning of the structure, this protects us from immediate overwrites on out-of-bounds writes to the sk_buff head. Structure layout on x86-64 before the change: struct skb_shared_info { unsigned char nr_frags; /* 0 1 */ __u8 tx_flags; /* 1 1 */ short unsigned int gso_size; /* 2 2 */ short unsigned int gso_segs; /* 4 2 */ short unsigned int gso_type; /* 6 2 */ struct sk_buff * frag_list; /* 8 8 */ struct skb_shared_hwtstamps hwtstamps; /* 16 8 */ u32 tskey; /* 24 4 */ __be32 ip6_frag_id; /* 28 4 */ atomic_t dataref; /* 32 4 */ /* XXX 4 bytes hole, try to pack */ void * destructor_arg; /* 40 8 */ skb_frag_t frags[17]; /* 48 272 */ /* --- cacheline 5 boundary (320 bytes) --- */ /* size: 320, cachelines: 5, members: 12 */ /* sum members: 316, holes: 1, sum holes: 4 */ }; Structure layout on x86-64 after the change: struct skb_shared_info { short unsigned int _unused; /* 0 2 */ unsigned char nr_frags; /* 2 1 */ __u8 tx_flags; /* 3 1 */ short unsigned int gso_size; /* 4 2 */ short unsigned int gso_segs; /* 6 2 */ struct sk_buff * frag_list; /* 8 8 */ struct skb_shared_hwtstamps hwtstamps; /* 16 8 */ unsigned int gso_type; /* 24 4 */ u32 tskey; /* 28 4 */ __be32 ip6_frag_id; /* 32 4 */ atomic_t dataref; /* 36 4 */ void * destructor_arg; /* 40 8 */ skb_frag_t frags[17]; /* 48 272 */ /* --- cacheline 5 boundary (320 bytes)
--- */ /* size: 320, cachelines: 5, members: 13 */ }; Signed-off-by: Steffen Klassert Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c776abd86937..741d75cfc686 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -413,14 +413,15 @@ struct ubuf_info { * the end of the header data, ie. at skb->end. */ struct skb_shared_info { + unsigned short _unused; unsigned char nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ unsigned short gso_segs; - unsigned short gso_type; struct sk_buff *frag_list; struct skb_shared_hwtstamps hwtstamps; + unsigned int gso_type; u32 tskey; __be32 ip6_frag_id; -- cgit v1.2.3 From bf74b20d00b13919db7ae5d1015636e76f56f6ae Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sun, 9 Apr 2017 14:45:21 -0700 Subject: Revert "rtnl: Add support for netdev event to link messages" This reverts commit def12888c161e6fec0702e5ec9c3962846e3a21d. As per discussion between Roopa Prabhu and David Ahern, it is advisable that we instead have the code collect the setlink triggered events into a bitmask emitted in the IFLA_EVENT netlink attribute. Signed-off-by: David S. 
Miller --- include/linux/rtnetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 0459018173cf..57e54847b0b9 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -18,8 +18,7 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, - unsigned change, unsigned long event, - gfp_t flags); + unsigned change, gfp_t flags); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); -- cgit v1.2.3 From 5812f0106c449533d0eea0b16a6244ec3d6d4abb Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Mar 2017 13:55:57 +0530 Subject: phy: exynos4: Remove duplicated defines of PHY register defines Phy drivers access PMU region through regmap provided by exynos-pmu driver. However there is no need to duplicate defines for PMU registers. Instead just use whatever is defined in exynos-regs-pmu.h. Additionally MIPI PHY registers for Exynos5433 start from the same address as Exynos4 and Exynos5250 so re-use existing defines. This reduces number of defines and allows removal of one header file. 
Suggested-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kishon Vijay Abraham I --- include/linux/mfd/syscon/exynos5-pmu.h | 3 --- include/linux/soc/samsung/exynos-regs-pmu.h | 9 ++++++++- 2 files changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index c28ff21ca4d2..77c93551ee58 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -38,9 +38,6 @@ /* Exynos5433 specific register definitions */ #define EXYNOS5433_USBHOST30_PHY_CONTROL (0x728) -#define EXYNOS5433_MIPI_PHY0_CONTROL (0x710) -#define EXYNOS5433_MIPI_PHY1_CONTROL (0x714) -#define EXYNOS5433_MIPI_PHY2_CONTROL (0x718) #define EXYNOS5_PHY_ENABLE BIT(0) #define EXYNOS5_MIPI_PHY_S_RESETN BIT(1) diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 49df0a01a2cc..e57d75889a09 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2010-2012 Samsung Electronics Co., Ltd. + * Copyright (c) 2010-2015 Samsung Electronics Co., Ltd. 
* http://www.samsung.com * * EXYNOS - Power management unit definition @@ -50,6 +50,13 @@ #define S5P_WAKEUP_MASK 0x0608 #define S5P_WAKEUP_MASK2 0x0614 +/* MIPI_PHYn_CONTROL, valid for Exynos3250, Exynos4, Exynos5250 and Exynos5433 */ +#define EXYNOS4_MIPI_PHY_CONTROL(n) (0x0710 + (n) * 4) +#define EXYNOS4_MIPI_PHY_ENABLE (1 << 0) +#define EXYNOS4_MIPI_PHY_SRESETN (1 << 1) +#define EXYNOS4_MIPI_PHY_MRESETN (1 << 2) +#define EXYNOS4_MIPI_PHY_RESET_MASK (3 << 1) + #define S5P_INFORM0 0x0800 #define S5P_INFORM1 0x0804 #define S5P_INFORM5 0x0814 -- cgit v1.2.3 From 424c9841480f1761285748b08aa85ac774a30db1 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Mar 2017 18:46:50 +0200 Subject: phy: exynos5: Remove duplicated defines of PHY register defines Phy drivers access PMU region through regmap provided by exynos-pmu driver. However there is no need to duplicate defines for PMU registers. Instead just use whatever is defined in exynos-regs-pmu.h. This reduces number of defines. 
Suggested-by: Marek Szyprowski Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kishon Vijay Abraham I --- include/linux/mfd/syscon/exynos5-pmu.h | 27 --------------------------- include/linux/soc/samsung/exynos-regs-pmu.h | 8 ++++++++ 2 files changed, 8 insertions(+), 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index 77c93551ee58..0a4ddabc395e 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -12,33 +12,6 @@ #ifndef _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ #define _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ -/* Exynos5 PMU register definitions */ -#define EXYNOS5_HDMI_PHY_CONTROL (0x700) -#define EXYNOS5_USBDRD_PHY_CONTROL (0x704) - -/* Exynos5250 specific register definitions */ -#define EXYNOS5_USBHOST_PHY_CONTROL (0x708) -#define EXYNOS5_EFNAND_PHY_CONTROL (0x70c) -#define EXYNOS5_MIPI_PHY0_CONTROL (0x710) -#define EXYNOS5_MIPI_PHY1_CONTROL (0x714) -#define EXYNOS5_ADC_PHY_CONTROL (0x718) -#define EXYNOS5_MTCADC_PHY_CONTROL (0x71c) -#define EXYNOS5_DPTX_PHY_CONTROL (0x720) -#define EXYNOS5_SATA_PHY_CONTROL (0x724) - -/* Exynos5420 specific register definitions */ -#define EXYNOS5420_USBDRD1_PHY_CONTROL (0x708) -#define EXYNOS5420_USBHOST_PHY_CONTROL (0x70c) -#define EXYNOS5420_MIPI_PHY0_CONTROL (0x714) -#define EXYNOS5420_MIPI_PHY1_CONTROL (0x718) -#define EXYNOS5420_MIPI_PHY2_CONTROL (0x71c) -#define EXYNOS5420_ADC_PHY_CONTROL (0x720) -#define EXYNOS5420_MTCADC_PHY_CONTROL (0x724) -#define EXYNOS5420_DPTX_PHY_CONTROL (0x728) - -/* Exynos5433 specific register definitions */ -#define EXYNOS5433_USBHOST30_PHY_CONTROL (0x728) - #define EXYNOS5_PHY_ENABLE BIT(0) #define EXYNOS5_MIPI_PHY_S_RESETN BIT(1) #define EXYNOS5_MIPI_PHY_M_RESETN BIT(2) diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index e57d75889a09..4ee54b3fcd57 100644 
--- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -349,6 +349,8 @@ #define EXYNOS5_AUTO_WDTRESET_DISABLE 0x0408 #define EXYNOS5_MASK_WDTRESET_REQUEST 0x040C +#define EXYNOS5_USBDRD_PHY_CONTROL 0x0704 +#define EXYNOS5_DPTX_PHY_CONTROL 0x0720 #define EXYNOS5_USE_RETENTION BIT(4) #define EXYNOS5_SYS_WDTRESET (1 << 20) @@ -502,6 +504,11 @@ #define EXYNOS5420_KFC_CORE_RESET(_nr) \ ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) +#define EXYNOS5420_USBDRD1_PHY_CONTROL 0x0708 +#define EXYNOS5420_MIPI_PHY0_CONTROL 0x0714 +#define EXYNOS5420_MIPI_PHY1_CONTROL 0x0718 +#define EXYNOS5420_MIPI_PHY2_CONTROL 0x071C +#define EXYNOS5420_DPTX_PHY_CONTROL 0x0728 #define EXYNOS5420_ARM_CORE2_SYS_PWR_REG 0x1020 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG 0x1024 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_CENTRAL_SYS_PWR_REG 0x1028 @@ -639,6 +646,7 @@ | EXYNOS5420_KFC_USE_STANDBY_WFI3) /* For EXYNOS5433 */ +#define EXYNOS5433_USBHOST30_PHY_CONTROL (0x0728) #define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) #define EXYNOS5433_PAD_RETENTION_MMC2_OPTION (0x30C8) #define EXYNOS5433_PAD_RETENTION_TOP_OPTION (0x3108) -- cgit v1.2.3 From cf09ee599714e630ea610ff4c4fd8c71e2b1f616 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 14 Mar 2017 18:46:51 +0200 Subject: phy: exynos-mipi-video: Use consistent method to address phy registers Exynos4 MIPI phy registers are defined with macro calculating the offset for given phyN. Use the same method for Exynos5420 to be consistent. 
Signed-off-by: Krzysztof Kozlowski Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kishon Vijay Abraham I --- include/linux/soc/samsung/exynos-regs-pmu.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index 4ee54b3fcd57..c261ed927e1e 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -505,9 +505,7 @@ ((EXYNOS5420_KFC_CORE_RESET0 | EXYNOS5420_KFC_ETM_RESET0) << (_nr)) #define EXYNOS5420_USBDRD1_PHY_CONTROL 0x0708 -#define EXYNOS5420_MIPI_PHY0_CONTROL 0x0714 -#define EXYNOS5420_MIPI_PHY1_CONTROL 0x0718 -#define EXYNOS5420_MIPI_PHY2_CONTROL 0x071C +#define EXYNOS5420_MIPI_PHY_CONTROL(n) (0x0714 + (n) * 4) #define EXYNOS5420_DPTX_PHY_CONTROL 0x0728 #define EXYNOS5420_ARM_CORE2_SYS_PWR_REG 0x1020 #define EXYNOS5420_DIS_IRQ_ARM_CORE2_LOCAL_SYS_PWR_REG 0x1024 -- cgit v1.2.3 From 7a66647b25b68c2a2da51bc9845fc91dd27529a9 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 29 Mar 2017 13:59:57 +0530 Subject: phy: exynos: Use one define for enable bit There is no need for separate defines for Exynos4 and Exynos5 phy enable bit and MIPI phy reset bits. In both cases they are the same, so simplify it. This reduces the number of defines and allows removal of one header file.
Signed-off-by: Krzysztof Kozlowski Acked-by: Lee Jones Reviewed-by: Bartlomiej Zolnierkiewicz Signed-off-by: Kishon Vijay Abraham I --- include/linux/soc/samsung/exynos-regs-pmu.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/samsung/exynos-regs-pmu.h b/include/linux/soc/samsung/exynos-regs-pmu.h index c261ed927e1e..bebdde5dccd6 100644 --- a/include/linux/soc/samsung/exynos-regs-pmu.h +++ b/include/linux/soc/samsung/exynos-regs-pmu.h @@ -52,7 +52,8 @@ /* MIPI_PHYn_CONTROL, valid for Exynos3250, Exynos4, Exynos5250 and Exynos5433 */ #define EXYNOS4_MIPI_PHY_CONTROL(n) (0x0710 + (n) * 4) -#define EXYNOS4_MIPI_PHY_ENABLE (1 << 0) +/* Phy enable bit, common for all phy registers, not only MIPI */ +#define EXYNOS4_PHY_ENABLE (1 << 0) #define EXYNOS4_MIPI_PHY_SRESETN (1 << 1) #define EXYNOS4_MIPI_PHY_MRESETN (1 << 2) #define EXYNOS4_MIPI_PHY_RESET_MASK (3 << 1) -- cgit v1.2.3 From f437a3f477cce402dbec6537b29e9e33962c9f73 Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 6 Apr 2017 16:16:11 +0800 Subject: crypto: api - Extend algorithm name limit to 128 bytes With the new explicit IV generators, we may now exceed the 64-byte length limit on the algorithm name, e.g., with echainiv(authencesn(hmac(sha256-generic),cbc(des3_ede-generic))) This patch extends the length limit to 128 bytes. Reported-by: Alexander Sverdlin Signed-off-by: Herbert Xu Acked-by: Alexander Sverdlin Tested-by: Alexander Sverdlin --- include/linux/crypto.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index c0b0cf3d2d2f..84da9978e951 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -123,7 +123,7 @@ /* * Miscellaneous stuff. 
*/ -#define CRYPTO_MAX_ALG_NAME 64 +#define CRYPTO_MAX_ALG_NAME 128 /* * The macro CRYPTO_MINALIGN_ATTR (along with the void * type in the actual -- cgit v1.2.3 From db47d5f856467ce0dd3af7e20a33df3d901266df Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 25 Jan 2017 20:30:29 +0100 Subject: x86/nmi, EDAC: Get rid of DRAM error reporting thru PCI SERR NMI Apparently, some machines used to report DRAM errors through a PCI SERR NMI. This is why we have a call into EDAC in the NMI handler. See c0d121720220 ("drivers/edac: add new nmi rescan"). From looking at the patch above, that's two drivers: e752x_edac.c and e7xxx_edac.c. Now, I wanna say those are old machines which are probably decommissioned already. Tony says that "[t]he newest CPU supported by either of those drivers is the Xeon E7520 (a.k.a. "Nehalem") released in Q1'2010. Possibly some folks are still using these ... but people that hold onto h/w for 7 years generally cling to old s/w too ... so I'd guess it unlikely that we will get complaints for breaking these in upstream." So even if there is a small number still in use, we did load EDAC with edac_op_state == EDAC_OPSTATE_POLL by default (we still do, in fact) which means a default EDAC setup without any parameters supplied on the command line or otherwise would never even log the error in the NMI handler because we're polling by default: inline int edac_handler_set(void) { if (edac_op_state == EDAC_OPSTATE_POLL) return 0; return atomic_read(&edac_handlers); } So, long story short, I'd like to get rid of that nastiness called edac_stub.c and confine all the EDAC drivers solely to drivers/edac/. If we ever have to do stuff like that again, it should be notifiers we're using and not some insanity like this one.
Signed-off-by: Borislav Petkov Acked-by: Thomas Gleixner Cc: Tony Luck --- include/linux/edac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 5b6adf964248..bf8daabf3d51 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -31,8 +31,6 @@ extern int edac_op_state; extern int edac_err_assert; extern atomic_t edac_handlers; -extern int edac_handler_set(void); -extern void edac_atomic_assert_error(void); extern struct bus_type *edac_get_sysfs_subsys(void); enum { -- cgit v1.2.3 From 97bb6c17ad5a0892beb45070dfe8c7d6d0e5326e Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 16:49:59 +0100 Subject: EDAC: Get rid of edac_handlers Use mc_devices list instead to check whether we have EDAC driver instances successfully registered with EDAC core. Signed-off-by: Borislav Petkov --- include/linux/edac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index bf8daabf3d51..9fd6fe53ab2a 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -29,7 +29,6 @@ struct device; extern int edac_op_state; extern int edac_err_assert; -extern atomic_t edac_handlers; extern struct bus_type *edac_get_sysfs_subsys(void); -- cgit v1.2.3 From d3116a0837261405e0febb8043fe7040c8ebccb4 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Thu, 26 Jan 2017 18:25:11 +0100 Subject: EDAC: Remove edac_err_assert ... and the glue around it. It is not needed anymore. 
Signed-off-by: Borislav Petkov --- include/linux/edac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index 9fd6fe53ab2a..c55e93975079 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -28,7 +28,6 @@ struct device; #define EDAC_OPSTATE_INT 2 extern int edac_op_state; -extern int edac_err_assert; extern struct bus_type *edac_get_sysfs_subsys(void); -- cgit v1.2.3 From fee27d7d97886515a60cce38b4152b7f5b5a21fc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 17:42:03 +0100 Subject: EDAC: Delete edac_stub.c Move the remaining functionality to edac_mc.c. Convert "edac_report=" to a module parameter. Signed-off-by: Borislav Petkov --- include/linux/edac.h | 26 +++----------------------- 1 file changed, 3 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index c55e93975079..faf87e1eca21 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -29,7 +29,9 @@ struct device; extern int edac_op_state; -extern struct bus_type *edac_get_sysfs_subsys(void); +struct bus_type *edac_get_sysfs_subsys(void); +int get_edac_report_status(void); +void set_edac_report_status(int new); enum { EDAC_REPORTING_ENABLED, @@ -37,28 +39,6 @@ enum { EDAC_REPORTING_FORCE }; -extern int edac_report_status; -#ifdef CONFIG_EDAC -static inline int get_edac_report_status(void) -{ - return edac_report_status; -} - -static inline void set_edac_report_status(int new) -{ - edac_report_status = new; -} -#else -static inline int get_edac_report_status(void) -{ - return EDAC_REPORTING_DISABLED; -} - -static inline void set_edac_report_status(int new) -{ -} -#endif - static inline void opstate_init(void) { switch (edac_op_state) { -- cgit v1.2.3 From bffc7dece92edd0b6445b76a378e2fa9e324c7ed Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Sat, 4 Feb 2017 18:10:14 +0100 Subject: EDAC: Rename report status accessors Change them 
to have the edac_ prefix. No functionality change. Signed-off-by: Borislav Petkov --- include/linux/edac.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index faf87e1eca21..8ae0f45fafd6 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -30,8 +30,8 @@ struct device; extern int edac_op_state; struct bus_type *edac_get_sysfs_subsys(void); -int get_edac_report_status(void); -void set_edac_report_status(int new); +int edac_get_report_status(void); +void edac_set_report_status(int new); enum { EDAC_REPORTING_ENABLED, -- cgit v1.2.3 From 9dd813c15b2c101168808d4f5941a29985758973 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 14 Mar 2017 12:31:02 +0100 Subject: fsnotify: Move mark list head from object into dedicated structure Currently notification marks are attached to object (inode or vfsmnt) by a hlist_head in the object. The list is also protected by a spinlock in the object. So while there is any mark attached to the list of marks, the object must be pinned in memory (and thus e.g. last iput() deleting inode cannot happen). Also for list iteration in fsnotify() to work, we must hold fsnotify_mark_srcu lock so that mark itself and mark->obj_list.next cannot get freed. Thus we are required to wait for response to fanotify events from userspace process with fsnotify_mark_srcu lock held. That causes issues when userspace process is buggy and does not reply to some event - basically the whole notification subsystem gets eventually stuck. So to be able to drop fsnotify_mark_srcu lock while waiting for response, we have to pin the mark in memory and make sure it stays in the object list (as removing the mark waiting for response could lead to lost notification events for groups later in the list). However we don't want inode reclaim to block on such mark as that would lead to system just locking up elsewhere. 
This commit is the first in the series that paves way towards solving these conflicting lifetime needs. Instead of anchoring the list of marks directly in the object, we anchor it in a dedicated structure (fsnotify_mark_connector) and just point to that structure from the object. The following commits will also add spinlock protecting the list and object pointer to the structure. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fs.h | 4 +++- include/linux/fsnotify_backend.h | 10 ++++++++++ 2 files changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..66e52342be2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -546,6 +546,8 @@ is_uncached_acl(struct posix_acl *acl) #define IOP_XATTR 0x0008 #define IOP_DEFAULT_READLINK 0x0010 +struct fsnotify_mark_connector; + /* * Keep mostly read-only and often accessed (especially for * the RCU path lookup and 'stat' data) fields at the beginning @@ -645,7 +647,7 @@ struct inode { #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ - struct hlist_head i_fsnotify_marks; + struct fsnotify_mark_connector *i_fsnotify_marks; #endif #if IS_ENABLED(CONFIG_FS_ENCRYPTION) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index e6e689b5569e..8b63085f8855 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -194,6 +194,15 @@ struct fsnotify_group { #define FSNOTIFY_EVENT_PATH 1 #define FSNOTIFY_EVENT_INODE 2 +/* + * Inode / vfsmount point to this structure which tracks all marks attached to + * the inode / vfsmount. The structure is freed only when inode / vfsmount gets + * freed. 
+ */ +struct fsnotify_mark_connector { + struct hlist_head list; +}; + /* * A mark is simply an object attached to an in core inode which allows an * fsnotify listener to indicate they are either no longer interested in events @@ -346,6 +355,7 @@ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); +extern void fsnotify_connector_free(struct fsnotify_mark_connector **connp); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); -- cgit v1.2.3 From 86ffe245c430f07f95d5d28d3b694ea72f4492e7 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 14 Mar 2017 14:29:35 +0100 Subject: fsnotify: Move object pointer to fsnotify_mark_connector Move pointer to inode / vfsmount from mark itself to the fsnotify_mark_connector structure. This is another step on the path towards decoupling inode / vfsmount lifetime from notification mark lifetime. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 8b63085f8855..06f9a2cc1463 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -200,6 +200,13 @@ struct fsnotify_group { * freed. 
*/ struct fsnotify_mark_connector { +#define FSNOTIFY_OBJ_TYPE_INODE 0x01 +#define FSNOTIFY_OBJ_TYPE_VFSMOUNT 0x02 + unsigned int flags; /* Type of object [lock] */ + union { /* Object pointer [lock] */ + struct inode *inode; + struct vfsmount *mnt; + }; struct hlist_head list; }; @@ -234,14 +241,10 @@ struct fsnotify_mark { spinlock_t lock; /* List of marks for inode / vfsmount [obj_lock] */ struct hlist_node obj_list; - union { /* Object pointer [mark->lock, group->mark_mutex] */ - struct inode *inode; /* inode this mark is associated with */ - struct vfsmount *mnt; /* vfsmount this mark is associated with */ - }; + /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ + struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -#define FSNOTIFY_MARK_FLAG_INODE 0x01 -#define FSNOTIFY_MARK_FLAG_VFSMOUNT 0x02 #define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04 #define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 #define FSNOTIFY_MARK_FLAG_ALIVE 0x10 @@ -353,7 +356,7 @@ extern void fsnotify_free_mark(struct fsnotify_mark *mark); extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the inode marks */ extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); -/* run all the marks in a group, and clear all of the marks where mark->flags & flags is true*/ +/* run all the marks in a group, and clear all of the marks attached to given object type */ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); extern void fsnotify_connector_free(struct fsnotify_mark_connector **connp); extern void fsnotify_get_mark(struct fsnotify_mark *mark); -- cgit v1.2.3 From e911d8af87dba7642138f4320ca3db80629989f2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 14 Mar 2017 14:48:00 +0100 Subject: fsnotify: Make fsnotify_mark_connector hold inode reference Currently 
inode reference is held by fsnotify marks. Change the rules so that inode reference is held by fsnotify_mark_connector structure whenever the list is non-empty. This simplifies the code and is more logical. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 06f9a2cc1463..96333fb09309 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -196,8 +196,9 @@ struct fsnotify_group { /* * Inode / vfsmount point to this structure which tracks all marks attached to - * the inode / vfsmount. The structure is freed only when inode / vfsmount gets - * freed. + * the inode / vfsmount. The reference to inode / vfsmount is held by this + * structure whenever the list is non-empty. The structure is freed only when + * inode / vfsmount gets freed. */ struct fsnotify_mark_connector { #define FSNOTIFY_OBJ_TYPE_INODE 0x01 @@ -245,10 +246,9 @@ struct fsnotify_mark { struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -#define FSNOTIFY_MARK_FLAG_OBJECT_PINNED 0x04 -#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x08 -#define FSNOTIFY_MARK_FLAG_ALIVE 0x10 -#define FSNOTIFY_MARK_FLAG_ATTACHED 0x20 +#define FSNOTIFY_MARK_FLAG_IGNORED_SURV_MODIFY 0x01 +#define FSNOTIFY_MARK_FLAG_ALIVE 0x02 +#define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 unsigned int flags; /* flags [mark->lock] */ void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ }; -- cgit v1.2.3 From a242677bb1e6faa9bd82bd33afb2621071258231 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Mar 2017 09:16:27 +0100 Subject: fsnotify: Move locking into fsnotify_recalc_mask() Move locking of locks protecting a list of marks into fsnotify_recalc_mask(). 
This reduces code churn in the following patch which changes the lock protecting the list of marks. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 96333fb09309..b954f1b2571c 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -327,6 +327,8 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* functions used to manipulate the marks attached to inodes */ +/* Calculate mask of events for a list of marks */ +extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); /* run all marks associated with a vfsmount and update mnt->mnt_fsnotify_mask */ extern void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt); /* run all marks associated with an inode and update inode->i_fsnotify_mask */ -- cgit v1.2.3 From 04662cab59fc3e8421fd7a0539d304d51d2750a4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Feb 2017 08:19:43 +0100 Subject: fsnotify: Lock object list with connector lock So far list of marks attached to an object (inode / vfsmount) was protected by i_lock or mnt_root->d_lock. This dictates that the list must be empty before the object can be destroyed although the list is now anchored in the fsnotify_mark_connector structure. Protect the list by a spinlock in the fsnotify_mark_connector structure to decouple lifetime of a list of marks from a lifetime of the object. This also simplifies the code quite a bit since we don't have to differentiate between inode and vfsmount lists in quite a few places anymore. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index b954f1b2571c..02c6fac652a4 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -201,6 +201,7 @@ struct fsnotify_group { * inode / vfsmount gets freed. */ struct fsnotify_mark_connector { + spinlock_t lock; #define FSNOTIFY_OBJ_TYPE_INODE 0x01 #define FSNOTIFY_OBJ_TYPE_VFSMOUNT 0x02 unsigned int flags; /* Type of object [lock] */ @@ -240,7 +241,7 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [obj_lock] */ + /* List of marks for inode / vfsmount [connector->lock] */ struct hlist_node obj_list; /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ struct fsnotify_mark_connector *connector; -- cgit v1.2.3 From 08991e83b7286635167bab40927665a90fb00d81 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Feb 2017 09:21:58 +0100 Subject: fsnotify: Free fsnotify_mark_connector when there is no mark attached Currently we free fsnotify_mark_connector structure only when inode / vfsmount is getting freed. This can however impose noticeable memory overhead when marks get attached to inodes only temporarily. So free the connector structure once the last mark is detached from the object. Since notification infrastructure can be working with the connector under the protection of fsnotify_mark_srcu, we have to be careful and free the fsnotify_mark_connector only after SRCU period passes. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fs.h | 2 +- include/linux/fsnotify_backend.h | 11 +++++++---- 2 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 66e52342be2d..c0b6150c5fcc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -647,7 +647,7 @@ struct inode { #ifdef CONFIG_FSNOTIFY __u32 i_fsnotify_mask; /* all events this inode cares about */ - struct fsnotify_mark_connector *i_fsnotify_marks; + struct fsnotify_mark_connector __rcu *i_fsnotify_marks; #endif #if IS_ENABLED(CONFIG_FS_ENCRYPTION) diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 02c6fac652a4..84d71b6f75f6 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -197,8 +197,8 @@ struct fsnotify_group { /* * Inode / vfsmount point to this structure which tracks all marks attached to * the inode / vfsmount. The reference to inode / vfsmount is held by this - * structure whenever the list is non-empty. The structure is freed only when - * inode / vfsmount gets freed. + * structure. We destroy this structure when there are no more marks attached + * to it. The structure is protected by fsnotify_mark_srcu. 
*/ struct fsnotify_mark_connector { spinlock_t lock; @@ -209,7 +209,11 @@ struct fsnotify_mark_connector { struct inode *inode; struct vfsmount *mnt; }; - struct hlist_head list; + union { + struct hlist_head list; + /* Used listing heads to free after srcu period expires */ + struct fsnotify_mark_connector *destroy_next; + }; }; /* @@ -361,7 +365,6 @@ extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the marks attached to given object type */ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); -extern void fsnotify_connector_free(struct fsnotify_mark_connector **connp); extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); -- cgit v1.2.3 From 6b3f05d24d355f50f3d9814304650fcab0efb482 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 12:15:30 +0100 Subject: fsnotify: Detach mark from object list when last reference is dropped Instead of removing mark from object list from fsnotify_detach_mark(), remove the mark when last reference to the mark is dropped. This will allow fanotify to wait for userspace response to event without having to hold onto fsnotify_mark_srcu. To avoid pinning inodes by elevated refcount (and thus e.g. delaying file deletion) while someone holds mark reference, we detach connector from the object also from fsnotify_destroy_marks() and not only after removing last mark from the list as it was now. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 84d71b6f75f6..a483614b25d0 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -245,9 +245,9 @@ struct fsnotify_mark { struct list_head g_list; /* Protects inode / mnt pointers, flags, masks */ spinlock_t lock; - /* List of marks for inode / vfsmount [connector->lock] */ + /* List of marks for inode / vfsmount [connector->lock, mark ref] */ struct hlist_node obj_list; - /* Head of list of marks for an object [mark->lock, group->mark_mutex] */ + /* Head of list of marks for an object [mark ref] */ struct fsnotify_mark_connector *connector; /* Events types to ignore [mark->lock, group->mark_mutex] */ __u32 ignored_mask; -- cgit v1.2.3 From abc77577a669f424c5d0c185b9994f2621c52aa4 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 10 Nov 2016 16:02:11 +0100 Subject: fsnotify: Provide framework for dropping SRCU lock in ->handle_event fanotify wants to drop fsnotify_mark_srcu lock when waiting for response from userspace so that the whole notification subsystem is not blocked during that time. This patch provides a framework for safely getting mark reference for a mark found in the object list which pins the mark in that list. We can then drop fsnotify_mark_srcu, wait for userspace response and then safely continue iteration of the object list once we reacquire fsnotify_mark_srcu.
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index a483614b25d0..5bb6d988b9f6 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -80,6 +80,7 @@ struct fsnotify_event; struct fsnotify_mark; struct fsnotify_event_private_data; struct fsnotify_fname; +struct fsnotify_iter_info; /* * Each group much define these ops. The fsnotify infrastructure will call @@ -163,6 +164,8 @@ struct fsnotify_group { struct fsnotify_event *overflow_event; /* Event we queue when the * notification list is too * full */ + atomic_t user_waits; /* Number of tasks waiting for user + * response */ /* groups can define private fields here or use the void *private */ union { @@ -368,6 +371,8 @@ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, un extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); +extern void fsnotify_finish_user_wait(struct fsnotify_iter_info *iter_info); +extern bool fsnotify_prepare_user_wait(struct fsnotify_iter_info *iter_info); /* put here because inotify does some weird stuff when destroying watches */ extern void fsnotify_init_event(struct fsnotify_event *event, -- cgit v1.2.3 From 9385a84d7e1f658bb2d96ab798393e4b16268aaa Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Thu, 10 Nov 2016 17:51:50 +0100 Subject: fsnotify: Pass fsnotify_iter_info into handle_event handler Pass fsnotify_iter_info into ->handle_event() handler so that it can release and reacquire SRCU lock via fsnotify_prepare_user_wait() and fsnotify_finish_user_wait() functions. 
These functions also make sure current marks are appropriately pinned so that iteration protected by srcu in fsnotify() stays safe. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 5bb6d988b9f6..744a4b9076f9 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -99,7 +99,8 @@ struct fsnotify_ops { struct fsnotify_mark *inode_mark, struct fsnotify_mark *vfsmount_mark, u32 mask, const void *data, int data_type, - const unsigned char *file_name, u32 cookie); + const unsigned char *file_name, u32 cookie, + struct fsnotify_iter_info *iter_info); void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); -- cgit v1.2.3 From 66d2b81bcb92c14b22a56a9ff936f2b40accc83c Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 16:03:59 +0100 Subject: fsnotify: Remove fsnotify_set_mark_{,ignored_}mask_locked() These helpers are now only a simple assignment and just obfuscate what is going on. Remove them. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 744a4b9076f9..63354cd86a7b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -347,10 +347,6 @@ extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(str extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); /* find (and take a reference) to a mark associated with group and vfsmount */ extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt); -/* set the ignored_mask of a mark */ -extern void fsnotify_set_mark_ignored_mask_locked(struct fsnotify_mark *mark, __u32 mask); -/* set the mask of a mark (might pin the object into memory */ -extern void fsnotify_set_mark_mask_locked(struct fsnotify_mark *mark, __u32 mask); /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); -- cgit v1.2.3 From 8920d2734d9a1b68e1b53d8c12b289773cdbd971 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 16:13:54 +0100 Subject: fsnotify: Remove fsnotify_recalc_{inode|vfsmount}_mask() These helpers are just very thin wrappers now. Remove them. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 63354cd86a7b..6d09c6ff9810 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -338,10 +338,6 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); -/* run all marks associated with a vfsmount and update mnt->mnt_fsnotify_mask */ -extern void fsnotify_recalc_vfsmount_mask(struct vfsmount *mnt); -/* run all marks associated with an inode and update inode->i_fsnotify_mask */ -extern void fsnotify_recalc_inode_mask(struct inode *inode); extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark)); /* find (and take a reference) to a mark associated with group and inode */ extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); -- cgit v1.2.3 From 416bcdbcbbb4800f11f03e8baf570f9996219f67 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 16:20:32 +0100 Subject: fsnotify: Inline fsnotify_clear_{inode|vfsmount}_mark_group() Inline these helpers as they are very thin. We still keep them as we don't want to expose details about how list type is determined. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 6d09c6ff9810..700b4fa991d4 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -355,12 +355,18 @@ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); -/* run all the marks in a group, and clear all of the vfsmount marks */ -extern void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group); -/* run all the marks in a group, and clear all of the inode marks */ -extern void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group); /* run all the marks in a group, and clear all of the marks attached to given object type */ extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); +/* run all the marks in a group, and clear all of the vfsmount marks */ +static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); +} +/* run all the marks in a group, and clear all of the inode marks */ +static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) +{ + fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_OBJ_TYPE_INODE); +} extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); extern void fsnotify_unmount_inodes(struct super_block *sb); -- cgit v1.2.3 From 18f2e0d3a43641889ac2ba9d7508d47359eec063 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jan 2017 10:33:18 +0100 Subject: fsnotify: Rename fsnotify_clear_marks_by_group_flags() The 
_flags() suffix in the function name was more confusing than explaining so just remove it. Also rename the argument from 'flags' to 'type' to better explain what the function expects. Reviewed-by: Miklos Szeredi Suggested-by: Amir Goldstein Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 700b4fa991d4..d6bbd5acdac1 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -356,16 +356,16 @@ extern void fsnotify_detach_mark(struct fsnotify_mark *mark); /* free mark */ extern void fsnotify_free_mark(struct fsnotify_mark *mark); /* run all the marks in a group, and clear all of the marks attached to given object type */ -extern void fsnotify_clear_marks_by_group_flags(struct fsnotify_group *group, unsigned int flags); +extern void fsnotify_clear_marks_by_group(struct fsnotify_group *group, unsigned int type); /* run all the marks in a group, and clear all of the vfsmount marks */ static inline void fsnotify_clear_vfsmount_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_VFSMOUNT); } /* run all the marks in a group, and clear all of the inode marks */ static inline void fsnotify_clear_inode_marks_by_group(struct fsnotify_group *group) { - fsnotify_clear_marks_by_group_flags(group, FSNOTIFY_OBJ_TYPE_INODE); + fsnotify_clear_marks_by_group(group, FSNOTIFY_OBJ_TYPE_INODE); } extern void fsnotify_get_mark(struct fsnotify_mark *mark); extern void fsnotify_put_mark(struct fsnotify_mark *mark); -- cgit v1.2.3 From 2e37c6ca8d76c362e844c0cf3ebe8ba2e27940cb Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 4 Jan 2017 10:51:58 +0100 Subject: fsnotify: Remove fsnotify_detach_group_marks() The function is already 
mostly contained in what fsnotify_clear_marks_by_group() does. Just update that function to not select marks when all of them should be destroyed and remove fsnotify_detach_group_marks(). Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index d6bbd5acdac1..7287cba42a66 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -208,6 +208,8 @@ struct fsnotify_mark_connector { spinlock_t lock; #define FSNOTIFY_OBJ_TYPE_INODE 0x01 #define FSNOTIFY_OBJ_TYPE_VFSMOUNT 0x02 +#define FSNOTIFY_OBJ_ALL_TYPES (FSNOTIFY_OBJ_TYPE_INODE | \ + FSNOTIFY_OBJ_TYPE_VFSMOUNT) unsigned int flags; /* Type of object [lock] */ union { /* Object pointer [lock] */ struct inode *inode; -- cgit v1.2.3 From b1362edfe15b20edd3d116cec521aa420b7afb98 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 16:28:45 +0100 Subject: fsnotify: Remove fsnotify_find_{inode|vfsmount}_mark() These are very thin wrappers, just remove them. Drop fs/notify/vfsmount_mark.c as it is empty now. 
Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 7287cba42a66..2ef0e04c5a9d 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -341,10 +341,10 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark)); -/* find (and take a reference) to a mark associated with group and inode */ -extern struct fsnotify_mark *fsnotify_find_inode_mark(struct fsnotify_group *group, struct inode *inode); -/* find (and take a reference) to a mark associated with group and vfsmount */ -extern struct fsnotify_mark *fsnotify_find_vfsmount_mark(struct fsnotify_group *group, struct vfsmount *mnt); +/* Find mark belonging to given group in the list of marks */ +extern struct fsnotify_mark *fsnotify_find_mark( + struct fsnotify_mark_connector __rcu **connp, + struct fsnotify_group *group); /* attach the mark to both the group and the inode */ extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, struct inode *inode, struct vfsmount *mnt, int allow_dups); -- cgit v1.2.3 From 7b1293234084ddb6469c4e9a5ef818f399b5786b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 18:32:48 +0100 Subject: fsnotify: Add group pointer in fsnotify_init_mark() Currently we initialize mark->group only in fsnotify_add_mark_lock(). However we will need to access fsnotify_ops of corresponding group from fsnotify_put_mark() so we need mark->group initialized earlier. 
Do that in fsnotify_init_mark() which has a consequence that once fsnotify_init_mark() is called on a mark, the mark has to be destroyed by fsnotify_put_mark(). Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 2ef0e04c5a9d..a64518e36bd5 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -340,15 +340,17 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); -extern void fsnotify_init_mark(struct fsnotify_mark *mark, void (*free_mark)(struct fsnotify_mark *mark)); +extern void fsnotify_init_mark(struct fsnotify_mark *mark, + struct fsnotify_group *group, + void (*free_mark)(struct fsnotify_mark *mark)); /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark( struct fsnotify_mark_connector __rcu **connp, struct fsnotify_group *group); -/* attach the mark to both the group and the inode */ -extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct fsnotify_group *group, - struct inode *inode, struct vfsmount *mnt, int allow_dups); -extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct fsnotify_group *group, +/* attach the mark to the inode or vfsmount */ +extern int fsnotify_add_mark(struct fsnotify_mark *mark, struct inode *inode, + struct vfsmount *mnt, int allow_dups); +extern int fsnotify_add_mark_locked(struct fsnotify_mark *mark, struct inode *inode, struct vfsmount *mnt, int allow_dups); /* given a group and a mark, flag mark to be freed when all references are dropped */ extern void fsnotify_destroy_mark(struct fsnotify_mark *mark, -- cgit v1.2.3 From 
054c636e5c8054884ede889be82ce059879945e6 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 21 Dec 2016 18:06:12 +0100 Subject: fsnotify: Move ->free_mark callback to fsnotify_ops Pointer to ->free_mark callback unnecessarily occupies one long in each fsnotify_mark although they are the same for all marks from one notification group. Move the callback pointer to fsnotify_ops. Reviewed-by: Miklos Szeredi Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index a64518e36bd5..c6c69318752b 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -104,6 +104,8 @@ struct fsnotify_ops { void (*free_group_priv)(struct fsnotify_group *group); void (*freeing_mark)(struct fsnotify_mark *mark, struct fsnotify_group *group); void (*free_event)(struct fsnotify_event *event); + /* called on final put+free to free memory */ + void (*free_mark)(struct fsnotify_mark *mark); }; /* @@ -261,7 +263,6 @@ struct fsnotify_mark { #define FSNOTIFY_MARK_FLAG_ALIVE 0x02 #define FSNOTIFY_MARK_FLAG_ATTACHED 0x04 unsigned int flags; /* flags [mark->lock] */ - void (*free_mark)(struct fsnotify_mark *mark); /* called on final put+free */ }; #ifdef CONFIG_FSNOTIFY @@ -341,8 +342,7 @@ extern struct fsnotify_event *fsnotify_remove_first_event(struct fsnotify_group /* Calculate mask of events for a list of marks */ extern void fsnotify_recalc_mask(struct fsnotify_mark_connector *conn); extern void fsnotify_init_mark(struct fsnotify_mark *mark, - struct fsnotify_group *group, - void (*free_mark)(struct fsnotify_mark *mark)); + struct fsnotify_group *group); /* Find mark belonging to given group in the list of marks */ extern struct fsnotify_mark *fsnotify_find_mark( struct fsnotify_mark_connector __rcu **connp, -- cgit v1.2.3 From 
0c08aaf873174c95e674cf21ffcd041c589d2e5b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Vincent=20Stehl=C3=A9?= Date: Sun, 9 Apr 2017 22:05:05 +0200 Subject: regulator: isl9305: fix array size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit ISL9305_MAX_REGULATOR is the last index used to access the init_data[] array, so we need to add one to this last index to obtain the necessary array size. This fixes the following smatch error: drivers/regulator/isl9305.c:160 isl9305_i2c_probe() error: buffer overflow 'pdata->init_data' 3 <= 3 Fixes: dec38b5ce6a9edb4 ("regulator: isl9305: Add Intersil ISL9305/H driver") Signed-off-by: Vincent Stehlé Cc: Mark Brown Signed-off-by: Mark Brown --- include/linux/platform_data/isl9305.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/isl9305.h b/include/linux/platform_data/isl9305.h index 1419133fa69e..4ac1a070af0a 100644 --- a/include/linux/platform_data/isl9305.h +++ b/include/linux/platform_data/isl9305.h @@ -24,7 +24,7 @@ struct regulator_init_data; struct isl9305_pdata { - struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR]; + struct regulator_init_data *init_data[ISL9305_MAX_REGULATOR + 1]; }; #endif -- cgit v1.2.3 From 5367278cb7ba74537bcad1470d75f30d95b09c14 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 6 Apr 2017 12:26:20 -0400 Subject: tracing: Add stack_tracer_disable/enable() functions There are certain parts of the kernel that cannot let stack tracing proceed (namely in RCU), because the stack tracer uses RCU, and parts of RCU internals cannot handle having RCU read side locks taken. Add stack_tracer_disable() and stack_tracer_enable() functions to let RCU stop stack tracing on the current CPU when it is in those critical sections. 
Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index ef7123219f14..7b4e6572ab21 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -286,6 +286,12 @@ int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); + +void stack_tracer_disable(void); +void stack_tracer_enable(void); +#else +static inline void stack_tracer_disable(void) { } +static inline void stack_tracer_enable(void) { } #endif struct ftrace_func_command { -- cgit v1.2.3 From 8aaf1ee70e19ac74cbbb81098edfa328d1ab4bd7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Thu, 6 Apr 2017 15:47:32 -0400 Subject: tracing: Rename trace_active to disable_stack_tracer and inline its modification In order to eliminate a function call, make "trace_active" into "disable_stack_tracer" and convert stack_tracer_disable() and friends into static inline functions. Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 36 ++++++++++++++++++++++++++++++++++-- 1 file changed, 34 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 7b4e6572ab21..06b2990a35e4 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -287,8 +287,40 @@ stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); -void stack_tracer_disable(void); -void stack_tracer_enable(void); +/* DO NOT MODIFY THIS VARIABLE DIRECTLY! */ +DECLARE_PER_CPU(int, disable_stack_tracer); + +/** + * stack_tracer_disable - temporarily disable the stack tracer + * + * There's a few locations (namely in RCU) where stack tracing + * cannot be executed. This function is used to disable stack + * tracing during those critical sections. 
+ * + * This function must be called with preemption or interrupts + * disabled and stack_tracer_enable() must be called shortly after + * while preemption or interrupts are still disabled. + */ +static inline void stack_tracer_disable(void) +{ + /* Preemption or interupts must be disabled */ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_inc(disable_stack_tracer); +} + +/** + * stack_tracer_enable - re-enable the stack tracer + * + * After stack_tracer_disable() is called, stack_tracer_enable() + * must be called shortly afterward. + */ +static inline void stack_tracer_enable(void) +{ + if (IS_ENABLED(CONFIG_PREEMPT_DEBUG)) + WARN_ON_ONCE(!preempt_count() || !irqs_disabled()); + this_cpu_dec(disable_stack_tracer); +} #else static inline void stack_tracer_disable(void) { } static inline void stack_tracer_enable(void) { } -- cgit v1.2.3 From 03ecd3f48e57f2e6154584e0ee7450d7a05e2d3b Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 7 Apr 2017 12:20:36 -0400 Subject: rcu/tracing: Add rcu_disabled to denote when rcu_irq_enter() will not work Tracing uses rcu_irq_enter() as a way to make sure that RCU is watching when it needs to use rcu_read_lock() and friends. This is because tracing can happen as RCU is about to enter user space, or about to go idle, and RCU does not watch for RCU read side critical sections as it makes the transition. There is a small location within the RCU infrastructure that rcu_irq_enter() itself will not work. If tracing were to occur in that section it will break if it tries to use rcu_irq_enter(). Originally, this happens with the stack_tracer, because it will call save_stack_trace when it encounters stack usage that is greater than any stack usage it had encountered previously. There was a case where that happened in the RCU section where rcu_irq_enter() did not work, and lockdep complained loudly about it. 
To fix it, stack tracing added a call to be disabled and RCU would disable stack tracing during the critical section in which rcu_irq_enter() was inoperable. This solution worked, but there are other cases that use rcu_irq_enter() and it would be a good idea to let RCU give a way to let others know that rcu_irq_enter() will not work. For example, in trace events. Another helpful aspect of this change is that it also moves the per cpu variable called in the RCU critical section into a cache locale along with other RCU per cpu variables used in that same location. I'm keeping the stack_tracer_disable() code, as that still could be used in the future by places that really need to disable it. And since it's only a static inline, it won't take up any kernel text if it is not used. Link: http://lkml.kernel.org/r/20170405093207.404f8deb@gandalf.local.home Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- include/linux/rcupdate.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de88b33c0974..dea8f17b2fe3 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -97,6 +97,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, unsigned long secs, unsigned long c_old, unsigned long c); +bool rcu_irq_enter_disabled(void); #else static inline void rcutorture_get_gp_data(enum rcutorture_type test_type, int *flags, @@ -113,6 +114,10 @@ static inline void rcutorture_record_test_transition(void) static inline void rcutorture_record_progress(unsigned long vernum) { } +static inline bool rcu_irq_enter_disabled(void) +{ + return false; +} #ifdef CONFIG_RCU_TRACE void do_trace_rcu_torture_read(const char *rcutorturename, struct rcu_head *rhp, -- cgit v1.2.3 From d54b6eeb553c89ed8d4c5a2ed73df374a45b9562 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 7 Apr 2017 12:40:49 -0400 Subject: tracing: Make sure rcu_irq_enter() can work for
trace_*_rcuidle() trace events Stack tracing discovered that there's a small location inside the RCU infrastructure where calling rcu_irq_enter() does not work. As trace events use rcu_irq_enter(), they must make sure that it is functional. A check against rcu_irq_enter_disabled() is added with a WARN_ON_ONCE() as no trace event should ever be used in that part of RCU. If the warning is triggered, then the trace event is ignored. Restructure the __DO_TRACE() a bit to get rid of the prercu and postrcu, and just have an rcucheck that does the work from within the __DO_TRACE() macro. gcc optimization will compile out the rcucheck=0 case. Link: http://lkml.kernel.org/r/20170405093207.404f8deb@gandalf.local.home Acked-by: Mathieu Desnoyers Acked-by: Paul E. McKenney Signed-off-by: Steven Rostedt (VMware) --- include/linux/tracepoint.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index f72fcfe0e66a..cc48cb2ce209 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -128,7 +128,7 @@ extern void syscall_unregfunc(void); * as "(void *, void)". The DECLARE_TRACE_NOARGS() will pass in just * "void *data", where as the DECLARE_TRACE() will pass in "void *data, proto".
*/ -#define __DO_TRACE(tp, proto, args, cond, prercu, postrcu) \ +#define __DO_TRACE(tp, proto, args, cond, rcucheck) \ do { \ struct tracepoint_func *it_func_ptr; \ void *it_func; \ @@ -136,7 +136,11 @@ extern void syscall_unregfunc(void); \ if (!(cond)) \ return; \ - prercu; \ + if (rcucheck) { \ + if (WARN_ON_ONCE(rcu_irq_enter_disabled())) \ + return; \ + rcu_irq_enter_irqson(); \ + } \ rcu_read_lock_sched_notrace(); \ it_func_ptr = rcu_dereference_sched((tp)->funcs); \ if (it_func_ptr) { \ @@ -147,20 +151,19 @@ extern void syscall_unregfunc(void); } while ((++it_func_ptr)->func); \ } \ rcu_read_unlock_sched_notrace(); \ - postrcu; \ + if (rcucheck) \ + rcu_irq_exit_irqson(); \ } while (0) #ifndef MODULE -#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \ +#define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name##_rcuidle(proto) \ { \ if (static_key_false(&__tracepoint_##name.key)) \ __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ - TP_CONDITION(cond), \ - rcu_irq_enter_irqson(), \ - rcu_irq_exit_irqson()); \ + TP_CONDITION(cond), 1); \ } #else #define __DECLARE_TRACE_RCU(name, proto, args, cond, data_proto, data_args) @@ -186,7 +189,7 @@ extern void syscall_unregfunc(void); __DO_TRACE(&__tracepoint_##name, \ TP_PROTO(data_proto), \ TP_ARGS(data_args), \ - TP_CONDITION(cond),,); \ + TP_CONDITION(cond), 0); \ if (IS_ENABLED(CONFIG_LOCKDEP) && (cond)) { \ rcu_read_lock_sched_notrace(); \ rcu_dereference_sched(__tracepoint_##name.funcs);\ -- cgit v1.2.3 From 30e03acda5fd9c77ec9bf8b3c5def9540c6b0486 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Sun, 9 Apr 2017 07:36:14 +0600 Subject: cpuset: Remove cpuset_update_active_cpus()'s parameter. In cpuset_update_active_cpus(), cpu_online isn't used anymore. Remove it. 
Signed-off-by: Rakib Mullick Acked-by: Zefan Li Signed-off-by: Tejun Heo --- include/linux/cpuset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 611fce58d670..119a3f9604b0 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -42,7 +42,7 @@ static inline void cpuset_dec(void) extern int cpuset_init(void); extern void cpuset_init_smp(void); -extern void cpuset_update_active_cpus(bool cpu_online); +extern void cpuset_update_active_cpus(void); extern void cpuset_cpus_allowed(struct task_struct *p, struct cpumask *mask); extern void cpuset_cpus_allowed_fallback(struct task_struct *p); extern nodemask_t cpuset_mems_allowed(struct task_struct *p); @@ -155,7 +155,7 @@ static inline bool cpusets_enabled(void) { return false; } static inline int cpuset_init(void) { return 0; } static inline void cpuset_init_smp(void) {} -static inline void cpuset_update_active_cpus(bool cpu_online) +static inline void cpuset_update_active_cpus(void) { partition_sched_domains(1, NULL, NULL); } -- cgit v1.2.3 From b8b1a2e5eca6bbf20e3a29c5f9db8331ec52af2d Mon Sep 17 00:00:00 2001 From: Todd Poynor Date: Thu, 6 Apr 2017 18:47:57 -0700 Subject: cgroup: move cgroup_subsys_state parent field for cache locality Various structures embed a struct cgroup_subsys_state, typically at the top of the containing structure. It is common for code that accesses the structures to perform operations that iterate over the chain of parent css pointers, also accessing data in each containing structure. In particular, struct cpuacct is used by fairly hot code paths in the scheduler such as cpuacct_charge(). Move the parent css pointer field to the end of the structure to increase the chances of residing in the same cache line as the data from the containing structure. 
Signed-off-by: Todd Poynor Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c74b78ecd583..21745946cae1 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -107,9 +107,6 @@ struct cgroup_subsys_state { /* reference count - access via css_[try]get() and css_put() */ struct percpu_ref refcnt; - /* PI: the parent css */ - struct cgroup_subsys_state *parent; - /* siblings list anchored at the parent's ->children */ struct list_head sibling; struct list_head children; @@ -139,6 +136,12 @@ struct cgroup_subsys_state { /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; + + /* + * PI: the parent css. Placed here for cache proximity to following + * fields of the containing structure. + */ + struct cgroup_subsys_state *parent; }; /* -- cgit v1.2.3 From 717a94b5fc7092afebe9c93791f29b2d8e5d297a Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Fri, 7 Apr 2017 10:03:26 +1000 Subject: sched/core: Remove 'task' parameter and rename tsk_restore_flags() to current_restore_flags() It is not safe for one thread to modify the ->flags of another thread as there is no locking that can protect the update. So tsk_restore_flags(), which takes a task pointer and modifies the flags, is an invitation to do the wrong thing. All current users pass "current" as the task, so no developers have accepted that invitation. It would be best to ensure it remains that way. So rename tsk_restore_flags() to current_restore_flags() and don't pass in a task_struct pointer. Always operate on current->flags. 
Signed-off-by: NeilBrown Cc: Linus Torvalds Cc: Mel Gorman Cc: Michal Hocko Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d67eee84fd43..0978fb74e45a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1286,10 +1286,10 @@ TASK_PFA_TEST(LMK_WAITING, lmk_waiting) TASK_PFA_SET(LMK_WAITING, lmk_waiting) static inline void -tsk_restore_flags(struct task_struct *task, unsigned long orig_flags, unsigned long flags) +current_restore_flags(unsigned long orig_flags, unsigned long flags) { - task->flags &= ~flags; - task->flags |= orig_flags & flags; + current->flags &= ~flags; + current->flags |= orig_flags & flags; } extern int cpuset_cpumask_can_shrink(const struct cpumask *cur, const struct cpumask *trial); -- cgit v1.2.3 From e92634cd4d37d5a2ea2fb0d55d25d50bbba1e8e0 Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Thu, 30 Mar 2017 09:46:41 +0300 Subject: usb: otg-fsm: Prevent build warning "VDBG" redefined If usb/otg-fsm.h and usb/composite.h are included together then it results in the build warning [1]. Prevent that by defining VDBG locally. Also get rid of MPC_LOC which doesn't seem to be used by anyone. 
[1] - warning fixed by this patch: In file included from drivers/usb/dwc3/core.h:33, from drivers/usb/dwc3/ep0.c:33: include/linux/usb/otg-fsm.h:30:1: warning: "VDBG" redefined In file included from drivers/usb/dwc3/ep0.c:31: include/linux/usb/composite.h:615:1: warning: this is the location of the previous definition Signed-off-by: Roger Quadros Reviewed-by: Jun Li Acked-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/otg-fsm.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index 7a0350535cb1..a0a8f878503c 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -21,21 +21,6 @@ #include #include -#undef VERBOSE - -#ifdef VERBOSE -#define VDBG(fmt, args...) pr_debug("[%s] " fmt , \ - __func__, ## args) -#else -#define VDBG(stuff...) do {} while (0) -#endif - -#ifdef VERBOSE -#define MPC_LOC printk("Current Location [%s]:[%d]\n", __FILE__, __LINE__) -#else -#define MPC_LOC do {} while (0) -#endif - #define PROTO_UNDEF (0) #define PROTO_HOST (1) #define PROTO_GADGET (2) -- cgit v1.2.3 From 8c58f1a7a4b6d1d723bf25fef9d842d5a11200d0 Mon Sep 17 00:00:00 2001 From: Jacopo Mondi Date: Wed, 5 Apr 2017 16:07:19 +0200 Subject: pinctrl: generic: Add bi-directional and output-enable Add bi-directional and output-enable pin configuration properties. bi-directional allows to specify when a pin shall operate in input and output mode at the same time. This is particularly useful in platforms where input and output buffers have to be manually enabled. output-enable is just syntactic sugar to specify that a pin shall operate in output mode, ignoring the provided argument. This pairs with input-enable pin configuration option. 
Signed-off-by: Jacopo Mondi Acked-by: Rob Herring Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 7620eb127cff..279e3c5326e3 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -42,6 +42,8 @@ * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high * impedance to VDD). If the argument is != 0 pull-up is enabled, * if it is 0, pull-up is total, i.e. the pin is connected to VDD. + * @PIN_CONFIG_BIDIRECTIONAL: the pin will be configured to allow simultaneous + * input and output operations. * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open * collector) which means it is usually wired with other output ports * which are then pulled up with an external resistor. Setting this @@ -96,6 +98,7 @@ enum pin_config_param { PIN_CONFIG_BIAS_PULL_DOWN, PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, PIN_CONFIG_BIAS_PULL_UP, + PIN_CONFIG_BIDIRECTIONAL, PIN_CONFIG_DRIVE_OPEN_DRAIN, PIN_CONFIG_DRIVE_OPEN_SOURCE, PIN_CONFIG_DRIVE_PUSH_PULL, -- cgit v1.2.3 From ab781ec0e5e781849bd14291608c8626bac871e1 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Wed, 29 Mar 2017 14:18:20 +0200 Subject: mfd: cpcap: Implement IRQ sense helper CPCAP can sense if IRQ is currently set or not. This functionality is required for a few subdevices, such as the power button and usb phy modules. 
Signed-off-by: Sebastian Reichel Acked-by: Tony Lindgren Signed-off-by: Lee Jones --- include/linux/mfd/motorola-cpcap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/motorola-cpcap.h b/include/linux/mfd/motorola-cpcap.h index b4031c2b2214..793aa695faa0 100644 --- a/include/linux/mfd/motorola-cpcap.h +++ b/include/linux/mfd/motorola-cpcap.h @@ -290,3 +290,5 @@ static inline int cpcap_get_vendor(struct device *dev, return 0; } + +extern int cpcap_sense_virq(struct regmap *regmap, int virq); -- cgit v1.2.3 From 4244de1c64ded7f5438717bdce3fa074efd20efb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 11 Apr 2017 13:01:24 +0200 Subject: PCI: remove pci_enable_msix Unused now that all callers switched to pci_alloc_irq_vectors. Signed-off-by: Christoph Hellwig Acked-by: Bjorn Helgaas Signed-off-by: David S. Miller --- include/linux/pci.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..82dec36845e6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1300,7 +1300,6 @@ int pci_msi_vec_count(struct pci_dev *dev); void pci_msi_shutdown(struct pci_dev *dev); void pci_disable_msi(struct pci_dev *dev); int pci_msix_vec_count(struct pci_dev *dev); -int pci_enable_msix(struct pci_dev *dev, struct msix_entry *entries, int nvec); void pci_msix_shutdown(struct pci_dev *dev); void pci_disable_msix(struct pci_dev *dev); void pci_restore_msi_state(struct pci_dev *dev); @@ -1330,9 +1329,6 @@ static inline int pci_msi_vec_count(struct pci_dev *dev) { return -ENOSYS; } static inline void pci_msi_shutdown(struct pci_dev *dev) { } static inline void pci_disable_msi(struct pci_dev *dev) { } static inline int pci_msix_vec_count(struct pci_dev *dev) { return -ENOSYS; } -static inline int pci_enable_msix(struct pci_dev *dev, - struct msix_entry *entries, int nvec) -{ return -ENOSYS; } static inline void pci_msix_shutdown(struct 
pci_dev *dev) { } static inline void pci_disable_msix(struct pci_dev *dev) { } static inline void pci_restore_msi_state(struct pci_dev *dev) { } -- cgit v1.2.3 From 083c52144a19c69b7956aa53c913ba621f7c5ae2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Apr 2017 09:39:45 +0100 Subject: drivers/perf: arm_pmu: define armpmu_init_fn We expect an ARM PMU's init function to have a particular prototype, which we open-code in a few places. This is less than ideal, considering that we cast a void value to this type in one location, and a mismatch could easily be missed. Add a typedef so that we can ensure this is consistent. Signed-off-by: Mark Rutland Tested-by: Jeremy Linton Cc: Will Deacon Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 44f43fcf2524..4249914315a4 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -132,10 +132,12 @@ int armpmu_map_event(struct perf_event *event, [PERF_COUNT_HW_CACHE_RESULT_MAX], u32 raw_event_mask); +typedef int (*armpmu_init_fn)(struct arm_pmu *); + struct pmu_probe_info { unsigned int cpuid; unsigned int mask; - int (*init)(struct arm_pmu *); + armpmu_init_fn init; }; #define PMU_PROBE(_cpuid, _mask, _fn) \ -- cgit v1.2.3 From 18bfcfe51b8f60b69ab012888dea8061a9cd3381 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Apr 2017 09:39:53 +0100 Subject: drivers/perf: arm_pmu: split out platform device probe logic Now that we've split the pdev and DT probing logic from the runtime management, let's move the former into its own file. We gain a few lines due to the copyright header and includes, but this should keep the logic clearly separated, and paves the way for adding ACPI support in a similar fashion. 
Signed-off-by: Mark Rutland Tested-by: Jeremy Linton [will: rename nr_irqs to avoid conflict with global variable] Signed-off-by: Will Deacon --- include/linux/perf/arm_pmu.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 4249914315a4..25556ebb1c7b 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -159,6 +159,13 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table); +/* Internal functions only for core arm_pmu code */ +struct arm_pmu *armpmu_alloc(void); +void armpmu_free(struct arm_pmu *pmu); +int armpmu_register(struct arm_pmu *pmu); +int armpmu_request_irqs(struct arm_pmu *armpmu); +void armpmu_free_irqs(struct arm_pmu *armpmu); + #define ARMV8_PMU_PDEV_NAME "armv8-pmu" #endif /* CONFIG_ARM_PMU */ -- cgit v1.2.3 From 45736a72fb79b204c1fbdb08a1e1a2aa52c7281a Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Tue, 11 Apr 2017 09:39:55 +0100 Subject: drivers/perf: arm_pmu: add ACPI framework This patch adds framework code to handle parsing PMU data out of the MADT, sanity checking this, and managing the association of CPUs (and their interrupts) with appropriate logical PMUs. For the time being, we expect that only one PMU driver (PMUv3) will make use of this, and we simply pass in a single probe function. This is based on an earlier patch from Jeremy Linton. 
Signed-off-by: Mark Rutland Tested-by: Jeremy Linton Cc: Will Deacon Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + include/linux/perf/arm_pmu.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index cfcfab37d9c4..0f2a80377520 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -94,6 +94,7 @@ enum cpuhp_state { CPUHP_AP_ARM_VFP_STARTING, CPUHP_AP_ARM64_DEBUG_MONITORS_STARTING, CPUHP_AP_PERF_ARM_HW_BREAKPOINT_STARTING, + CPUHP_AP_PERF_ARM_ACPI_STARTING, CPUHP_AP_PERF_ARM_STARTING, CPUHP_AP_ARM_L2X0_STARTING, CPUHP_AP_ARM_ARCH_TIMER_STARTING, diff --git a/include/linux/perf/arm_pmu.h b/include/linux/perf/arm_pmu.h index 25556ebb1c7b..1360dd6d5e61 100644 --- a/include/linux/perf/arm_pmu.h +++ b/include/linux/perf/arm_pmu.h @@ -117,6 +117,9 @@ struct arm_pmu { struct notifier_block cpu_pm_nb; /* the attr_groups array must be NULL-terminated */ const struct attribute_group *attr_groups[ARMPMU_NR_ATTR_GROUPS + 1]; + + /* Only to be used by ACPI probing code */ + unsigned long acpi_cpuid; }; #define to_arm_pmu(p) (container_of(p, struct arm_pmu, pmu)) @@ -159,12 +162,20 @@ int arm_pmu_device_probe(struct platform_device *pdev, const struct of_device_id *of_table, const struct pmu_probe_info *probe_table); +#ifdef CONFIG_ACPI +int arm_pmu_acpi_probe(armpmu_init_fn init_fn); +#else +static inline int arm_pmu_acpi_probe(armpmu_init_fn init_fn) { return 0; } +#endif + /* Internal functions only for core arm_pmu code */ struct arm_pmu *armpmu_alloc(void); void armpmu_free(struct arm_pmu *pmu); int armpmu_register(struct arm_pmu *pmu); int armpmu_request_irqs(struct arm_pmu *armpmu); void armpmu_free_irqs(struct arm_pmu *armpmu); +int armpmu_request_irq(struct arm_pmu *armpmu, int cpu); +void armpmu_free_irq(struct arm_pmu *armpmu, int cpu); #define ARMV8_PMU_PDEV_NAME "armv8-pmu" -- cgit v1.2.3 From 
50512625da06c41517cb596f51b923ce15f401a4 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Wed, 5 Apr 2017 14:05:50 +1000 Subject: Revert "block: introduce bio_copy_data_partial" This reverts commit 6f8802852f7e58a12177a86179803b9efaad98e2. bio_copy_data_partial() is no longer needed. Signed-off-by: NeilBrown Signed-off-by: Shaohua Li --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index fafef6343d1b..7cf8a6c70a3f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -461,8 +461,6 @@ static inline void bio_flush_dcache_pages(struct bio *bi) #endif extern void bio_copy_data(struct bio *dst, struct bio *src); -extern void bio_copy_data_partial(struct bio *dst, struct bio *src, - int offset, int size); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern void bio_free_pages(struct bio *bio); -- cgit v1.2.3 From be9370a7d8614d1fa54649c75de14458e79b91ec Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Apr 2017 15:34:57 +0200 Subject: bpf: remove struct bpf_prog_type_list There's no need to have struct bpf_prog_type_list since it just contains a list_head, the type, and the ops pointer. Since the types are densely packed and not actually dynamically registered, it's much easier and smaller to have an array of type->ops pointer. Also initialize this array statically to remove code needed to initialize it. In order to save duplicating the list, move it to a new header file and include it in the places needing it. Signed-off-by: Johannes Berg Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 16 +++++----------- include/linux/bpf_types.h | 18 ++++++++++++++++++ 2 files changed, 23 insertions(+), 11 deletions(-) create mode 100644 include/linux/bpf_types.h (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index bbb513da5075..07fc02bb38e4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -173,12 +173,6 @@ struct bpf_verifier_ops { union bpf_attr __user *uattr); }; -struct bpf_prog_type_list { - struct list_head list_node; - const struct bpf_verifier_ops *ops; - enum bpf_prog_type type; -}; - struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -243,7 +237,11 @@ int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -void bpf_register_prog_type(struct bpf_prog_type_list *tl); +#define BPF_PROG_TYPE(_id, _ops) \ + extern const struct bpf_verifier_ops _ops; +#include +#undef BPF_PROG_TYPE + void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); @@ -306,10 +304,6 @@ static inline void bpf_long_memcpy(void *dst, const void *src, u32 size) /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else -static inline void bpf_register_prog_type(struct bpf_prog_type_list *tl) -{ -} - static inline struct bpf_prog *bpf_prog_get(u32 ufd) { return ERR_PTR(-EOPNOTSUPP); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h new file mode 100644 index 000000000000..68b0a9811216 --- /dev/null +++ b/include/linux/bpf_types.h @@ -0,0 +1,18 @@ +/* internal file - do not include directly */ + +#ifdef CONFIG_NET +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops) 
+BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) +#endif +#ifdef CONFIG_BPF_EVENTS +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) +#endif -- cgit v1.2.3 From 40077e0cf62206ac3c315b6991d8dcddb3703286 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Tue, 11 Apr 2017 15:34:58 +0200 Subject: bpf: remove struct bpf_map_type_list There's no need to have struct bpf_map_type_list since it just contains a list_head, the type, and the ops pointer. Since the types are densely packed and not actually dynamically registered, it's much easier and smaller to have an array of type->ops pointer. Also initialize this array statically to remove code needed to initialize it. In order to save duplicating the list, move it to the types header file added by the previous patch and include it in the same fashion. Signed-off-by: Johannes Berg Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. 
Miller --- include/linux/bpf.h | 11 +++-------- include/linux/bpf_types.h | 18 ++++++++++++++++++ 2 files changed, 21 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 07fc02bb38e4..6bb38d76faf4 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -53,12 +53,6 @@ struct bpf_map { struct bpf_map *inner_map_meta; }; -struct bpf_map_type_list { - struct list_head list_node; - const struct bpf_map_ops *ops; - enum bpf_map_type type; -}; - /* function argument constraints */ enum bpf_arg_type { ARG_DONTCARE = 0, /* unused argument in helper function */ @@ -239,10 +233,11 @@ DECLARE_PER_CPU(int, bpf_prog_active); #define BPF_PROG_TYPE(_id, _ops) \ extern const struct bpf_verifier_ops _ops; +#define BPF_MAP_TYPE(_id, _ops) \ + extern const struct bpf_map_ops _ops; #include #undef BPF_PROG_TYPE - -void bpf_register_map_type(struct bpf_map_type_list *tl); +#undef BPF_MAP_TYPE struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 68b0a9811216..03bf223f18be 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -16,3 +16,21 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) #endif + +BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PROG_ARRAY, prog_array_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERF_EVENT_ARRAY, perf_event_array_map_ops) +#ifdef CONFIG_CGROUPS +BPF_MAP_TYPE(BPF_MAP_TYPE_CGROUP_ARRAY, cgroup_array_map_ops) +#endif +BPF_MAP_TYPE(BPF_MAP_TYPE_HASH, htab_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_HASH, htab_percpu_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_HASH, htab_lru_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_LRU_PERCPU_HASH, 
htab_lru_percpu_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_LPM_TRIE, trie_map_ops) +#ifdef CONFIG_PERF_EVENTS +BPF_MAP_TYPE(BPF_MAP_TYPE_STACK_TRACE, stack_map_ops) +#endif +BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY_OF_MAPS, array_of_maps_map_ops) +BPF_MAP_TYPE(BPF_MAP_TYPE_HASH_OF_MAPS, htab_of_maps_map_ops) -- cgit v1.2.3 From 5e8cb4033807e39849b753e5399ec130c0995f1f Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 10 Apr 2017 19:25:10 +0530 Subject: PCI: endpoint: Add EP core layer to enable EP controller and EP functions Introduce a new EP core layer in order to support endpoint functions in linux kernel. This comprises the EPC library (Endpoint Controller Library) and EPF library (Endpoint Function Library). EPC library implements functions specific to an endpoint controller and EPF library implements functions specific to an endpoint function. Signed-off-by: Kishon Vijay Abraham I Acked-by: Joao Pinto Signed-off-by: Bjorn Helgaas --- include/linux/mod_devicetable.h | 10 +++ include/linux/pci-epc.h | 142 +++++++++++++++++++++++++++++++++++ include/linux/pci-epf.h | 160 ++++++++++++++++++++++++++++++++++++++++ 3 files changed, 312 insertions(+) create mode 100644 include/linux/pci-epc.h create mode 100644 include/linux/pci-epf.h (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 8850fcaf50db..566fda587fcf 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -428,6 +428,16 @@ struct i2c_device_id { kernel_ulong_t driver_data; /* Data private to the driver */ }; +/* pci_epf */ + +#define PCI_EPF_NAME_SIZE 20 +#define PCI_EPF_MODULE_PREFIX "pci_epf:" + +struct pci_epf_device_id { + char name[PCI_EPF_NAME_SIZE]; + kernel_ulong_t driver_data; +}; + /* spi */ #define SPI_NAME_SIZE 32 diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h new file mode 100644 index 000000000000..8c63d3c37f76 --- /dev/null +++ b/include/linux/pci-epc.h @@ -0,0 +1,142 @@ +/** + * PCI 
Endpoint *Controller* (EPC) header file + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. + */ + +#ifndef __LINUX_PCI_EPC_H +#define __LINUX_PCI_EPC_H + +#include + +struct pci_epc; + +enum pci_epc_irq_type { + PCI_EPC_IRQ_UNKNOWN, + PCI_EPC_IRQ_LEGACY, + PCI_EPC_IRQ_MSI, +}; + +/** + * struct pci_epc_ops - set of function pointers for performing EPC operations + * @write_header: ops to populate configuration space header + * @set_bar: ops to configure the BAR + * @clear_bar: ops to reset the BAR + * @map_addr: ops to map CPU address to PCI address + * @unmap_addr: ops to unmap CPU address and PCI address + * @set_msi: ops to set the requested number of MSI interrupts in the MSI + * capability register + * @get_msi: ops to get the number of MSI interrupts allocated by the RC from + * the MSI capability register + * @raise_irq: ops to raise a legacy or MSI interrupt + * @start: ops to start the PCI link + * @stop: ops to stop the PCI link + * @owner: the module owner containing the ops + */ +struct pci_epc_ops { + int (*write_header)(struct pci_epc *pci_epc, + struct pci_epf_header *hdr); + int (*set_bar)(struct pci_epc *epc, enum pci_barno bar, + dma_addr_t bar_phys, size_t size, int flags); + void (*clear_bar)(struct pci_epc *epc, enum pci_barno bar); + int (*map_addr)(struct pci_epc *epc, phys_addr_t addr, + u64 pci_addr, size_t size); + void (*unmap_addr)(struct pci_epc *epc, phys_addr_t addr); + int (*set_msi)(struct pci_epc *epc, u8 interrupts); + int (*get_msi)(struct pci_epc *epc); + int (*raise_irq)(struct pci_epc *pci_epc, + enum pci_epc_irq_type type, u8 interrupt_num); + int (*start)(struct pci_epc *epc); + void (*stop)(struct pci_epc *epc); + struct module *owner; +}; + +/** + * struct pci_epc_mem - address space of 
the endpoint controller + * @phys_base: physical base address of the PCI address space + * @size: the size of the PCI address space + * @bitmap: bitmap to manage the PCI address space + * @pages: number of bits representing the address region + */ +struct pci_epc_mem { + phys_addr_t phys_base; + size_t size; + unsigned long *bitmap; + int pages; +}; + +/** + * struct pci_epc - represents the PCI EPC device + * @dev: PCI EPC device + * @pci_epf: list of endpoint functions present in this EPC device + * @ops: function pointers for performing endpoint operations + * @mem: address space of the endpoint controller + * @max_functions: max number of functions that can be configured in this EPC + * @lock: spinlock to protect pci_epc ops + */ +struct pci_epc { + struct device dev; + struct list_head pci_epf; + const struct pci_epc_ops *ops; + struct pci_epc_mem *mem; + u8 max_functions; + /* spinlock to protect against concurrent access of EP controller */ + spinlock_t lock; +}; + +#define to_pci_epc(device) container_of((device), struct pci_epc, dev) + +#define pci_epc_create(dev, ops) \ + __pci_epc_create((dev), (ops), THIS_MODULE) +#define devm_pci_epc_create(dev, ops) \ + __devm_pci_epc_create((dev), (ops), THIS_MODULE) + +static inline void epc_set_drvdata(struct pci_epc *epc, void *data) +{ + dev_set_drvdata(&epc->dev, data); +} + +static inline void *epc_get_drvdata(struct pci_epc *epc) +{ + return dev_get_drvdata(&epc->dev); +} + +struct pci_epc * +__devm_pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, + struct module *owner); +struct pci_epc * +__pci_epc_create(struct device *dev, const struct pci_epc_ops *ops, + struct module *owner); +void devm_pci_epc_destroy(struct device *dev, struct pci_epc *epc); +void pci_epc_destroy(struct pci_epc *epc); +int pci_epc_add_epf(struct pci_epc *epc, struct pci_epf *epf); +void pci_epc_linkup(struct pci_epc *epc); +void pci_epc_remove_epf(struct pci_epc *epc, struct pci_epf *epf); +int 
pci_epc_write_header(struct pci_epc *epc, struct pci_epf_header *hdr); +int pci_epc_set_bar(struct pci_epc *epc, enum pci_barno bar, + dma_addr_t bar_phys, size_t size, int flags); +void pci_epc_clear_bar(struct pci_epc *epc, int bar); +int pci_epc_map_addr(struct pci_epc *epc, phys_addr_t phys_addr, + u64 pci_addr, size_t size); +void pci_epc_unmap_addr(struct pci_epc *epc, phys_addr_t phys_addr); +int pci_epc_set_msi(struct pci_epc *epc, u8 interrupts); +int pci_epc_get_msi(struct pci_epc *epc); +int pci_epc_raise_irq(struct pci_epc *epc, enum pci_epc_irq_type type, + u8 interrupt_num); +int pci_epc_start(struct pci_epc *epc); +void pci_epc_stop(struct pci_epc *epc); +struct pci_epc *pci_epc_get(const char *epc_name); +void pci_epc_put(struct pci_epc *epc); + +int pci_epc_mem_init(struct pci_epc *epc, phys_addr_t phys_addr, size_t size); +void pci_epc_mem_exit(struct pci_epc *epc); +void __iomem *pci_epc_mem_alloc_addr(struct pci_epc *epc, + phys_addr_t *phys_addr, size_t size); +void pci_epc_mem_free_addr(struct pci_epc *epc, phys_addr_t phys_addr, + void __iomem *virt_addr, size_t size); +#endif /* __LINUX_PCI_EPC_H */ diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h new file mode 100644 index 000000000000..5628714f7bcf --- /dev/null +++ b/include/linux/pci-epf.h @@ -0,0 +1,160 @@ +/** + * PCI Endpoint *Function* (EPF) header file + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. 
+ */ + +#ifndef __LINUX_PCI_EPF_H +#define __LINUX_PCI_EPF_H + +#include +#include + +struct pci_epf; + +enum pci_interrupt_pin { + PCI_INTERRUPT_UNKNOWN, + PCI_INTERRUPT_INTA, + PCI_INTERRUPT_INTB, + PCI_INTERRUPT_INTC, + PCI_INTERRUPT_INTD, +}; + +enum pci_barno { + BAR_0, + BAR_1, + BAR_2, + BAR_3, + BAR_4, + BAR_5, +}; + +/** + * struct pci_epf_header - represents standard configuration header + * @vendorid: identifies device manufacturer + * @deviceid: identifies a particular device + * @revid: specifies a device-specific revision identifier + * @progif_code: identifies a specific register-level programming interface + * @subclass_code: identifies more specifically the function of the device + * @baseclass_code: broadly classifies the type of function the device performs + * @cache_line_size: specifies the system cacheline size in units of DWORDs + * @subsys_vendor_id: vendor of the add-in card or subsystem + * @subsys_id: id specific to vendor + * @interrupt_pin: interrupt pin the device (or device function) uses + */ +struct pci_epf_header { + u16 vendorid; + u16 deviceid; + u8 revid; + u8 progif_code; + u8 subclass_code; + u8 baseclass_code; + u8 cache_line_size; + u16 subsys_vendor_id; + u16 subsys_id; + enum pci_interrupt_pin interrupt_pin; +}; + +/** + * struct pci_epf_ops - set of function pointers for performing EPF operations + * @bind: ops to perform when a EPC device has been bound to EPF device + * @unbind: ops to perform when a binding has been lost between a EPC device + * and EPF device + * @linkup: ops to perform when the EPC device has established a connection with + * a host system + */ +struct pci_epf_ops { + int (*bind)(struct pci_epf *epf); + void (*unbind)(struct pci_epf *epf); + void (*linkup)(struct pci_epf *epf); +}; + +/** + * struct pci_epf_driver - represents the PCI EPF driver + * @probe: ops to perform when a new EPF device has been bound to the EPF driver + * @remove: ops to perform when the binding between the EPF device and EPF 
+ * driver is broken + * @driver: PCI EPF driver + * @ops: set of function pointers for performing EPF operations + * @owner: the owner of the module that registers the PCI EPF driver + * @id_table: identifies EPF devices for probing + */ +struct pci_epf_driver { + int (*probe)(struct pci_epf *epf); + int (*remove)(struct pci_epf *epf); + + struct device_driver driver; + struct pci_epf_ops *ops; + struct module *owner; + const struct pci_epf_device_id *id_table; +}; + +#define to_pci_epf_driver(drv) (container_of((drv), struct pci_epf_driver, \ + driver)) + +/** + * struct pci_epf_bar - represents the BAR of EPF device + * @phys_addr: physical address that should be mapped to the BAR + * @size: the size of the address space present in BAR + */ +struct pci_epf_bar { + dma_addr_t phys_addr; + size_t size; +}; + +/** + * struct pci_epf - represents the PCI EPF device + * @dev: the PCI EPF device + * @name: the name of the PCI EPF device + * @header: represents standard configuration header + * @bar: represents the BAR of EPF device + * @msi_interrupts: number of MSI interrupts required by this function + * @func_no: unique function number within this endpoint device + * @epc: the EPC device to which this EPF device is bound + * @driver: the EPF driver to which this EPF device is bound + * @list: to add pci_epf as a list of PCI endpoint functions to pci_epc + */ +struct pci_epf { + struct device dev; + const char *name; + struct pci_epf_header *header; + struct pci_epf_bar bar[6]; + u8 msi_interrupts; + u8 func_no; + + struct pci_epc *epc; + struct pci_epf_driver *driver; + struct list_head list; +}; + +#define to_pci_epf(epf_dev) container_of((epf_dev), struct pci_epf, dev) + +#define pci_epf_register_driver(driver) \ + __pci_epf_register_driver((driver), THIS_MODULE) + +static inline void epf_set_drvdata(struct pci_epf *epf, void *data) +{ + dev_set_drvdata(&epf->dev, data); +} + +static inline void *epf_get_drvdata(struct pci_epf *epf) +{ + return 
dev_get_drvdata(&epf->dev); +} + +struct pci_epf *pci_epf_create(const char *name); +void pci_epf_destroy(struct pci_epf *epf); +int __pci_epf_register_driver(struct pci_epf_driver *driver, + struct module *owner); +void pci_epf_unregister_driver(struct pci_epf_driver *driver); +void *pci_epf_alloc_space(struct pci_epf *epf, size_t size, enum pci_barno bar); +void pci_epf_free_space(struct pci_epf *epf, void *addr, enum pci_barno bar); +int pci_epf_bind(struct pci_epf *epf); +void pci_epf_unbind(struct pci_epf *epf); +void pci_epf_linkup(struct pci_epf *epf); +#endif /* __LINUX_PCI_EPF_H */ -- cgit v1.2.3 From d746799116103d857be203382b09035bbe225d03 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Mar 2017 15:14:59 +0530 Subject: PCI: endpoint: Introduce configfs entry for configuring EP functions Introduce a new configfs entry to configure the EP function (like configuring the standard configuration header entries) and to bind the EP function with EP controller. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Bjorn Helgaas --- include/linux/pci-ep-cfs.h | 41 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 41 insertions(+) create mode 100644 include/linux/pci-ep-cfs.h (limited to 'include/linux') diff --git a/include/linux/pci-ep-cfs.h b/include/linux/pci-ep-cfs.h new file mode 100644 index 000000000000..263b89ea5705 --- /dev/null +++ b/include/linux/pci-ep-cfs.h @@ -0,0 +1,41 @@ +/** + * PCI Endpoint ConfigFS header file + * + * Copyright (C) 2017 Texas Instruments + * Author: Kishon Vijay Abraham I + * + * This program is free software: you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 of + * the License as published by the Free Software Foundation. 
+ */ + +#ifndef __LINUX_PCI_EP_CFS_H +#define __LINUX_PCI_EP_CFS_H + +#include + +#ifdef CONFIG_PCI_ENDPOINT_CONFIGFS +struct config_group *pci_ep_cfs_add_epc_group(const char *name); +void pci_ep_cfs_remove_epc_group(struct config_group *group); +struct config_group *pci_ep_cfs_add_epf_group(const char *name); +void pci_ep_cfs_remove_epf_group(struct config_group *group); +#else +static inline struct config_group *pci_ep_cfs_add_epc_group(const char *name) +{ + return 0; +} + +static inline void pci_ep_cfs_remove_epc_group(struct config_group *group) +{ +} + +static inline struct config_group *pci_ep_cfs_add_epf_group(const char *name) +{ + return 0; +} + +static inline void pci_ep_cfs_remove_epf_group(struct config_group *group) +{ +} +#endif +#endif /* __LINUX_PCI_EP_CFS_H */ -- cgit v1.2.3 From 3a401a2ce1cb6f6e52b78f21aa82e5d90e35c430 Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Mar 2017 15:15:01 +0530 Subject: PCI: endpoint: Create configfs entry for EPC device and EPF driver Invoke APIs provided by pci-ep-cfs to create configfs entry for every EPC device and EPF driver to help users in creating EPF device and binding the EPF device to the EPC device. 
Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Bjorn Helgaas --- include/linux/pci-epc.h | 2 ++ include/linux/pci-epf.h | 2 ++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-epc.h b/include/linux/pci-epc.h index 8c63d3c37f76..af5edbf3eea3 100644 --- a/include/linux/pci-epc.h +++ b/include/linux/pci-epc.h @@ -77,6 +77,7 @@ struct pci_epc_mem { * @ops: function pointers for performing endpoint operations * @mem: address space of the endpoint controller * @max_functions: max number of functions that can be configured in this EPC + * @group: configfs group representing the PCI EPC device * @lock: spinlock to protect pci_epc ops */ struct pci_epc { @@ -85,6 +86,7 @@ struct pci_epc { const struct pci_epc_ops *ops; struct pci_epc_mem *mem; u8 max_functions; + struct config_group *group; /* spinlock to protect against concurrent access of EP controller */ spinlock_t lock; }; diff --git a/include/linux/pci-epf.h b/include/linux/pci-epf.h index 5628714f7bcf..0d529cb90143 100644 --- a/include/linux/pci-epf.h +++ b/include/linux/pci-epf.h @@ -82,6 +82,7 @@ struct pci_epf_ops { * @driver: PCI EPF driver * @ops: set of function pointers for performing EPF operations * @owner: the owner of the module that registers the PCI EPF driver + * @group: configfs group corresponding to the PCI EPF driver * @id_table: identifies EPF devices for probing */ struct pci_epf_driver { @@ -91,6 +92,7 @@ struct pci_epf_driver { struct device_driver driver; struct pci_epf_ops *ops; struct module *owner; + struct config_group *group; const struct pci_epf_device_id *id_table; }; -- cgit v1.2.3 From e0c34e900611a58c93adf463d096c5843eb1967a Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 5 Apr 2017 10:23:12 -0300 Subject: usb: get rid of some ReST doc build errors We need a space before a numbered list to avoid those warnings: ./drivers/usb/core/message.c:478: ERROR: Unexpected indentation.
./drivers/usb/core/message.c:479: WARNING: Block quote ends without a blank line; unexpected unindent. ./include/linux/usb/composite.h:455: ERROR: Unexpected indentation. ./include/linux/usb/composite.h:456: WARNING: Block quote ends without a blank line; unexpected unindent. Signed-off-by: Mauro Carvalho Chehab Acked-by: Greg Kroah-Hartman Signed-off-by: Jonathan Corbet --- include/linux/usb/composite.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 4616a49a1c2e..30a063e98c19 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -451,6 +451,7 @@ static inline struct usb_composite_driver *to_cdriver( * sure doing that won't hurt too much. * * One notion for how to handle Wireless USB devices involves: + * * (a) a second gadget here, discovery mechanism TBD, but likely * needing separate "register/unregister WUSB gadget" calls; * (b) updates to usb_gadget to include flags "is it wireless", -- cgit v1.2.3 From 3e208a00cffd1358cecf79074e50b093212043c1 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 5 Apr 2017 10:23:13 -0300 Subject: usb: composite.h: fix two warnings when building docs By definition, we use /* private: */ tag when we won't be documenting a parameter. However, those two parameters are documented: ./include/linux/usb/composite.h:510: warning: Excess struct/union/enum/typedef member 'setup_pending' description in 'usb_composite_dev' ./include/linux/usb/composite.h:510: warning: Excess struct/union/enum/typedef member 'os_desc_pending' description in 'usb_composite_dev' So, we need to use /* public: */ to avoid a warning. 
Signed-off-by: Mauro Carvalho Chehab Acked-by: Greg Kroah-Hartman Signed-off-by: Jonathan Corbet --- include/linux/usb/composite.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index 30a063e98c19..f665d2ceac20 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -504,8 +504,9 @@ struct usb_composite_dev { /* protects deactivations and delayed_status counts*/ spinlock_t lock; - unsigned setup_pending:1; - unsigned os_desc_pending:1; + /* public: */ + unsigned int setup_pending:1; + unsigned int os_desc_pending:1; }; extern int usb_string_id(struct usb_composite_dev *c); -- cgit v1.2.3 From 0cb300623e3bb460fd9853bbde2fd1973e3bbcd8 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Wed, 5 Apr 2017 10:23:14 -0300 Subject: usb: gadget.h: be consistent at kernel doc macros There's one value that uses spaces instead of tabs to indent. That causes the following warning: ./include/linux/usb/gadget.h:193: ERROR: Unexpected indentation. Signed-off-by: Mauro Carvalho Chehab Acked-by: Greg Kroah-Hartman Signed-off-by: Jonathan Corbet --- include/linux/usb/gadget.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index e4516e9ded0f..fbc22a39e7bc 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -188,7 +188,7 @@ struct usb_ep_caps { * @caps:The structure describing types and directions supported by endoint. * @maxpacket:The maximum packet size used on this endpoint. The initial * value can sometimes be reduced (hardware allowing), according to - * the endpoint descriptor used to configure the endpoint. + * the endpoint descriptor used to configure the endpoint. * @maxpacket_limit:The maximum packet size value which can be handled by this * endpoint.
It's set once by UDC driver when endpoint is initialized, and * should not be changed. Should not be confused with maxpacket. -- cgit v1.2.3 From 3ea6b7001ef5da9f9816ee3c4fe731f4fe08b865 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 6 Apr 2017 13:19:35 +0900 Subject: PM / devfreq: Move struct devfreq_governor to devfreq directory This patch moves the struct devfreq_governor from header file to the devfreq directory because this structure is private data and it have to be only accessed by the devfreq core. Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 30 +----------------------------- 1 file changed, 1 insertion(+), 29 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index e0acb0e5243b..6c220e4ebb6b 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -27,6 +27,7 @@ #define DEVFREQ_POSTCHANGE (1) struct devfreq; +struct devfreq_governor; /** * struct devfreq_dev_status - Data given from devfreq user device to @@ -100,35 +101,6 @@ struct devfreq_dev_profile { unsigned int max_state; }; -/** - * struct devfreq_governor - Devfreq policy governor - * @node: list node - contains registered devfreq governors - * @name: Governor's name - * @immutable: Immutable flag for governor. If the value is 1, - * this govenror is never changeable to other governor. - * @get_target_freq: Returns desired operating frequency for the device. - * Basically, get_target_freq will run - * devfreq_dev_profile.get_dev_status() to get the - * status of the device (load = busy_time / total_time). - * If no_central_polling is set, this callback is called - * only with update_devfreq() notified by OPP. - * @event_handler: Callback for devfreq core framework to notify events - * to governors. Events include per device governor - * init and exit, opp changes out of devfreq, suspend - * and resume of per device devfreq during device idle. 
- * - * Note that the callbacks are called with devfreq->lock locked by devfreq. - */ -struct devfreq_governor { - struct list_head node; - - const char name[DEVFREQ_NAME_LEN]; - const unsigned int immutable; - int (*get_target_freq)(struct devfreq *this, unsigned long *freq); - int (*event_handler)(struct devfreq *devfreq, - unsigned int event, void *data); -}; - /** * struct devfreq - Device devfreq structure * @node: list node - contains the devices with devfreq that have been -- cgit v1.2.3 From 5b3dc2f37d7daf76a679cd204492ec5dff06bb8a Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 10 Apr 2017 11:11:17 +0300 Subject: net: neigh: make ->hh_len 32-bit Using 16-bit ->hh_len doesn't save any memory, save some .text instead: add/remove: 0/0 grow/shrink: 1/6 up/down: 2/-19 (-17) function old new delta neigh_update 2312 2314 +2 fwnet_header_cache 199 197 -2 eth_header_cache 101 99 -2 ip6_finish_output2 2371 2368 -3 vrf_finish_output6 1522 1518 -4 vrf_finish_output 1413 1409 -4 ip_finish_output2 1627 1623 -4 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc07c3be2705..8ea8a8b70755 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -237,8 +237,7 @@ struct netdev_hw_addr_list { netdev_hw_addr_list_for_each(ha, &(dev)->mc) struct hh_cache { - u16 hh_len; - u16 __pad; + unsigned int hh_len; seqlock_t hh_lock; /* cached hardware header; allow for machine alignment needs. */ -- cgit v1.2.3 From d92be7a41ef15463eb816a4a2d42bf094b56dfce Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 10 Apr 2017 11:25:26 +0300 Subject: net: make struct net_device::min_header_len 8-bit This field is never big enough to warrant 16-bitness. 
8-bit accesses enjoy a shorter encoding on i386/x86_64 than 16-bit accesses: add/remove: 0/0 grow/shrink: 0/2 up/down: 0/-10 (-10) function old new delta loopback_setup 169 164 -5 ether_setup 148 143 -5 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8ea8a8b70755..b0aa089ce67f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1715,7 +1715,7 @@ struct net_device { unsigned int max_mtu; unsigned short type; unsigned short hard_header_len; - unsigned short min_header_len; + unsigned char min_header_len; unsigned short needed_headroom; unsigned short needed_tailroom; -- cgit v1.2.3 From 993225adf4af20a0e50e37c3d4894b79c98e01c9 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 7 Apr 2017 10:50:33 +0200 Subject: KVM: x86: rename kvm_vcpu_request_scan_ioapic() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Let's rename it into a proper arch specific callback.
Signed-off-by: David Hildenbrand Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 7e74ae4d99bb..397b7b5b1933 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -502,10 +502,10 @@ int __must_check vcpu_load(struct kvm_vcpu *vcpu); void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC -void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm); void kvm_arch_post_irq_routing_update(struct kvm *kvm); #else -static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) +static inline void kvm_arch_post_irq_ack_notifier_list_update(struct kvm *kvm) { } static inline void kvm_arch_post_irq_routing_update(struct kvm *kvm) -- cgit v1.2.3 From b3f80c8f75efb2e6a817a0e48bf36cd30685a138 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 28 Mar 2017 17:59:31 +0200 Subject: serdev: add serdev_device_wait_until_sent Add method, which waits until the transmission buffer has been sent. Note, that the change in ttyport_write_wakeup is related, since tty_wait_until_sent will hang without that change. 
Acked-by: Rob Herring Acked-by: Pavel Machek Signed-off-by: Sebastian Reichel Acked-by: Greg Kroah-Hartman Signed-off-by: Marcel Holtmann --- include/linux/serdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index 9519da6253a8..a308b206d204 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -81,6 +81,7 @@ struct serdev_controller_ops { void (*close)(struct serdev_controller *); void (*set_flow_control)(struct serdev_controller *, bool); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); + void (*wait_until_sent)(struct serdev_controller *, long); }; /** @@ -186,6 +187,7 @@ int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); +void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); @@ -223,6 +225,7 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev return 0; } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} +static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} static inline int serdev_device_write_buf(struct serdev_device *sdev, const unsigned char *buf, size_t count) { return -ENODEV; -- cgit v1.2.3 From 5659dab26f09a60db8bd1600e1ce89802fab1c7f Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 28 Mar 2017 17:59:32 +0200 Subject: serdev: implement get/set tiocm Add method for getting and setting tiocm. 
Acked-by: Pavel Machek Acked-by: Rob Herring Signed-off-by: Sebastian Reichel Acked-by: Greg Kroah-Hartman Signed-off-by: Marcel Holtmann --- include/linux/serdev.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index a308b206d204..e29a270f603c 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -15,6 +15,7 @@ #include #include +#include struct serdev_controller; struct serdev_device; @@ -82,6 +83,8 @@ struct serdev_controller_ops { void (*set_flow_control)(struct serdev_controller *, bool); unsigned int (*set_baudrate)(struct serdev_controller *, unsigned int); void (*wait_until_sent)(struct serdev_controller *, long); + int (*get_tiocm)(struct serdev_controller *); + int (*set_tiocm)(struct serdev_controller *, unsigned int, unsigned int); }; /** @@ -188,6 +191,8 @@ void serdev_device_close(struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); void serdev_device_wait_until_sent(struct serdev_device *, long); +int serdev_device_get_tiocm(struct serdev_device *); +int serdev_device_set_tiocm(struct serdev_device *, int, int); int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); void serdev_device_write_flush(struct serdev_device *); int serdev_device_write_room(struct serdev_device *); @@ -226,6 +231,14 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} +static inline int serdev_device_get_tiocm(struct serdev_device *serdev) +{ + return -ENOTSUPP; +} +static inline int serdev_device_set_tiocm(struct serdev_device *serdev, int set, int clear) +{ + return -ENOTSUPP; +} static inline int 
serdev_device_write_buf(struct serdev_device *sdev, const unsigned char *buf, size_t count) { return -ENODEV; -- cgit v1.2.3 From 756db778748949f6403b727fc6251674dbfcb1a2 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Tue, 28 Mar 2017 17:59:33 +0200 Subject: serdev: add helpers for cts and rts handling Add serdev helper functions for handling of cts and rts lines using the serdev's tiocm functions. Acked-by: Rob Herring Signed-off-by: Sebastian Reichel Acked-by: Greg Kroah-Hartman Signed-off-by: Marcel Holtmann --- include/linux/serdev.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index e29a270f603c..37395b8eb8f1 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -16,6 +16,7 @@ #include #include #include +#include struct serdev_controller; struct serdev_device; @@ -254,6 +255,36 @@ static inline int serdev_device_write_room(struct serdev_device *sdev) #endif /* CONFIG_SERIAL_DEV_BUS */ +static inline bool serdev_device_get_cts(struct serdev_device *serdev) +{ + int status = serdev_device_get_tiocm(serdev); + return !!(status & TIOCM_CTS); +} + +static inline int serdev_device_wait_for_cts(struct serdev_device *serdev, bool state, int timeout_ms) +{ + unsigned long timeout; + bool signal; + + timeout = jiffies + msecs_to_jiffies(timeout_ms); + while (time_is_after_jiffies(timeout)) { + signal = serdev_device_get_cts(serdev); + if (signal == state) + return 0; + usleep_range(1000, 2000); + } + + return -ETIMEDOUT; +} + +static inline int serdev_device_set_rts(struct serdev_device *serdev, bool enable) +{ + if (enable) + return serdev_device_set_tiocm(serdev, TIOCM_RTS, 0); + else + return serdev_device_set_tiocm(serdev, 0, TIOCM_RTS); +} + /* * serdev hooks into TTY core */ -- cgit v1.2.3 From 006358b35c73ab75544fb4509483a81ef1a9c0b2 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Fri, 7 Apr 2017 15:33:31 -0700 Subject: 
libnvdimm: add support for clear poison list and badblocks for device dax Providing mechanism to clear poison list via the ndctl ND_CMD_CLEAR_ERROR call. We will update the poison list and also the badblocks at region level if the region is in dax mode or in pmem mode and not active. In other words we force badblocks to be cleared through write requests if the address is currently accessed through a block device, otherwise it can only be done via the ioctl+dsm path. Signed-off-by: Dave Jiang Reviewed-by: Johannes Thumshirn Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 77e7af32543f..1c609e89048a 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -120,7 +120,9 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); -void nvdimm_clear_from_poison_list(struct nvdimm_bus *nvdimm_bus, +void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, + phys_addr_t start, unsigned int len); +void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start, unsigned int len); struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); @@ -162,4 +164,7 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); +int nvdimm_region_badblocks_clear(struct device *dev, void *data); +void __nvdimm_bus_badblocks_clear(struct nvdimm_bus *nvdimm_bus, + struct resource *res); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 7b6be8444e0f0dd675b54d059793423d3c9b4c03 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 11 Apr 2017 09:49:49 -0700 Subject: dax: refactor dax-fs into a generic 
provider of 'struct dax_device' instances We want dax capable drivers to be able to publish a set of dax operations [1]. However, we do not want to further abuse block_devices to advertise these operations. Instead we will attach these operations to a dax device and add a lookup mechanism to go from block device path to a dax device. A dax capable driver like pmem or brd is responsible for registering a dax device, alongside a block device, and then a dax capable filesystem is responsible for retrieving the dax device by path name if it wants to call dax_operations. For now, we refactor the dax pseudo-fs to be a generic facility, rather than an implementation detail, of the device-dax use case. Where a "dax device" is just an inode + dax infrastructure, and "Device DAX" is a mapping service layered on top of that base 'struct dax_device'. "Filesystem DAX" is then a mapping service that layers a filesystem on top of that same base device. Filesystem DAX is associated with a block_device for now, but perhaps directly to a dax device in the future, or for new pmem-only filesystems. [1]: https://lkml.org/lkml/2017/1/19/880 Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index d8a3dc042e1c..5b62f5d19aea 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -8,6 +8,9 @@ struct iomap_ops; +int dax_read_lock(void); +void dax_read_unlock(int id); + /* * We use lowest available bit in exceptional entry for locking, one bit for * the entry size (PMD) and two more to tell us if the entry is a huge zero -- cgit v1.2.3 From 6f79309acc32b025064a496dbfcd4c70c557294e Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Mon, 3 Apr 2017 18:05:21 +0200 Subject: gpio: Use unsigned int for interrupt numbers Interrupt numbers are never negative, zero serves as the special invalid value. 
Signed-off-by: Thierry Reding Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 846f3b989480..393582867afd 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -168,7 +168,7 @@ struct gpio_chip { unsigned int irq_base; irq_flow_handler_t irq_handler; unsigned int irq_default_type; - int irq_chained_parent; + unsigned int irq_chained_parent; bool irq_nested; bool irq_need_valid_mask; unsigned long *irq_valid_mask; @@ -244,12 +244,12 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, - int parent_irq, + unsigned int parent_irq, irq_flow_handler_t parent_handler); void gpiochip_set_nested_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, - int parent_irq); + unsigned int parent_irq); int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, struct irq_chip *irqchip, -- cgit v1.2.3 From 2d4bc93368f5a0ddb57c8c885cdad9c9b7a10ed5 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Apr 2017 14:34:04 +0200 Subject: netlink: extended ACK reporting Add the base infrastructure and UAPI for netlink extended ACK reporting. All "manual" calls to netlink_ack() pass NULL for now and thus don't get extended ACK reporting. Big thanks goes to Pablo Neira Ayuso for not only bringing up the whole topic at netconf (again) but also coming up with the nlattr passing trick and various other ideas. Signed-off-by: Johannes Berg Reviewed-by: David Ahern Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 26 +++++++++++++++++++++++++- 1 file changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index da14ab61f363..60e7137f840d 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -62,11 +62,35 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } +/** + * struct netlink_ext_ack - netlink extended ACK report struct + * @_msg: message string to report - don't access directly, use + * %NL_SET_ERR_MSG + * @bad_attr: attribute with error + */ +struct netlink_ext_ack { + const char *_msg; + const struct nlattr *bad_attr; +}; + +/* Always use this macro, this allows later putting the + * message into a separate section or such for things + * like translation or listing all possible messages. + * Currently string formatting is not supported (due + * to the lack of an output buffer.) 
+ */ +#define NL_SET_ERR_MSG(extack, msg) do { \ + static const char _msg[] = (msg); \ + \ + (extack)->_msg = _msg; \ +} while (0) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); extern void __netlink_clear_multicast_users(struct sock *sk, unsigned int group); -extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err); +extern void netlink_ack(struct sk_buff *in_skb, struct nlmsghdr *nlh, int err, + const struct netlink_ext_ack *extack); extern int netlink_has_listeners(struct sock *sk, unsigned int group); extern int netlink_unicast(struct sock *ssk, struct sk_buff *skb, __u32 portid, int nonblock); -- cgit v1.2.3 From ba0dc5f6e0ba5a5d2f575bcdb35e5d1960cf7c04 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 12 Apr 2017 14:34:06 +0200 Subject: netlink: allow sending extended ACK with cookie on success Now that we have extended error reporting and a new message format for netlink ACK messages, also extend this to be able to return arbitrary cookie data on success. This will allow, for example, nl80211 to not send an extra message for cookies identifying newly created objects, but return those directly in the ACK message. The cookie data size is currently limited to 20 bytes (since Jamal talked about using SHA1 for identifiers.) Thanks to Jamal Hadi Salim for bringing up this idea during the discussions. Signed-off-by: Johannes Berg Reviewed-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/netlink.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 60e7137f840d..8d2a8924705c 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -62,15 +62,22 @@ netlink_kernel_create(struct net *net, int unit, struct netlink_kernel_cfg *cfg) return __netlink_kernel_create(net, unit, THIS_MODULE, cfg); } +/* this can be increased when necessary - don't expose to userland */ +#define NETLINK_MAX_COOKIE_LEN 20 + /** * struct netlink_ext_ack - netlink extended ACK report struct * @_msg: message string to report - don't access directly, use * %NL_SET_ERR_MSG * @bad_attr: attribute with error + * @cookie: cookie data to return to userspace (for success) + * @cookie_len: actual cookie data length */ struct netlink_ext_ack { const char *_msg; const struct nlattr *bad_attr; + u8 cookie[NETLINK_MAX_COOKIE_LEN]; + u8 cookie_len; }; /* Always use this macro, this allows later putting the -- cgit v1.2.3 From b3b454f694db663773bc22002e10909afe9c1739 Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Thu, 13 Apr 2017 14:25:17 -0700 Subject: libnvdimm: fix clear poison locking with spinlock and GFP_NOWAIT allocation The following warning results from holding a lane spinlock, preempt_disable(), or the btt map spinlock and then trying to take the reconfig_mutex to walk the poison list and potentially add new entries. BUG: sleeping function called from invalid context at kernel/locking/mutex. c:747 in_atomic(): 1, irqs_disabled(): 0, pid: 17159, name: dd [..] Call Trace: dump_stack+0x85/0xc8 ___might_sleep+0x184/0x250 __might_sleep+0x4a/0x90 __mutex_lock+0x58/0x9b0 ? nvdimm_bus_lock+0x21/0x30 [libnvdimm] ? __nvdimm_bus_badblocks_clear+0x2f/0x60 [libnvdimm] ? acpi_nfit_forget_poison+0x79/0x80 [nfit] ? 
_raw_spin_unlock+0x27/0x40 mutex_lock_nested+0x1b/0x20 nvdimm_bus_lock+0x21/0x30 [libnvdimm] nvdimm_forget_poison+0x25/0x50 [libnvdimm] nvdimm_clear_poison+0x106/0x140 [libnvdimm] nsio_rw_bytes+0x164/0x270 [libnvdimm] btt_write_pg+0x1de/0x3e0 [nd_btt] ? blk_queue_enter+0x30/0x290 btt_make_request+0x11a/0x310 [nd_btt] ? blk_queue_enter+0xb7/0x290 ? blk_queue_enter+0x30/0x290 generic_make_request+0x118/0x3b0 A spinlock is introduced to protect the poison list. This allows us to avoid acquiring the reconfig_mutex when touching the poison list. The add_poison() function has been broken out into two helper functions. One to allocate the poison entry and the other to append the entry. This allows us to unlock the poison_lock in non-I/O path and continue to be able to allocate the poison entry with GFP_KERNEL. We will use GFP_NOWAIT in the I/O path in order to satisfy being in atomic context. Reviewed-by: Vishal Verma Signed-off-by: Dave Jiang Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 1c609e89048a..98b207611b06 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -122,8 +122,6 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, phys_addr_t start, unsigned int len); -void __nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, - phys_addr_t start, unsigned int len); struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); -- cgit v1.2.3 From 01c0e0a28da749e80cb7d549f75a5f52e2f40d0e Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 22 Mar 2017 15:55:30 +0100 Subject: power: supply: bq24190_charger: Use i2c-core irq-mapping code The i2c-core
already maps of irqs before calling the driver's probe function and there are no in tree users of bq24190_platform_data->gpio_int. Remove the redundant custom irq-mapping code and just use client->irq. Cc: Liam Breck Cc: Tony Lindgren Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Acked-by: Tony Lindgren Signed-off-by: Sebastian Reichel --- include/linux/power/bq24190_charger.h | 16 ---------------- 1 file changed, 16 deletions(-) delete mode 100644 include/linux/power/bq24190_charger.h (limited to 'include/linux') diff --git a/include/linux/power/bq24190_charger.h b/include/linux/power/bq24190_charger.h deleted file mode 100644 index 9f0283721cbc..000000000000 --- a/include/linux/power/bq24190_charger.h +++ /dev/null @@ -1,16 +0,0 @@ -/* - * Platform data for the TI bq24190 battery charger driver. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _BQ24190_CHARGER_H_ -#define _BQ24190_CHARGER_H_ - -struct bq24190_platform_data { - unsigned int gpio_int; /* GPIO pin that's connected to INT# */ -}; - -#endif -- cgit v1.2.3 From ffff885832101543c002cef7abcab0fd27a9aee1 Mon Sep 17 00:00:00 2001 From: Jayachandran C Date: Thu, 13 Apr 2017 20:30:44 +0000 Subject: PCI: Add device flag PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT Add a new quirk flag PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT to limit the DMA alias search to go no further than the bridge where the IOMMU unit is attached. The flag will be used to indicate a bridge device which forwards the address translation requests to the IOMMU, i.e., where the interrupt and DMA requests leave the PCIe hierarchy and go into the system blocks. Usually this happens at the PCI RC, so this flag is not needed. 
But on systems where there are bridges that introduce aliases above the IOMMU, this flag prevents pci_for_each_dma_alias() from generating aliases that the IOMMU will never see. The function pci_for_each_dma_alias() is updated to stop when it sees a bridge with this flag set. Link: https://bugzilla.kernel.org/show_bug.cgi?id=195447 Signed-off-by: Jayachandran C Signed-off-by: Bjorn Helgaas Reviewed-by: Robin Murphy Acked-by: David Daney --- include/linux/pci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..3f596acc05be 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -178,6 +178,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_NO_PM_RESET = (__force pci_dev_flags_t) (1 << 7), /* Get VPD from function 0 VPD */ PCI_DEV_FLAGS_VPD_REF_F0 = (__force pci_dev_flags_t) (1 << 8), + /* a non-root bridge where translation occurs, stop alias search here */ + PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From c0c379e2931b05facef538e53bf3b21f283d9a0b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 13 Apr 2017 14:56:23 -0700 Subject: mm: drop unused pmdp_huge_get_and_clear_notify() Dave noticed that after fixing MADV_DONTNEED vs numa balancing race the last pmdp_huge_get_and_clear_notify() user is gone. Let's drop the helper. Link: http://lkml.kernel.org/r/20170306112047.24809-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A.
Shutemov Cc: Dave Hansen Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 13 ------------- 1 file changed, 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 51891fb0d3ce..c91b3bcd158f 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -394,18 +394,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) ___pud; \ }) -#define pmdp_huge_get_and_clear_notify(__mm, __haddr, __pmd) \ -({ \ - unsigned long ___haddr = __haddr & HPAGE_PMD_MASK; \ - pmd_t ___pmd; \ - \ - ___pmd = pmdp_huge_get_and_clear(__mm, __haddr, __pmd); \ - mmu_notifier_invalidate_range(__mm, ___haddr, \ - ___haddr + HPAGE_PMD_SIZE); \ - \ - ___pmd; \ -}) - /* * set_pte_at_notify() sets the pte _after_ running the notifier. * This is safe to start by updating the secondary MMUs, because the primary MMU @@ -489,7 +477,6 @@ static inline void mmu_notifier_mm_destroy(struct mm_struct *mm) #define ptep_clear_flush_notify ptep_clear_flush #define pmdp_huge_clear_flush_notify pmdp_huge_clear_flush #define pudp_huge_clear_flush_notify pudp_huge_clear_flush -#define pmdp_huge_get_and_clear_notify pmdp_huge_get_and_clear #define set_pte_at_notify set_pte_at #endif /* CONFIG_MMU_NOTIFIER */ -- cgit v1.2.3 From c7ef8f0c020ac43c8a692bf989017c06ab1fdf0f Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:05:36 +0200 Subject: net: Add ESP offload features This patch adds netdev features to configure IPsec offloads. 
Signed-off-by: Steffen Klassert --- include/linux/netdev_features.h | 8 +++++++- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 ++ 3 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9a0419594e84..1d4737cffc71 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -54,8 +54,9 @@ enum { */ NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ + NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_SCTP_BIT, + NETIF_F_GSO_ESP_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -73,6 +74,8 @@ enum { NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ NETIF_F_HW_TC_BIT, /* Offload TC infrastructure */ + NETIF_F_HW_ESP_BIT, /* Hardware ESP transformation offload */ + NETIF_F_HW_ESP_TX_CSUM_BIT, /* ESP with TX checksum offload */ /* * Add your fresh new feature above and remember to update @@ -129,11 +132,14 @@ enum { #define NETIF_F_GSO_PARTIAL __NETIF_F(GSO_PARTIAL) #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) +#define NETIF_F_GSO_ESP __NETIF_F(GSO_ESP) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_HW_TC __NETIF_F(HW_TC) +#define NETIF_F_HW_ESP __NETIF_F(HW_ESP) +#define NETIF_F_HW_ESP_TX_CSUM __NETIF_F(HW_ESP_TX_CSUM) #define for_each_netdev_feature(mask_addr, bit) \ for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc07c3be2705..5bb03d181848 100644 --- 
a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4070,6 +4070,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_PARTIAL != (NETIF_F_GSO_PARTIAL >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 741d75cfc686..81ef53f06534 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -492,6 +492,8 @@ enum { SKB_GSO_TUNNEL_REMCSUM = 1 << 14, SKB_GSO_SCTP = 1 << 15, + + SKB_GSO_ESP = 1 << 16, }; #if BITS_PER_LONG > 32 -- cgit v1.2.3 From d77e38e612a017480157fe6d2c1422f42cb5b7e3 Mon Sep 17 00:00:00 2001 From: Steffen Klassert Date: Fri, 14 Apr 2017 10:06:10 +0200 Subject: xfrm: Add an IPsec hardware offloading API This patch adds all the bits that are needed to do IPsec hardware offload for IPsec states and ESP packets. We add xfrmdev_ops to the net_device. xfrmdev_ops has function pointers that are needed to manage the xfrm states in the hardware and to do a per packet offloading decision. 
Joint work with: Ilan Tayari Guy Shapiro Yossi Kuperman Signed-off-by: Guy Shapiro Signed-off-by: Ilan Tayari Signed-off-by: Yossi Kuperman Signed-off-by: Steffen Klassert --- include/linux/netdevice.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5bb03d181848..b3eb83db0223 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -824,6 +824,16 @@ struct netdev_xdp { }; }; +#ifdef CONFIG_XFRM_OFFLOAD +struct xfrmdev_ops { + int (*xdo_dev_state_add) (struct xfrm_state *x); + void (*xdo_dev_state_delete) (struct xfrm_state *x); + void (*xdo_dev_state_free) (struct xfrm_state *x); + bool (*xdo_dev_offload_ok) (struct sk_buff *skb, + struct xfrm_state *x); +}; +#endif + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1697,6 +1707,10 @@ struct net_device { const struct ndisc_ops *ndisc_ops; #endif +#ifdef CONFIG_XFRM + const struct xfrmdev_ops *xfrmdev_ops; +#endif + const struct header_ops *header_ops; unsigned int flags; -- cgit v1.2.3 From 4adec7da0536a345d901d7ba55b6c93a14eeeaff Mon Sep 17 00:00:00 2001 From: Benjamin Gaignard Date: Tue, 4 Apr 2017 09:47:51 +0200 Subject: iio: stm32 trigger: Add quadrature encoder device One of the features of the STM32 trigger hardware block is a quadrature encoder that can count up/down depending on the levels and edges of the selected external pins. This patch allows reading/writing the counter, getting its direction, setting/getting the quadrature modes and getting the scale factor. When counting up, the preset value is the limit of the counter. When counting down, the counter starts from the preset value and counts down to 0. This preset value can be set/read by using the /sys/bus/iio/devices/iio:deviceX/in_count0_preset attribute.
Signed-off-by: Benjamin Gaignard Reviewed-by: William Breathitt Gray Signed-off-by: Jonathan Cameron --- include/linux/mfd/stm32-timers.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/stm32-timers.h b/include/linux/mfd/stm32-timers.h index d0300045f04a..4a0abbc10ef6 100644 --- a/include/linux/mfd/stm32-timers.h +++ b/include/linux/mfd/stm32-timers.h @@ -21,6 +21,7 @@ #define TIM_CCMR1 0x18 /* Capt/Comp 1 Mode Reg */ #define TIM_CCMR2 0x1C /* Capt/Comp 2 Mode Reg */ #define TIM_CCER 0x20 /* Capt/Comp Enable Reg */ +#define TIM_CNT 0x24 /* Counter */ #define TIM_PSC 0x28 /* Prescaler */ #define TIM_ARR 0x2c /* Auto-Reload Register */ #define TIM_CCR1 0x34 /* Capt/Comp Register 1 */ @@ -30,6 +31,7 @@ #define TIM_BDTR 0x44 /* Break and Dead-Time Reg */ #define TIM_CR1_CEN BIT(0) /* Counter Enable */ +#define TIM_CR1_DIR BIT(4) /* Counter Direction */ #define TIM_CR1_ARPE BIT(7) /* Auto-reload Preload Ena */ #define TIM_CR2_MMS (BIT(4) | BIT(5) | BIT(6)) /* Master mode selection */ #define TIM_SMCR_SMS (BIT(0) | BIT(1) | BIT(2)) /* Slave mode selection */ -- cgit v1.2.3 From f7e30f01a9e221067bb4b579e3cfc25cd2617467 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Wed, 12 Apr 2017 11:20:29 -0700 Subject: cpumask: Add helper cpumask_available() With CONFIG_CPUMASK_OFFSTACK=y cpumask_var_t is a struct cpumask pointer, otherwise a struct cpumask array with a single element. Some code dealing with cpumasks needs to validate that a cpumask_var_t is not a NULL pointer when CONFIG_CPUMASK_OFFSTACK=y. This is typically done by performing the check always, regardless of the underlying type of cpumask_var_t. 
This works in both cases, however clang raises a warning like this when CONFIG_CPUMASK_OFFSTACK=n: kernel/irq/manage.c:839:28: error: address of array 'desc->irq_common_data.affinity' will always evaluate to 'true' [-Werror,-Wpointer-bool-conversion] Add the inline helper cpumask_available() which only performs the pointer check if CONFIG_CPUMASK_OFFSTACK=y. Signed-off-by: Matthias Kaehlcke Cc: Grant Grundler Cc: Rusty Russell Cc: Greg Hackmann Cc: Michael Davidson Cc: Andrew Morton Link: http://lkml.kernel.org/r/20170412182030.83657-1-mka@chromium.org Signed-off-by: Thomas Gleixner --- include/linux/cpumask.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 96f1e88b767c..1a675604b17d 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -667,6 +667,11 @@ void alloc_bootmem_cpumask_var(cpumask_var_t *mask); void free_cpumask_var(cpumask_var_t mask); void free_bootmem_cpumask_var(cpumask_var_t mask); +static inline bool cpumask_available(cpumask_var_t mask) +{ + return mask != NULL; +} + #else typedef struct cpumask cpumask_var_t[1]; @@ -708,6 +713,11 @@ static inline void free_cpumask_var(cpumask_var_t mask) static inline void free_bootmem_cpumask_var(cpumask_var_t mask) { } + +static inline bool cpumask_available(cpumask_var_t mask) +{ + return true; +} #endif /* CONFIG_CPUMASK_OFFSTACK */ /* It's common to want to use cpu_all_mask in struct member initializers, -- cgit v1.2.3 From 2ac00f17b2e110c67ed2af3713bc04aec62e4608 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:12 -0700 Subject: time: Delete do_sys_settimeofday() struct timespec is not y2038 safe on 32 bit machines and needs to be replaced with struct timespec64. do_sys_settimeofday() is just a wrapper function. Replace all calls to this function with direct calls to do_sys_settimeofday64() instead and delete do_sys_settimeofday().
Signed-off-by: Deepa Dinamani Cc: y2038@lists.linaro.org Cc: john.stultz@linaro.org Cc: arnd@arndb.de Cc: linux-alpha@vger.kernel.org Link: http://lkml.kernel.org/r/1490555058-4603-2-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/timekeeping.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index b598cbc7b576..3617a78897bb 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -19,21 +19,6 @@ extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday64(const struct timespec64 *ts); extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); -static inline int do_sys_settimeofday(const struct timespec *tv, - const struct timezone *tz) -{ - struct timespec64 ts64; - - if (!tv) - return do_sys_settimeofday64(NULL, tz); - - if (!timespec_valid(tv)) - return -EINVAL; - - ts64 = timespec_to_timespec64(*tv); - return do_sys_settimeofday64(&ts64, tz); -} - /* * Kernel time accessors */ -- cgit v1.2.3 From d340266e19ddb70dbd608f9deedcfb35fdb9d419 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:13 -0700 Subject: time: Change posix clocks ops interfaces to use timespec64 struct timespec is not y2038 safe on 32 bit machines. The posix clocks apis use struct timespec directly and through struct itimerspec. Replace the posix clock interfaces to use struct timespec64 and struct itimerspec64 instead. Also fix up their implementations accordingly. Note that the clock_getres() interface has also been changed to use timespec64 even though this particular interface is not affected by the y2038 problem. This helps verification for internal kernel code for y2038 readiness by getting rid of time_t/ timeval/ timespec. 
Signed-off-by: Deepa Dinamani Cc: arnd@arndb.de Cc: y2038@lists.linaro.org Cc: netdev@vger.kernel.org Cc: Richard Cochran Cc: john.stultz@linaro.org Link: http://lkml.kernel.org/r/1490555058-4603-3-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/posix-clock.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-clock.h b/include/linux/posix-clock.h index 34c4498b800f..83b22ae9ae12 100644 --- a/include/linux/posix-clock.h +++ b/include/linux/posix-clock.h @@ -59,23 +59,23 @@ struct posix_clock_operations { int (*clock_adjtime)(struct posix_clock *pc, struct timex *tx); - int (*clock_gettime)(struct posix_clock *pc, struct timespec *ts); + int (*clock_gettime)(struct posix_clock *pc, struct timespec64 *ts); - int (*clock_getres) (struct posix_clock *pc, struct timespec *ts); + int (*clock_getres) (struct posix_clock *pc, struct timespec64 *ts); int (*clock_settime)(struct posix_clock *pc, - const struct timespec *ts); + const struct timespec64 *ts); int (*timer_create) (struct posix_clock *pc, struct k_itimer *kit); int (*timer_delete) (struct posix_clock *pc, struct k_itimer *kit); void (*timer_gettime)(struct posix_clock *pc, - struct k_itimer *kit, struct itimerspec *tsp); + struct k_itimer *kit, struct itimerspec64 *tsp); int (*timer_settime)(struct posix_clock *pc, struct k_itimer *kit, int flags, - struct itimerspec *tsp, struct itimerspec *old); + struct itimerspec64 *tsp, struct itimerspec64 *old); /* * Optional character device methods: */ -- cgit v1.2.3 From 3c9c12f4b4610dba864038c7822b427816f5893c Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:14 -0700 Subject: time: Change k_clock clock_get() to use timespec64 struct timespec is not y2038 safe on 32 bit machines. Replace uses of struct timespec with struct timespec64 in the kernel. The syscall interfaces themselves will be changed in a separate series. 
Signed-off-by: Deepa Dinamani Cc: y2038@lists.linaro.org Cc: john.stultz@linaro.org Cc: arnd@arndb.de Link: http://lkml.kernel.org/r/1490555058-4603-4-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 2 +- include/linux/timekeeping.h | 5 +++++ 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 64aa189efe21..0688f3975da7 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -90,7 +90,7 @@ struct k_clock { int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); int (*clock_set) (const clockid_t which_clock, const struct timespec *tp); - int (*clock_get) (const clockid_t which_clock, struct timespec * tp); + int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 3617a78897bb..ddc229ff6d1e 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -258,6 +258,11 @@ static inline void timekeeping_clocktai(struct timespec *ts) *ts = ktime_to_timespec(ktime_get_clocktai()); } +static inline void timekeeping_clocktai64(struct timespec64 *ts) +{ + *ts = ktime_to_timespec64(ktime_get_clocktai()); +} + /* * RTC specific */ -- cgit v1.2.3 From d2e3e0ca5df7f4ffe90a04790b3be20485df056a Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:15 -0700 Subject: time: Change k_clock clock_getres() to use timespec64 struct timespec is not y2038 safe on 32 bit machines. Replace uses of struct timespec with struct timespec64 in the kernel. The syscall interfaces themselves will be changed in a separate series. 
The clock_getres() interface has also been changed to use timespec64 even though this particular interface is not affected by the y2038 problem. This helps verification for internal kernel code for y2038 readiness by getting rid of time_t/ timeval/ timespec completely. Signed-off-by: Deepa Dinamani Cc: y2038@lists.linaro.org Cc: john.stultz@linaro.org Cc: arnd@arndb.de Link: http://lkml.kernel.org/r/1490555058-4603-5-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 0688f3975da7..dd05b49074f3 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -87,7 +87,7 @@ struct k_itimer { }; struct k_clock { - int (*clock_getres) (const clockid_t which_clock, struct timespec *tp); + int (*clock_getres) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_set) (const clockid_t which_clock, const struct timespec *tp); int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp); -- cgit v1.2.3 From 0fe6afe3834ba13d75fa1168f0f66f08b427e1c0 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:16 -0700 Subject: time: Change k_clock clock_set() to use timespec64 struct timespec is not y2038 safe on 32 bit machines. Replace uses of struct timespec with struct timespec64 in the kernel. The syscall interfaces themselves will be changed in a separate series. 
Signed-off-by: Deepa Dinamani Cc: y2038@lists.linaro.org Cc: john.stultz@linaro.org Cc: arnd@arndb.de Link: http://lkml.kernel.org/r/1490555058-4603-6-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index dd05b49074f3..7825e242b128 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -89,7 +89,7 @@ struct k_itimer { struct k_clock { int (*clock_getres) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_set) (const clockid_t which_clock, - const struct timespec *tp); + const struct timespec64 *tp); int (*clock_get) (const clockid_t which_clock, struct timespec64 *tp); int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); -- cgit v1.2.3 From 5f252b325625c13db1dbc76ac6cdb49ee3bd062e Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:17 -0700 Subject: time: Change k_clock timer_set() and timer_get() to use timespec64 struct timespec is not y2038 safe on 32 bit machines. Replace uses of struct timespec with struct timespec64 in the kernel. struct itimerspec internally uses struct timespec. Use struct itimerspec64 which uses struct timespec64. The syscall interfaces themselves will be changed in a separate series. 
Signed-off-by: Deepa Dinamani Cc: y2038@lists.linaro.org Cc: john.stultz@linaro.org Cc: arnd@arndb.de Link: http://lkml.kernel.org/r/1490555058-4603-7-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/posix-timers.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index 7825e242b128..ebc4c4945339 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -96,13 +96,13 @@ struct k_clock { int (*nsleep) (const clockid_t which_clock, int flags, struct timespec *, struct timespec __user *); long (*nsleep_restart) (struct restart_block *restart_block); - int (*timer_set) (struct k_itimer * timr, int flags, - struct itimerspec * new_setting, - struct itimerspec * old_setting); - int (*timer_del) (struct k_itimer * timr); + int (*timer_set) (struct k_itimer *timr, int flags, + struct itimerspec64 *new_setting, + struct itimerspec64 *old_setting); + int (*timer_del) (struct k_itimer *timr); #define TIMER_RETRY 1 - void (*timer_get) (struct k_itimer * timr, - struct itimerspec * cur_setting); + void (*timer_get) (struct k_itimer *timr, + struct itimerspec64 *cur_setting); }; extern struct k_clock clock_posix_cpu; -- cgit v1.2.3 From ad19638463a4b5c909fcebf0f19358c4bf4fed48 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Sun, 26 Mar 2017 12:04:18 -0700 Subject: time: Change k_clock nsleep() to use timespec64 struct timespec is not y2038 safe on 32 bit machines. Replace uses of struct timespec with struct timespec64 in the kernel. The syscall interfaces themselves will be changed in a separate series. Note that the restart_block parameter for nanosleep has also been left unchanged and will be part of syscall series noted above. 
Signed-off-by: Deepa Dinamani Cc: y2038@lists.linaro.org Cc: john.stultz@linaro.org Cc: arnd@arndb.de Link: http://lkml.kernel.org/r/1490555058-4603-8-git-send-email-deepa.kernel@gmail.com Signed-off-by: Thomas Gleixner --- include/linux/hrtimer.h | 2 +- include/linux/posix-timers.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hrtimer.h b/include/linux/hrtimer.h index 23d58fcd4d9a..8c5b10eb7265 100644 --- a/include/linux/hrtimer.h +++ b/include/linux/hrtimer.h @@ -452,7 +452,7 @@ static inline u64 hrtimer_forward_now(struct hrtimer *timer, } /* Precise sleep: */ -extern long hrtimer_nanosleep(struct timespec *rqtp, +extern long hrtimer_nanosleep(struct timespec64 *rqtp, struct timespec __user *rmtp, const enum hrtimer_mode mode, const clockid_t clockid); diff --git a/include/linux/posix-timers.h b/include/linux/posix-timers.h index ebc4c4945339..8c1e43ab14a9 100644 --- a/include/linux/posix-timers.h +++ b/include/linux/posix-timers.h @@ -94,7 +94,7 @@ struct k_clock { int (*clock_adj) (const clockid_t which_clock, struct timex *tx); int (*timer_create) (struct k_itimer *timer); int (*nsleep) (const clockid_t which_clock, int flags, - struct timespec *, struct timespec __user *); + struct timespec64 *, struct timespec __user *); long (*nsleep_restart) (struct restart_block *restart_block); int (*timer_set) (struct k_itimer *timr, int flags, struct itimerspec64 *new_setting, -- cgit v1.2.3 From 5a8d75a1b8c99bdc926ba69b7b7dbe4fae81a5af Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 14 Apr 2017 13:58:29 -0600 Subject: block: fix bio_will_gap() for first bvec with offset Commit 729204ef49ec("block: relax check on sg gap") allows us to merge bios, if both are physically contiguous. This change can merge a huge number of small bios, through mkfs for example, mkfs.ntfs running time can be decreased to ~1/10. 
But if one rq starts with a non-aligned buffer (the 1st bvec's bv_offset is non-zero) and if we allow the merge, it is quite difficult to respect sg gap limit, especially the max segment size, or we risk having an unaligned virtual boundary. This patch tries to avoid the issue by disallowing a merge, if the req starts with an unaligned buffer. Also add comments to explain why the merged segment can't end in unaligned virt boundary. Fixes: 729204ef49ec ("block: relax check on sg gap") Tested-by: Johannes Thumshirn Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Rewrote parts of the commit message and comments. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 32 ++++++++++++++++++++++++++++---- 1 file changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7548f332121a..01a696b0a4d3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1672,12 +1672,36 @@ static inline bool bios_segs_mergeable(struct request_queue *q, return true; } -static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, - struct bio *next) +static inline bool bio_will_gap(struct request_queue *q, + struct request *prev_rq, + struct bio *prev, + struct bio *next) { if (bio_has_data(prev) && queue_virt_boundary(q)) { struct bio_vec pb, nb; + /* + * don't merge if the 1st bio starts with non-zero + * offset, otherwise it is quite difficult to respect + * sg gap limit. We work hard to merge a huge number of small + * single bios in case of mkfs. 
+ */ + if (prev_rq) + bio_get_first_bvec(prev_rq->bio, &pb); + else + bio_get_first_bvec(prev, &pb); + if (pb.bv_offset) + return true; + + /* + * We don't need to worry about the situation that the + * merged segment ends in unaligned virt boundary: + * + * - if 'pb' ends aligned, the merged segment ends aligned + * - if 'pb' ends unaligned, the next bio must include + * one single bvec of 'nb', otherwise the 'nb' can't + * merge with 'pb' + */ bio_get_last_bvec(prev, &pb); bio_get_first_bvec(next, &nb); @@ -1690,12 +1714,12 @@ static inline bool bio_will_gap(struct request_queue *q, struct bio *prev, static inline bool req_gap_back_merge(struct request *req, struct bio *bio) { - return bio_will_gap(req->q, req->biotail, bio); + return bio_will_gap(req->q, req, req->biotail, bio); } static inline bool req_gap_front_merge(struct request *req, struct bio *bio) { - return bio_will_gap(req->q, bio, req->bio); + return bio_will_gap(req->q, NULL, bio, req->bio); } int kblockd_schedule_work(struct work_struct *work); -- cgit v1.2.3 From 84253394927c4352652d0b118ad9583f5646959b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Apr 2017 13:28:46 +0200 Subject: remove the mg_disk driver This driver was added in 2008, but as far as I can tell we never had a single platform that actually registered resources for the platform driver. It's also been unmaintained for a long time and apparently has an ATA mode that can be driven using the IDE/libata subsystem.
Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/mg_disk.h | 45 --------------------------------------------- 1 file changed, 45 deletions(-) delete mode 100644 include/linux/mg_disk.h (limited to 'include/linux') diff --git a/include/linux/mg_disk.h b/include/linux/mg_disk.h deleted file mode 100644 index e11f4d9f1c2e..000000000000 --- a/include/linux/mg_disk.h +++ /dev/null @@ -1,45 +0,0 @@ -/* - * include/linux/mg_disk.c - * - * Private data for mflash platform driver - * - * (c) 2008 mGine Co.,LTD - * (c) 2008 unsik Kim - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef __MG_DISK_H__ -#define __MG_DISK_H__ - -/* name for platform device */ -#define MG_DEV_NAME "mg_disk" - -/* names of GPIO resource */ -#define MG_RST_PIN "mg_rst" -/* except MG_BOOT_DEV, reset-out pin should be assigned */ -#define MG_RSTOUT_PIN "mg_rstout" - -/* device attribution */ -/* use mflash as boot device */ -#define MG_BOOT_DEV (1 << 0) -/* use mflash as storage device */ -#define MG_STORAGE_DEV (1 << 1) -/* same as MG_STORAGE_DEV, but bootloader already done reset sequence */ -#define MG_STORAGE_DEV_SKIP_RST (1 << 2) - -/* private driver data */ -struct mg_drv_data { - /* disk resource */ - u32 use_polling; - - /* device attribution */ - u32 dev_attr; - - /* internally used */ - void *host; -}; - -#endif -- cgit v1.2.3 From c05e66733788118377c21a913c1bc7b64bccc167 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 14 Apr 2017 00:59:58 -0700 Subject: sbitmap: add sbitmap_get_shallow() operation This operation supports the use case of limiting the number of bits that can be allocated for a given operation. Rather than setting aside some bits at the end of the bitmap, we can set aside bits in each word of the bitmap. 
This means we can keep the allocation hints spread out and support sbitmap_resize() nicely at the cost of lower granularity for the allowed depth. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 55 +++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 55 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index d4e0a204c118..a1904aadbc45 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -175,6 +175,25 @@ void sbitmap_resize(struct sbitmap *sb, unsigned int depth); */ int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); +/** + * sbitmap_get_shallow() - Try to allocate a free bit from a &struct sbitmap, + * limiting the depth used from each word. + * @sb: Bitmap to allocate from. + * @alloc_hint: Hint for where to start searching for a free bit. + * @shallow_depth: The maximum number of bits to allocate from a single word. + * + * This rather specific operation allows for having multiple users with + * different allocation limits. E.g., there can be a high-priority class that + * uses sbitmap_get() and a low-priority class that uses sbitmap_get_shallow() + * with a @shallow_depth of (1 << (@sb->shift - 1)). Then, the low-priority + * class can only allocate half of the total bits in the bitmap, preventing it + * from starving out the high-priority class. + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int sbitmap_get_shallow(struct sbitmap *sb, unsigned int alloc_hint, + unsigned long shallow_depth); + /** * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. * @sb: Bitmap to check. 
@@ -325,6 +344,19 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); */ int __sbitmap_queue_get(struct sbitmap_queue *sbq); +/** + * __sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue, limiting the depth used from each word, with preemption + * already disabled. + * @sbq: Bitmap queue to allocate from. + * @shallow_depth: The maximum number of bits to allocate from a single word. + * See sbitmap_get_shallow(). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int __sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int shallow_depth); + /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. @@ -345,6 +377,29 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, return nr; } +/** + * sbitmap_queue_get_shallow() - Try to allocate a free bit from a &struct + * sbitmap_queue, limiting the depth used from each word. + * @sbq: Bitmap queue to allocate from. + * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to + * sbitmap_queue_clear()). + * @shallow_depth: The maximum number of bits to allocate from a single word. + * See sbitmap_get_shallow(). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static inline int sbitmap_queue_get_shallow(struct sbitmap_queue *sbq, + unsigned int *cpu, + unsigned int shallow_depth) +{ + int nr; + + *cpu = get_cpu(); + nr = __sbitmap_queue_get_shallow(sbq, shallow_depth); + put_cpu(); + return nr; +} + /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. -- cgit v1.2.3 From 5b72727299307e53888277729f980ab03264dac8 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 14 Apr 2017 01:00:00 -0700 Subject: blk-mq: export helpers blk_mq_finish_request() is required for schedulers that define their own put_request(). 
blk_mq_run_hw_queue() is required for schedulers that hold back requests to be run later. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b90c3d5766cd..d75de612845d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,6 +238,7 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); +void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, -- cgit v1.2.3 From c05f8525f67b7d6489b0502211d4ed35622d9beb Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Fri, 14 Apr 2017 01:00:01 -0700 Subject: blk-mq-sched: make completed_request() callback more useful Currently, this callback is called right after put_request() and has no distinguishable purpose. Instead, let's call it before put_request() as soon as I/O has completed on the request, before we account it in blk-stat. With this, Kyber can enable stats when it sees a latency outlier and make sure the outlier gets accounted. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/elevator.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index b7ec315ee7e7..3a216318ae73 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -106,7 +106,7 @@ struct elevator_mq_ops { void (*insert_requests)(struct blk_mq_hw_ctx *, struct list_head *, bool); struct request *(*dispatch_request)(struct blk_mq_hw_ctx *); bool (*has_work)(struct blk_mq_hw_ctx *); - void (*completed_request)(struct blk_mq_hw_ctx *, struct request *); + void (*completed_request)(struct request *); void (*started_request)(struct request *); void (*requeue_request)(struct request *); struct request *(*former_request)(struct request_queue *, struct request *); -- cgit v1.2.3 From 0e8d6a9336b487a1dd6f1991ff376e669d4c87c6 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 12 Apr 2017 22:07:28 +0200 Subject: workqueue: Provide work_on_cpu_safe() work_on_cpu() is not protected against CPU hotplug. For code which requires to be either executed on an online CPU or to fail if the CPU is not available the callsite would have to protect against CPU hotplug. Provide a function which does get/put_online_cpus() around the call to work_on_cpu() and fails the call with -ENODEV if the target CPU is not online. Preparatory patch to convert several racy task affinity manipulations. Signed-off-by: Thomas Gleixner Acked-by: Tejun Heo Cc: Fenghua Yu Cc: Tony Luck Cc: Herbert Xu Cc: "Rafael J. Wysocki" Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Sebastian Siewior Cc: Lai Jiangshan Cc: Viresh Kumar Cc: Michael Ellerman Cc: "David S. 
Miller" Cc: Len Brown Link: http://lkml.kernel.org/r/20170412201042.262610721@linutronix.de Signed-off-by: Thomas Gleixner --- include/linux/workqueue.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bde063cefd04..c102ef65cb64 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -608,8 +608,13 @@ static inline long work_on_cpu(int cpu, long (*fn)(void *), void *arg) { return fn(arg); } +static inline long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg) +{ + return fn(arg); +} #else long work_on_cpu(int cpu, long (*fn)(void *), void *arg); +long work_on_cpu_safe(int cpu, long (*fn)(void *), void *arg); #endif /* CONFIG_SMP */ #ifdef CONFIG_FREEZER -- cgit v1.2.3 From 17912c49edfa6ab552329bf63d1b757eb874673b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Sat, 15 Apr 2017 20:55:37 +0200 Subject: lightnvm: submit erases using the I/O path MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Until now erases have been submitted as synchronous commands through a dedicated erase function. In order to enable targets implementing asynchronous erases, refactor the erase path so that it uses the normal async I/O submission functions. If a target requires sync I/O, it can implement it internally. Also, adapt rrpc to use the new erase path. Signed-off-by: Javier González Fixed spelling error. 
Signed-off-by: Matias Bjørling Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ca45e4a088a9..e11163f9b3b7 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -56,7 +56,6 @@ typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); -typedef int (nvm_erase_blk_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, @@ -70,7 +69,6 @@ struct nvm_dev_ops { nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; - nvm_erase_blk_fn *erase_block; nvm_create_dma_pool_fn *create_dma_pool; nvm_destroy_dma_pool_fn *destroy_dma_pool; @@ -479,10 +477,10 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); -extern int nvm_set_rqd_ppalist(struct nvm_dev *, struct nvm_rq *, +extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); +extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *, const struct ppa_addr *, int, int); -extern void nvm_free_rqd_ppalist(struct nvm_dev *, struct nvm_rq *); -extern int nvm_erase_blk(struct nvm_tgt_dev *, struct ppa_addr *, int); +extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); -- cgit v1.2.3 
From a7737f39c70d9c63ba530d6316724d7be67de541 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Sat, 15 Apr 2017 20:55:38 +0200 Subject: lightnvm: rename scrambler controller hint MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the OCSSD 1.2 specification, the 0x200 hint enables the media scrambler for the read/write opcode, providing that the controller has been correctly configured by the firmware. Rename the macro to represent this meaning. Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index e11163f9b3b7..eff7d1f312a8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -123,7 +123,7 @@ enum { /* NAND Access Modes */ NVM_IO_SUSPEND = 0x80, NVM_IO_SLC_MODE = 0x100, - NVM_IO_SCRAMBLE_DISABLE = 0x200, + NVM_IO_SCRAMBLE_ENABLE = 0x200, /* Block Types */ NVM_BLK_T_FREE = 0x0, -- cgit v1.2.3 From 4af3f75d7992dd0dc49da95fbc039fa3806fba4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Sat, 15 Apr 2017 20:55:45 +0200 Subject: lightnvm: allow to init targets on factory mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Target initialization has two responsibilities: creating the target partition and instantiating the target. This patch enables to create a factory partition (e.g., do not trigger recovery on the given target). This is useful for target development and for being able to restore the device state at any moment in time without requiring a full-device erase. 
Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index eff7d1f312a8..7dfa56ebbc6d 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -436,7 +436,8 @@ static inline int ppa_cmp_blk(struct ppa_addr ppa1, struct ppa_addr ppa2) typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); -typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *); +typedef void *(nvm_tgt_init_fn)(struct nvm_tgt_dev *, struct gendisk *, + int flags); typedef void (nvm_tgt_exit_fn)(void *); typedef int (nvm_tgt_sysfs_init_fn)(struct gendisk *); typedef void (nvm_tgt_sysfs_exit_fn)(struct gendisk *); -- cgit v1.2.3 From 605f8fc2244236f8d6bf15bcc0586644af3a32e7 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 5 Apr 2017 00:03:33 +0200 Subject: i2c: core: Add new i2c_acpi_new_device helper function By default the i2c subsys creates an i2c-client for the first I2cSerialBus resource of an acpi_device, but some acpi_devices have multiple I2cSerialBus resources and we may want to instantiate i2c-clients for the others. This commit adds a new i2c_acpi_new_device function which can be used to create an i2c-client for any I2cSerialBus resource of an acpi_device. Note that the other resources may even be on a different i2c bus, so just retrieving the client address is not enough. 
Here is an example DSDT excerpt from such a device: Device (WIDR) { Name (_HID, "INT33FE" /* XPOWER Battery Device */) Name (_CID, "INT33FE" /* XPOWER Battery Device */) Name (_DDN, "WC PMIC Battery Device") Name (RBUF, ResourceTemplate () { I2cSerialBusV2 (0x005E, ControllerInitiated, 0x000186A0, AddressingMode7Bit, "\\_SB.PCI0.I2C7", 0x00, ResourceConsumer, , Exclusive, ) I2cSerialBusV2 (0x0036, ControllerInitiated, 0x000186A0, AddressingMode7Bit, "\\_SB.PCI0.I2C1", 0x00, ResourceConsumer, , Exclusive, ) I2cSerialBusV2 (0x0022, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.PCI0.I2C1", 0x00, ResourceConsumer, , Exclusive, ) I2cSerialBusV2 (0x0054, ControllerInitiated, 0x00061A80, AddressingMode7Bit, "\\_SB.PCI0.I2C1", 0x00, ResourceConsumer, , Exclusive, ) GpioInt (Level, ActiveLow, Exclusive, PullNone, 0x0000, "\\_SB.PCI0.I2C7.PMI5", 0x00, ResourceConsumer, , ) { // Pin list 0x0012 } GpioInt (Edge, ActiveLow, ExclusiveAndWake, PullNone, 0x0000, "\\_SB.GPO1", 0x00, ResourceConsumer, , ) { // Pin list 0x0005 } GpioInt (Level, ActiveLow, Exclusive, PullNone, 0x0000, "\\_SB.PCI0.I2C7.PMI5", 0x00, ResourceConsumer, , ) { // Pin list 0x0013 } }) Method (_CRS, 0, NotSerialized) // _CRS: Current Resource Settings { Return (RBUF) /* \_SB_.PCI0.I2C7.WIDR.RBUF */ } } Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 6b183521c616..53fa50fc63fb 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -824,11 +824,18 @@ static inline const struct of_device_id #if IS_ENABLED(CONFIG_ACPI) u32 i2c_acpi_find_bus_speed(struct device *dev); +struct i2c_client *i2c_acpi_new_device(struct device *dev, int index, + struct i2c_board_info *info); #else static inline u32 i2c_acpi_find_bus_speed(struct device *dev) { return 0; } +static inline struct i2c_client 
*i2c_acpi_new_device(struct device *dev, + int index, struct i2c_board_info *info) +{ + return NULL; +} #endif /* CONFIG_ACPI */ #endif /* _LINUX_I2C_H */ -- cgit v1.2.3 From d1d84bb95364ed604015c2b788caaf3dbca0262f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 5 Apr 2017 00:03:34 +0200 Subject: i2c: core: Allow drivers to disable i2c-core irq mapping By default the i2c-core will try to get an irq with index 0 on ACPI / of instantiated devices. This is troublesome on some ACPI systems where the irq info at index 0 in the CRS table may contain nonsense and/or point to an irqchip for which there is no Linux driver. If this happens then before this commit the driver's probe method would never get called because i2c_device_probe will try to get an irq by calling acpi_dev_gpio_irq_get which will always return -EPROBE_DEFER in this case, as it waits for a matching irqchip driver to load. Thus causing the driver to not get a chance to bind. This commit adds a new disable_i2c_core_irq_mapping flag to struct i2c_driver which a driver can set to tell the core to skip irq mapping. Signed-off-by: Hans de Goede Reviewed-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index 53fa50fc63fb..3a57e3dc9bec 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -149,6 +149,7 @@ enum i2c_alert_protocol { * @detect: Callback for device detection * @address_list: The I2C addresses to probe (for detect) * @clients: List of detected clients we created (for i2c-core use only) + * @disable_i2c_core_irq_mapping: Tell the i2c-core to not do irq-mapping * * The driver.owner field should be set to the module owner of this driver. * The driver.name field should be set to the name of this driver.
@@ -212,6 +213,8 @@ struct i2c_driver { int (*detect)(struct i2c_client *, struct i2c_board_info *); const unsigned short *address_list; struct list_head clients; + + bool disable_i2c_core_irq_mapping; }; #define to_i2c_driver(d) container_of(d, struct i2c_driver, driver) -- cgit v1.2.3 From b54807fa52ae21bdf6bad72b0f00fd400af412eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 24 Mar 2017 08:38:20 -0500 Subject: sysctl: Remove dead register_sysctl_root The function no longer does anything. There is only a single caller of register_sysctl_root when semantically there should be two. Remove this function so that if someone decides this functionality is needed again it will be obvious all of the callers of setup_sysctl_set need to be audited and modified appropriately. Signed-off-by: "Eric W. Biederman" --- include/linux/sysctl.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b7e82049fec7..80d07816def0 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -180,7 +180,6 @@ extern void setup_sysctl_set(struct ctl_table_set *p, int (*is_seen)(struct ctl_table_set *)); extern void retire_sysctl_set(struct ctl_table_set *set); -void register_sysctl_root(struct ctl_table_root *root); struct ctl_table_header *__register_sysctl_table( struct ctl_table_set *set, const char *path, struct ctl_table *table); -- cgit v1.2.3 From 500a3d0ded5ee41072d0f084bff938747ee0c125 Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 13 Apr 2017 06:36:51 +0300 Subject: net/mlx5: Add IPoIB enhanced offloads bits to mlx5_ifc New capability bit: ipoib_enhanced_offloads, indicates new ability for UD QP to do RSS and enhanced IPoIB offloads and acceleration. Add underlay_qpn to the TIS and flow_table objects In order to support SET_ROOT command, to connect between IPoIB QPs and flow steering tables. Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed Signed-off-by: David S.
Miller --- include/linux/mlx5/mlx5_ifc.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 1993adbd2c82..7c50bd39b297 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -872,7 +872,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 compact_address_vector[0x1]; u8 striding_rq[0x1]; - u8 reserved_at_202[0x2]; + u8 reserved_at_202[0x1]; + u8 ipoib_enhanced_offloads[0x1]; u8 ipoib_basic_offloads[0x1]; u8 reserved_at_205[0xa]; u8 drain_sigerr[0x1]; @@ -2293,7 +2294,9 @@ struct mlx5_ifc_tisc_bits { u8 reserved_at_120[0x8]; u8 transport_domain[0x18]; - u8 reserved_at_140[0x3c0]; + u8 reserved_at_140[0x8]; + u8 underlay_qpn[0x18]; + u8 reserved_at_160[0x3a0]; }; enum { @@ -8218,7 +8221,9 @@ struct mlx5_ifc_set_flow_table_root_in_bits { u8 reserved_at_a0[0x8]; u8 table_id[0x18]; - u8 reserved_at_c0[0x140]; + u8 reserved_at_c0[0x8]; + u8 underlay_qpn[0x18]; + u8 reserved_at_e0[0x120]; }; enum { -- cgit v1.2.3 From b3ba51498bddd72a526d9067b8b0ecf4932ce57e Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 13 Apr 2017 06:36:52 +0300 Subject: net/mlx5: Refactor create flow table method to accept underlay QP IB flow tables need the underlay qp to perform flow steering. Here we change the API of the flow tables creation to accept the underlay QP number as a parameter in order to support IB (IPoIB) flow steering. Signed-off-by: Erez Shitrit Signed-off-by: Saeed Mahameed Signed-off-by: David S. 
Miller --- include/linux/mlx5/fs.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index ae91a4bda1a3..1b166d2e19c5 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -104,12 +104,18 @@ mlx5_create_auto_grouped_flow_table(struct mlx5_flow_namespace *ns, u32 level, u32 flags); +struct mlx5_flow_table_attr { + int prio; + int max_fte; + u32 level; + u32 flags; + u32 underlay_qpn; +}; + struct mlx5_flow_table * mlx5_create_flow_table(struct mlx5_flow_namespace *ns, - int prio, - int num_flow_table_entries, - u32 level, - u32 flags); + struct mlx5_flow_table_attr *ft_attr); + struct mlx5_flow_table * mlx5_create_vport_flow_table(struct mlx5_flow_namespace *ns, int prio, -- cgit v1.2.3 From 258545449b7b410727b516b782256f8a3bde8bf2 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 13 Apr 2017 06:37:02 +0300 Subject: net/mlx5e: IPoIB, Xmit flow Implement mlx5e's IPoIB SKB transmit using the helper functions provided by mlx5e ethernet tx flow, the only difference in the code between mlx5e_xmit and mlx5i_xmit is that IPoIB has some extra fields to fill (UD datagram segment) in the TX descriptor (WQE) and it doesn't need to have any vlan handling. Signed-off-by: Saeed Mahameed Reviewed-by: Erez Shitrit Signed-off-by: David S. 
Miller --- include/linux/mlx5/qp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 3096370fe831..bef80d0a0e30 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -295,6 +295,16 @@ struct mlx5_av { u8 rgid[16]; }; +struct mlx5_ib_ah { + struct ib_ah ibah; + struct mlx5_av av; +}; + +static inline struct mlx5_ib_ah *to_mah(struct ib_ah *ibah) +{ + return container_of(ibah, struct mlx5_ib_ah, ibah); +} + struct mlx5_wqe_datagram_seg { struct mlx5_av av; }; -- cgit v1.2.3 From 1b72e7fd304639f1cd49d1e11955c4974936d88c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 11 Apr 2017 00:20:41 +0200 Subject: cpufreq: schedutil: Use policy-dependent transition delays Make the schedutil governor take the initial (default) value of the rate_limit_us sysfs attribute from the (new) transition_delay_us policy parameter (to be set by the scaling driver). That will allow scaling drivers to make schedutil use smaller default values of rate_limit_us and reduce the default average time interval between consecutive frequency changes. Make intel_pstate set transition_delay_us to 500. Signed-off-by: Rafael J. Wysocki Acked-by: Viresh Kumar --- include/linux/cpufreq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 87165f06a307..a5ce0bbeadb5 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -120,6 +120,13 @@ struct cpufreq_policy { bool fast_switch_possible; bool fast_switch_enabled; + /* + * Preferred average time interval between consecutive invocations of + * the driver to set the frequency for this policy. To be set by the + * scaling driver (0, which is the default, means no preference). + */ + unsigned int transition_delay_us; + /* Cached frequency lookup from cpufreq_driver_resolve_freq. 
*/ unsigned int cached_target_freq; int cached_resolved_idx; -- cgit v1.2.3 From 2611dc1939569718c65ffd59c8fb9ba7474d026c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Apr 2017 14:34:51 -0400 Subject: Remove compat_sys_getdents64() Unlike normal compat syscall variants, it is needed only for biarch architectures that have different alignment requirements for u64 in 32bit and 64bit ABI *and* have __put_user() that won't handle a store of 64bit value at 32bit-aligned address. We used to have one such (ia64), but its biarch support has been gone since 2010 (after being broken in 2008, which went unnoticed since nobody had been using it). It had escaped removal at the same time only because back in 2004 a patch that switched several syscalls on amd64 from private wrappers to generic compat ones had switched to use of compat_sys_getdents64(), which hadn't needed (or used) a compat wrapper on amd64. Let's bury it - it's at least 7 years overdue. Signed-off-by: Al Viro --- include/linux/compat.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index aef47be2a5c1..54d65eb3d1e7 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -528,11 +528,6 @@ asmlinkage long compat_sys_old_readdir(unsigned int fd, asmlinkage long compat_sys_getdents(unsigned int fd, struct compat_linux_dirent __user *dirent, unsigned int count); -#ifdef __ARCH_WANT_COMPAT_SYS_GETDENTS64 -asmlinkage long compat_sys_getdents64(unsigned int fd, - struct linux_dirent64 __user *dirent, - unsigned int count); -#endif asmlinkage long compat_sys_vmsplice(int fd, const struct compat_iovec __user *, unsigned int nr_segs, unsigned int flags); asmlinkage long compat_sys_open(const char __user *filename, int flags, -- cgit v1.2.3 From e99ca56ce03dd90991025878152bae8b53484147 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Apr 2017 16:50:24 -0400 Subject: move compat select-related syscalls to fs/select.c
Signed-off-by: Al Viro --- include/linux/poll.h | 56 ---------------------------------------------------- 1 file changed, 56 deletions(-) (limited to 'include/linux') diff --git a/include/linux/poll.h b/include/linux/poll.h index a46d6755035e..75ffc5729e4c 100644 --- a/include/linux/poll.h +++ b/include/linux/poll.h @@ -98,64 +98,8 @@ extern int poll_schedule_timeout(struct poll_wqueues *pwq, int state, ktime_t *expires, unsigned long slack); extern u64 select_estimate_accuracy(struct timespec64 *tv); - -static inline int poll_schedule(struct poll_wqueues *pwq, int state) -{ - return poll_schedule_timeout(pwq, state, NULL, 0); -} - -/* - * Scalable version of the fd_set. - */ - -typedef struct { - unsigned long *in, *out, *ex; - unsigned long *res_in, *res_out, *res_ex; -} fd_set_bits; - -/* - * How many longwords for "nr" bits? - */ -#define FDS_BITPERLONG (8*sizeof(long)) -#define FDS_LONGS(nr) (((nr)+FDS_BITPERLONG-1)/FDS_BITPERLONG) -#define FDS_BYTES(nr) (FDS_LONGS(nr)*sizeof(long)) - -/* - * We do a VERIFY_WRITE here even though we are only reading this time: - * we'll write to it eventually.. - * - * Use "unsigned long" accesses to let user-mode fd_set's be long-aligned. - */ -static inline -int get_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) -{ - nr = FDS_BYTES(nr); - if (ufdset) - return copy_from_user(fdset, ufdset, nr) ? 
-EFAULT : 0; - - memset(fdset, 0, nr); - return 0; -} - -static inline unsigned long __must_check -set_fd_set(unsigned long nr, void __user *ufdset, unsigned long *fdset) -{ - if (ufdset) - return __copy_to_user(ufdset, fdset, FDS_BYTES(nr)); - return 0; -} - -static inline -void zero_fd_set(unsigned long nr, unsigned long *fdset) -{ - memset(fdset, 0, FDS_BYTES(nr)); -} - #define MAX_INT64_SECONDS (((s64)(~((u64)0)>>1)/HZ)-1) -extern int do_select(int n, fd_set_bits *fds, struct timespec64 *end_time); -extern int do_sys_poll(struct pollfd __user * ufds, unsigned int nfds, - struct timespec64 *end_time); extern int core_sys_select(int n, fd_set __user *inp, fd_set __user *outp, fd_set __user *exp, struct timespec64 *end_time); -- cgit v1.2.3 From 57240d007816486131bee88cd474c2a71f0fe224 Mon Sep 17 00:00:00 2001 From: "R. Parameswaran" Date: Wed, 12 Apr 2017 18:31:04 -0700 Subject: l2tp: device MTU setup, tunnel socket needs a lock The MTU overhead calculation in L2TP device set-up merged via commit b784e7ebfce8cfb16c6f95e14e8532d0768ab7ff needs to be adjusted to lock the tunnel socket while referencing the sub-data structures to derive the socket's IP overhead. Reported-by: Guillaume Nault Tested-by: Guillaume Nault Signed-off-by: R. Parameswaran Signed-off-by: David S. Miller --- include/linux/net.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index a42fab24c8af..abcfa46a2bd9 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -298,7 +298,7 @@ int kernel_sendpage(struct socket *sock, struct page *page, int offset, int kernel_sock_ioctl(struct socket *sock, int cmd, unsigned long arg); int kernel_sock_shutdown(struct socket *sock, enum sock_shutdown_cmd how); -/* Following routine returns the IP overhead imposed by a socket. */ +/* Routine returns the IP overhead imposed by a (caller-protected) socket. 
*/ u32 kernel_sock_ip_overhead(struct sock *sk); #define MODULE_ALIAS_NETPROTO(proto) \ -- cgit v1.2.3 From d51e4af5c2092c48a06ceaf2323b13a39a2df4ee Mon Sep 17 00:00:00 2001 From: "Chopra, Manish" Date: Thu, 13 Apr 2017 04:54:44 -0700 Subject: qed: aRFS infrastructure support This patch adds necessary APIs to interface with qede aRFS support in successive patch. It also reserves separate PTT entry for aRFS, [as being in fastpath flow] for hardware access instead of trying to acquire it at run time from the ptt pool. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 8 ++++++++ include/linux/qed/qed_if.h | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 4cd1f0ccfa36..1eba803cb7f1 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -301,6 +301,14 @@ struct qed_eth_ops { int (*tunn_config)(struct qed_dev *cdev, struct qed_tunn_params *params); + + int (*ntuple_filter_config)(struct qed_dev *cdev, void *cookie, + dma_addr_t mapping, u16 length, + u16 vport_id, u16 rx_queue_id, + bool add_filter); + + int (*configure_arfs_searcher)(struct qed_dev *cdev, + bool en_searcher); }; const struct qed_eth_ops *qed_get_eth_ops(void); diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 625f80f08f91..d44933a058ee 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -178,6 +178,12 @@ struct qed_eth_pf_params { * to update_pf_params routine invoked before slowpath start */ u16 num_cons; + + /* To enable arfs, previous to HW-init a positive number needs to be + * set [as filters require allocated searcher ILT memory]. + * This will set the maximal number of configured steering-filters. 
+ */ + u32 num_arfs_filters; }; struct qed_fcoe_pf_params { @@ -427,6 +433,7 @@ struct qed_int_info { }; struct qed_common_cb_ops { + void (*arfs_filter_op)(void *dev, void *fltr, u8 fw_rc); void (*link_update)(void *dev, struct qed_link_output *link); void (*dcbx_aen)(void *dev, struct qed_dcbx_get *get, u32 mib_type); -- cgit v1.2.3 From d27158c0cf080c85753f34e7a20a91c3ba20a0b9 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Thu, 30 Mar 2017 19:26:35 -0500 Subject: signal: Remove unused definition of sig_user_defined Signed-off-by: "Eric W. Biederman" --- include/linux/signal.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index 94ad6eea9550..1f5a16620693 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -390,10 +390,6 @@ int unhandled_signal(struct task_struct *tsk, int sig); #define sig_kernel_ignore(sig) siginmask(sig, SIG_KERNEL_IGNORE_MASK) #define sig_kernel_stop(sig) siginmask(sig, SIG_KERNEL_STOP_MASK) -#define sig_user_defined(t, signr) \ - (((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_DFL) && \ - ((t)->sighand->action[(signr)-1].sa.sa_handler != SIG_IGN)) - #define sig_fatal(t, signr) \ (!siginmask(signr, SIG_KERNEL_IGNORE_MASK|SIG_KERNEL_STOP_MASK) && \ (t)->sighand->action[(signr)-1].sa.sa_handler == SIG_DFL) -- cgit v1.2.3 From f51b17c8d90f85456579c3192ab59ee031835634 Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Mon, 17 Apr 2017 21:34:56 +0800 Subject: boot/param: Move next_arg() function to lib/cmdline.c for later reuse MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit next_arg() will be used to parse boot parameters in the x86/boot/compressed code, so move it to lib/cmdline.c for better code reuse. No change in functionality.
Signed-off-by: Baoquan He Cc: Andrew Morton Cc: Gustavo Padovan Cc: Jens Axboe Cc: Jessica Yu Cc: Johannes Berg Cc: Josh Triplett Cc: Larry Finger Cc: Linus Torvalds Cc: Niklas Söderlund Cc: Peter Zijlstra Cc: Petr Mladek Cc: Rasmus Villemoes Cc: Thomas Gleixner Cc: dan.j.williams@intel.com Cc: dave.jiang@intel.com Cc: dyoung@redhat.com Cc: keescook@chromium.org Cc: zijun_hu Link: http://lkml.kernel.org/r/1492436099-4017-2-git-send-email-bhe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c26dc3a8295..7ae256717a32 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -438,6 +438,7 @@ extern int get_option(char **str, int *pint); extern char *get_options(const char *str, int nints, int *ints); extern unsigned long long memparse(const char *ptr, char **retptr); extern bool parse_option_str(const char *str, const char *option); +extern char *next_arg(char *args, char **param, char **val); extern int core_kernel_text(unsigned long addr); extern int core_kernel_data(unsigned long addr); -- cgit v1.2.3 From 21470e32ca7f976bf131aa3c7b54019d07f7d821 Mon Sep 17 00:00:00 2001 From: Mauro Carvalho Chehab Date: Sun, 16 Apr 2017 21:51:07 -0300 Subject: usb: fix some references for /proc/bus/usb Since when we got rid of usbfs, the /proc/bus/usb is now elsewhere. Fix references for it. Signed-off-by: Mauro Carvalho Chehab Acked-by: Serge Hallyn Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 226557362d36..cb9fbd54386e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -318,7 +318,7 @@ void usb_put_intf(struct usb_interface *intf); * struct usb_interface (which persists only as long as its configuration * is installed). 
The altsetting arrays can be accessed through these * structures at any time, permitting comparison of configurations and - * providing support for the /proc/bus/usb/devices pseudo-file. + * providing support for the /sys/kernel/debug/usb/devices pseudo-file. */ struct usb_interface_cache { unsigned num_altsetting; /* number of alternate settings */ -- cgit v1.2.3 From 0c688614dcce84dfdbb305fd1c399c06cecea745 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 12 Apr 2017 18:37:14 -0400 Subject: console: move console_init() out of tty_io.c All the console driver handling code lives in printk.c. Move console_init() there as well so console support can still be used when the TTY code is configured out. No logical changes from this patch. Signed-off-by: Nicolas Pitre Signed-off-by: Greg Kroah-Hartman --- include/linux/console.h | 2 ++ include/linux/tty.h | 7 ++++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/console.h b/include/linux/console.h index 5949d1855589..b8920a031a3e 100644 --- a/include/linux/console.h +++ b/include/linux/console.h @@ -212,4 +212,6 @@ extern bool vgacon_text_force(void); static inline bool vgacon_text_force(void) { return false; } #endif +extern void console_init(void); + #endif /* _LINUX_CONSOLE_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index 1017e904c0a3..f1106d7c73b6 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -390,7 +390,6 @@ static inline bool tty_throttled(struct tty_struct *tty) } #ifdef CONFIG_TTY -extern void console_init(void); extern void tty_kref_put(struct tty_struct *tty); extern struct pid *tty_get_pgrp(struct tty_struct *tty); extern void tty_vhangup_self(void); @@ -402,8 +401,6 @@ extern struct tty_struct *get_current_tty(void); extern int __init tty_init(void); extern const char *tty_name(const struct tty_struct *tty); #else -static inline void console_init(void) -{ } static inline void tty_kref_put(struct tty_struct *tty) 
{ } static inline struct pid *tty_get_pgrp(struct tty_struct *tty) @@ -669,7 +666,11 @@ extern int tty_ldisc_receive_buf(struct tty_ldisc *ld, const unsigned char *p, /* n_tty.c */ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); +#ifdef CONFIG_TTY extern void __init n_tty_init(void); +#else +static inline void n_tty_init(void) { } +#endif /* tty_audit.c */ #ifdef CONFIG_AUDIT -- cgit v1.2.3 From a1235b3eb10086b8420f37bbb6c29436f55940ba Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Wed, 12 Apr 2017 18:37:16 -0400 Subject: tty: split job control support into a file of its own This makes it easier for job control to become optional and/or usable independently from tty_io.c, as well as providing a nice purpose separation. No logical changes from this patch. Signed-off-by: Nicolas Pitre Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index f1106d7c73b6..d07cd2105a6c 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -475,9 +475,13 @@ extern int tty_do_resize(struct tty_struct *tty, struct winsize *ws); extern int is_current_pgrp_orphaned(void); extern void tty_hangup(struct tty_struct *tty); extern void tty_vhangup(struct tty_struct *tty); +extern void tty_vhangup_session(struct tty_struct *tty); extern int tty_hung_up_p(struct file *filp); extern void do_SAK(struct tty_struct *tty); extern void __do_SAK(struct tty_struct *tty); +extern void tty_open_proc_set_tty(struct file *filp, struct tty_struct *tty); +extern int tty_signal_session_leader(struct tty_struct *tty, int exit_session); +extern void session_clear_tty(struct pid *session); extern void no_tty(void); extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); @@ -525,6 +529,8 @@ extern void tty_ldisc_flush(struct tty_struct *tty); extern long tty_ioctl(struct file *file, 
unsigned int cmd, unsigned long arg); extern int tty_mode_ioctl(struct tty_struct *tty, struct file *file, unsigned int cmd, unsigned long arg); +extern long tty_jobctrl_ioctl(struct tty_struct *tty, struct tty_struct *real_tty, + struct file *file, unsigned int cmd, unsigned long arg); extern int tty_perform_flush(struct tty_struct *tty, unsigned long arg); extern void tty_default_fops(struct file_operations *fops); extern struct tty_struct *alloc_tty_struct(struct tty_driver *driver, int idx); -- cgit v1.2.3 From 5ef1ecf060f28ecef313b5723f1fd39bf5a35f56 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Wed, 29 Mar 2017 20:54:37 +0200 Subject: mmc: sdio: fix alignment issue in struct sdio_func Certain 64-bit systems (e.g. Amlogic Meson GX) require buffers to be used for DMA to be 8-byte-aligned. struct sdio_func has an embedded small DMA buffer not meeting this requirement. When testing switching to descriptor chain mode in meson-gx driver SDIO is broken therefore. Fix this by allocating the small DMA buffer separately as kmalloc ensures that the returned memory area is properly aligned for every basic data type. 
Signed-off-by: Heiner Kallweit Tested-by: Helmut Klein Signed-off-by: Ulf Hansson --- include/linux/mmc/sdio_func.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdio_func.h b/include/linux/mmc/sdio_func.h index aab032a6ae61..97ca105347a6 100644 --- a/include/linux/mmc/sdio_func.h +++ b/include/linux/mmc/sdio_func.h @@ -53,7 +53,7 @@ struct sdio_func { unsigned int state; /* function state */ #define SDIO_STATE_PRESENT (1<<0) /* present in sysfs */ - u8 tmpbuf[4]; /* DMA:able scratch buffer */ + u8 *tmpbuf; /* DMA:able scratch buffer */ unsigned num_info; /* number of info strings */ const char **info; /* info strings */ -- cgit v1.2.3 From 5f8ddeab10ce45d3d3de8ae7ea8811512845c497 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Sun, 16 Apr 2017 02:55:09 +0200 Subject: rhashtable: remove insecure_elasticity commit 83e7e4ce9e93c3 ("mac80211: Use rhltable instead of rhashtable") removed the last user that made use of 'insecure_elasticity' parameter, i.e. the default of 16 is used everywhere. Replace it with a constant. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/rhashtable.h | 21 ++++++++++++++++----- 1 file changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index e507290cd2c7..ae87dcdf52d2 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -49,6 +49,21 @@ /* Base bits plus 1 bit for nulls marker */ #define RHT_HASH_RESERVED_SPACE (RHT_BASE_BITS + 1) +/* Maximum chain length before rehash + * + * The maximum (not average) chain length grows with the size of the hash + * table, at a rate of (log N)/(log log N). + * + * The value of 16 is selected so that even if the hash table grew to + * 2^32 you would not expect the maximum chain length to exceed it + * unless we are under attack (or extremely unlucky). 
+ * + * As this limit is only to detect attacks, we don't need to set it to a + * lower value as you'd need the chain length to vastly exceed 16 to have + * any real effect on the system. + */ +#define RHT_ELASTICITY 16u + struct rhash_head { struct rhash_head __rcu *next; }; @@ -114,7 +129,6 @@ struct rhashtable; * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker - * @insecure_elasticity: Set to true to disable chain length checks * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) @@ -130,7 +144,6 @@ struct rhashtable_params { unsigned int max_size; unsigned int min_size; u32 nulls_base; - bool insecure_elasticity; bool automatic_shrinking; size_t locks_mul; rht_hashfn_t hashfn; @@ -143,7 +156,6 @@ struct rhashtable_params { * @tbl: Bucket table * @nelems: Number of elements in table * @key_len: Key length for hashfn - * @elasticity: Maximum chain length before rehash * @p: Configuration parameters * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously @@ -154,7 +166,6 @@ struct rhashtable { struct bucket_table __rcu *tbl; atomic_t nelems; unsigned int key_len; - unsigned int elasticity; struct rhashtable_params p; bool rhlist; struct work_struct run_work; @@ -726,7 +737,7 @@ slow_path: return rhashtable_insert_slow(ht, key, obj); } - elasticity = ht->elasticity; + elasticity = RHT_ELASTICITY; pprev = rht_bucket_insert(ht, tbl, hash); data = ERR_PTR(-ENOMEM); if (!pprev) -- cgit v1.2.3 From ec19b85913486993d7d6f747beed1a711afd47d8 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 31 Mar 2017 19:01:14 -0400 Subject: ftrace: Move the probe function into the tracing directory As nothing outside the tracing directory uses the function probes mechanism, I'm moving the 
prototypes out of the include/linux/ftrace.h and into the local kernel/trace/trace.h header. I plan on making them hook to the trace_array structure which is local to kernel/trace, and I do not want to expose it to the rest of the kernel. This requires that the probe functions must also be local to tracing. But luckily nothing else uses them. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 06b2990a35e4..3e790ff1c501 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -360,30 +360,6 @@ void ftrace_bug(int err, struct dyn_ftrace *rec); struct seq_file; -struct ftrace_probe_ops { - void (*func)(unsigned long ip, - unsigned long parent_ip, - void **data); - int (*init)(struct ftrace_probe_ops *ops, - unsigned long ip, void **data); - void (*free)(struct ftrace_probe_ops *ops, - unsigned long ip, void **data); - int (*print)(struct seq_file *m, - unsigned long ip, - struct ftrace_probe_ops *ops, - void *data); -}; - -extern int -register_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data); -extern void -unregister_ftrace_function_probe(char *glob, struct ftrace_probe_ops *ops, - void *data); -extern void -unregister_ftrace_function_probe_func(char *glob, struct ftrace_probe_ops *ops); -extern void unregister_ftrace_function_probe_all(char *glob); - extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); -- cgit v1.2.3 From ae749c7ab475de2c9c427058db19921c91846e89 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:54 +0100 Subject: PCI: Add arch_can_pci_mmap_wc() macro Most of the almost-identical versions of pci_mmap_page_range() silently ignore the 'write_combine' argument and give uncached mappings. 
Yet we allow the PCIIOC_WRITE_COMBINE ioctl in /proc/bus/pci, expose the 'resourceX_wc' file in sysfs, and allow an attempted mapping to apparently succeed. To fix this, introduce a macro arch_can_pci_mmap_wc() which indicates whether the platform can do a write-combining mapping. On x86 this ends up being pat_enabled(), while the few other platforms that support it can just set it to a literal '1'. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..e614fb42d8bb 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1626,6 +1626,10 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #include <asm/pci.h> +#ifndef arch_can_pci_mmap_wc +#define arch_can_pci_mmap_wc() 0 +#endif + #ifndef pci_root_bus_fwnode #define pci_root_bus_fwnode(bus) NULL #endif -- cgit v1.2.3 From 11df19546fe4a6135cdae62e96a1e25b3fabf6ea Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:55 +0100 Subject: PCI: Move multiple declarations of pci_mmap_page_range() to <linux/pci.h> We can declare it even on platforms where it isn't going to be defined. There's no need to have it littered through the various files. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e614fb42d8bb..e7bb4b62cc97 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1626,6 +1626,13 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #include <asm/pci.h> +/* Map a range of PCI memory or I/O space for a device into user space. + * Architectures provide this function if they set HAVE_PCI_MMAP, and + * it accepts the 'write_combine' argument when arch_can_pci_mmap_wc() + * evaluates to nonzero.
*/ +int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); + #ifndef arch_can_pci_mmap_wc #define arch_can_pci_mmap_wc() 0 #endif -- cgit v1.2.3 From e854d8b2a82ef76521ad2bed68211fde0511d417 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:56 +0100 Subject: PCI: Add arch_can_pci_mmap_io() on architectures which can mmap() I/O space This is relatively esoteric, and knowing that we don't have it makes life easier in some cases rather than just an eventual -EINVAL from pci_mmap_page_range(). Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e7bb4b62cc97..590cfcf6acf5 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1636,6 +1636,9 @@ int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, #ifndef arch_can_pci_mmap_wc #define arch_can_pci_mmap_wc() 0 #endif +#ifndef arch_can_pci_mmap_io +#define arch_can_pci_mmap_io() 0 +#endif #ifndef pci_root_bus_fwnode #define pci_root_bus_fwnode(bus) NULL -- cgit v1.2.3 From b8c17e6664c461e4aed545a943304c3b32dd309c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 8 Nov 2016 14:25:21 -0800 Subject: rcu: Maintain special bits at bottom of ->dynticks counter Currently, IPIs are used to force other CPUs to invalidate their TLBs in response to a kernel virtual-memory mapping change. This works, but degrades both battery lifetime (for idle CPUs) and real-time response (for nohz_full CPUs), and in addition results in unnecessary IPIs due to the fact that CPUs executing in usermode are unaffected by stale kernel mappings. 
It would be better to cause a CPU executing in usermode to wait until it is entering kernel mode to do the flush, first to avoid interrupting usermode tasks and second to handle multiple flush requests with a single flush in the case of a long-running user task. This commit therefore reserves a bit at the bottom of the ->dynticks counter, which is checked upon exit from extended quiescent states. If it is set, it is cleared and then a new rcu_eqs_special_exit() macro is invoked, which, if not supplied, is an empty single-pass do-while loop. If this bottom bit is set on -entry- to an extended quiescent state, then a WARN_ON_ONCE() triggers. This bottom bit may be set using a new rcu_eqs_special_set() function, which returns true if the bit was set, or false if the CPU turned out to not be in an extended quiescent state. Please note that this function refuses to set the bit for a non-nohz_full CPU when that CPU is executing in usermode because usermode execution is tracked by RCU as a dyntick-idle extended quiescent state only for nohz_full CPUs. Reported-by: Andy Lutomirski Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcutiny.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index b452953e21c8..6c9d941e3962 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -33,6 +33,11 @@ static inline int rcu_dynticks_snap(struct rcu_dynticks *rdtp) return 0; } +static inline bool rcu_eqs_special_set(int cpu) +{ + return false; /* Never flag non-existent other CPUs! */ +} + static inline unsigned long get_state_synchronize_rcu(void) { return 0; -- cgit v1.2.3 From 77e5849688670280b173bb9e0544e9da7b2acc36 Mon Sep 17 00:00:00 2001 From: "Paul E.
McKenney" Date: Sat, 14 Jan 2017 13:32:50 -0800 Subject: rcu: Make arch select smp_mb__after_unlock_lock() strength The definition of smp_mb__after_unlock_lock() is currently smp_mb() for CONFIG_PPC and a no-op otherwise. It would be better to instead provide an architecture-selectable Kconfig option, and select the strength of smp_mb__after_unlock_lock() based on that option. This commit therefore creates ARCH_WEAK_RELEASE_ACQUIRE, has PPC select it, and bases the definition of smp_mb__after_unlock_lock() on this new ARCH_WEAK_RELEASE_ACQUIRE Kconfig option. Reported-by: Ingo Molnar Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Will Deacon Cc: Boqun Feng Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Acked-by: Michael Ellerman Cc: Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index de88b33c0974..e6146d0074f8 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -1127,11 +1127,11 @@ do { \ * if the UNLOCK and LOCK are executed by the same CPU or if the * UNLOCK and LOCK operate on the same lock variable. */ -#ifdef CONFIG_PPC +#ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE #define smp_mb__after_unlock_lock() smp_mb() /* Full ordering for lock. */ -#else /* #ifdef CONFIG_PPC */ +#else /* #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #define smp_mb__after_unlock_lock() do { } while (0) -#endif /* #else #ifdef CONFIG_PPC */ +#endif /* #else #ifdef CONFIG_ARCH_WEAK_RELEASE_ACQUIRE */ #endif /* __LINUX_RCUPDATE_H */ -- cgit v1.2.3 From 900b1028ec388e50c98200641ae4274794c807cf Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Feb 2017 14:32:54 -0800 Subject: srcu: Allow SRCU to access rcu_scheduler_active This is primarily a code-movement commit in preparation for allowing SRCU to handle early-boot SRCU grace periods. Signed-off-by: Paul E. 
McKenney --- include/linux/rcutiny.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 6c9d941e3962..5219be250f00 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -217,14 +217,14 @@ static inline void exit_rcu(void) { } -#ifdef CONFIG_DEBUG_LOCK_ALLOC +#if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) extern int rcu_scheduler_active __read_mostly; void rcu_scheduler_starting(void); -#else /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#else /* #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ static inline void rcu_scheduler_starting(void) { } -#endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +#endif /* #else #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_SRCU) */ #if defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) -- cgit v1.2.3 From c2a8ec0778b2ca0d360ba9b5cac7fcd5ddfe798f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 10 Mar 2017 15:31:55 -0800 Subject: srcu: Move to state-based grace-period sequencing The current SRCU grace-period processing might never reach the last portion of srcu_advance_batches(). This is OK given the current implementation, as the first portion, up to the try_check_zero() following the srcu_flip() is sufficient to drive grace periods forward. However, it has the unfortunate side-effect of making it impossible to determine when a given grace period has ended, and it will be necessary to efficiently trace ends of grace periods in order to efficiently handle per-CPU SRCU callback lists. This commit therefore adds states to the SRCU grace-period processing, so that the end of a given SRCU grace period is marked by the transition to the SRCU_STATE_DONE state. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index a598cf3ac70c..f149a685896c 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -48,7 +48,7 @@ struct srcu_struct { unsigned long completed; struct srcu_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->batch_queue, ->running */ - bool running; + int srcu_state; /* callbacks just queued */ struct rcu_batch batch_queue; /* callbacks try to do the first check_zero */ @@ -62,6 +62,12 @@ struct srcu_struct { #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; +/* Values for -> state variable. */ +#define SRCU_STATE_IDLE 0 +#define SRCU_STATE_SCAN1 1 +#define SRCU_STATE_SCAN2 2 +#define SRCU_STATE_DONE 3 + #ifdef CONFIG_DEBUG_LOCK_ALLOC int __init_srcu_struct(struct srcu_struct *sp, const char *name, @@ -89,7 +95,7 @@ void process_srcu(struct work_struct *work); .completed = -300, \ .per_cpu_ref = &name##_srcu_array, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .running = false, \ + .srcu_state = SRCU_STATE_IDLE, \ .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ -- cgit v1.2.3 From ac367c1c621b75689f6d5cd8301d364ba2c9f292 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 11 Mar 2017 07:14:06 -0800 Subject: srcu: Add grace-period sequence numbers This commit adds grace-period sequence numbers, which will be used to handle mid-boot grace periods and per-CPU callback lists. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index f149a685896c..047ac8c28a4e 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -46,6 +46,7 @@ struct rcu_batch { struct srcu_struct { unsigned long completed; + unsigned long srcu_gp_seq; struct srcu_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->batch_queue, ->running */ int srcu_state; -- cgit v1.2.3 From 8660b7d8a545227fd9ee80508aa82528ea9947d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 13 Mar 2017 16:48:18 -0700 Subject: srcu: Use rcu_segcblist to track SRCU callbacks This commit switches SRCU from custom-built callback queues to the new rcu_segcblist structure. This change associates grace-period sequence numbers with groups of callbacks, which will be needed for efficient processing of per-CPU callbacks. Signed-off-by: Paul E. McKenney --- include/linux/rcu_segcblist.h | 678 ++++++++++++++++++++++++++++++++++++++++++ include/linux/srcu.h | 24 +- 2 files changed, 683 insertions(+), 19 deletions(-) create mode 100644 include/linux/rcu_segcblist.h (limited to 'include/linux') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h new file mode 100644 index 000000000000..74b1e7243955 --- /dev/null +++ b/include/linux/rcu_segcblist.h @@ -0,0 +1,678 @@ +/* + * RCU segmented callback lists + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. 
+ * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Authors: Paul E. McKenney + */ + +#ifndef __KERNEL_RCU_SEGCBLIST_H +#define __KERNEL_RCU_SEGCBLIST_H + +/* Simple unsegmented callback lists. */ +struct rcu_cblist { + struct rcu_head *head; + struct rcu_head **tail; + long len; + long len_lazy; +}; + +#define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } + +/* Initialize simple callback list. */ +static inline void rcu_cblist_init(struct rcu_cblist *rclp) +{ + rclp->head = NULL; + rclp->tail = &rclp->head; + rclp->len = 0; + rclp->len_lazy = 0; +} + +/* Is simple callback list empty? */ +static inline bool rcu_cblist_empty(struct rcu_cblist *rclp) +{ + return !rclp->head; +} + +/* Return number of callbacks in simple callback list. */ +static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) +{ + return rclp->len; +} + +/* Return number of lazy callbacks in simple callback list. */ +static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp) +{ + return rclp->len_lazy; +} + +/* + * Debug function to actually count the number of callbacks. + * If the number exceeds the limit specified, return -1. + */ +static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) +{ + int cnt = 0; + struct rcu_head **rhpp = &rclp->head; + + for (;;) { + if (!*rhpp) + return cnt; + if (++cnt > lim) + return -1; + rhpp = &(*rhpp)->next; + } +} + +/* + * Dequeue the oldest rcu_head structure from the specified callback + * list. This function assumes that the callback is non-lazy, but + * the caller can later invoke rcu_cblist_dequeued_lazy() if it + * finds otherwise (and if it cares about laziness). This allows + * different users to have different ways of determining laziness. 
+ */ +static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) +{ + struct rcu_head *rhp; + + rhp = rclp->head; + if (!rhp) + return NULL; + rclp->len--; + rclp->head = rhp->next; + if (!rclp->head) + rclp->tail = &rclp->head; + return rhp; +} + +/* + * Account for the fact that a previously dequeued callback turned out + * to be marked as lazy. + */ +static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) +{ + rclp->len_lazy--; +} + +/* + * Interim function to return rcu_cblist head pointer. Longer term, the + * rcu_cblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) +{ + return rclp->head; +} + +/* + * Interim function to return rcu_cblist tail pointer. Longer term, the + * rcu_cblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) +{ + WARN_ON_ONCE(rcu_cblist_empty(rclp)); + return rclp->tail; +} + +/* Complicated segmented callback lists. ;-) */ + +/* + * Index values for segments in rcu_segcblist structure. + * + * The segments are as follows: + * + * [head, *tails[RCU_DONE_TAIL]): + * Callbacks whose grace period has elapsed, and thus can be invoked. + * [*tails[RCU_DONE_TAIL], *tails[RCU_WAIT_TAIL]): + * Callbacks waiting for the current GP from the current CPU's viewpoint. + * [*tails[RCU_WAIT_TAIL], *tails[RCU_NEXT_READY_TAIL]): + * Callbacks that arrived before the next GP started, again from + * the current CPU's viewpoint. These can be handled by the next GP. + * [*tails[RCU_NEXT_READY_TAIL], *tails[RCU_NEXT_TAIL]): + * Callbacks that might have arrived after the next GP started. + * There is some uncertainty as to when a given GP starts and + * ends, but a CPU knows the exact times if it is the one starting + * or ending the GP. Other CPUs know that the previous GP ends + * before the next one starts.
+ * + * Note that RCU_WAIT_TAIL cannot be empty unless RCU_NEXT_READY_TAIL is also + * empty. + * + * The ->gp_seq[] array contains the grace-period number at which the + * corresponding segment of callbacks will be ready to invoke. A given + * element of this array is meaningful only when the corresponding segment + * is non-empty, and it is never valid for RCU_DONE_TAIL (whose callbacks + * are already ready to invoke) or for RCU_NEXT_TAIL (whose callbacks have + * not yet been assigned a grace-period number). + */ +#define RCU_DONE_TAIL 0 /* Also RCU_WAIT head. */ +#define RCU_WAIT_TAIL 1 /* Also RCU_NEXT_READY head. */ +#define RCU_NEXT_READY_TAIL 2 /* Also RCU_NEXT head. */ +#define RCU_NEXT_TAIL 3 +#define RCU_CBLIST_NSEGS 4 + +struct rcu_segcblist { + struct rcu_head *head; + struct rcu_head **tails[RCU_CBLIST_NSEGS]; + unsigned long gp_seq[RCU_CBLIST_NSEGS]; + long len; + long len_lazy; +}; + +#define RCU_SEGCBLIST_INITIALIZER(n) \ +{ \ + .head = NULL, \ + .tails[RCU_DONE_TAIL] = &n.head, \ + .tails[RCU_WAIT_TAIL] = &n.head, \ + .tails[RCU_NEXT_READY_TAIL] = &n.head, \ + .tails[RCU_NEXT_TAIL] = &n.head, \ +} + +/* + * Initialize an rcu_segcblist structure. + */ +static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp) +{ + int i; + + BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); + BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); + rsclp->head = NULL; + for (i = 0; i < RCU_CBLIST_NSEGS; i++) + rsclp->tails[i] = &rsclp->head; + rsclp->len = 0; + rsclp->len_lazy = 0; +} + +/* + * Is the specified rcu_segcblist structure empty? + * + * But careful! The fact that the ->head field is NULL does not + * necessarily imply that there are no callbacks associated with + * this structure. When callbacks are being invoked, they are + * removed as a group. If callback invocation must be preempted, + * the remaining callbacks will be added back to the list. Either + * way, the counts are updated later. 
+ * + * So it is often the case that rcu_segcblist_n_cbs() should be used + * instead. + */ +static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) +{ + return !rsclp->head; +} + +/* Return number of callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) +{ + return READ_ONCE(rsclp->len); +} + +/* Return number of lazy callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) +{ + return rsclp->len_lazy; +} + +/* Return number of non-lazy callbacks in segmented callback list. */ +static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) +{ + return rsclp->len - rsclp->len_lazy; +} + +/* + * Is the specified rcu_segcblist enabled, for example, not corresponding + * to an offline or callback-offloaded CPU? + */ +static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) +{ + return !!rsclp->tails[RCU_NEXT_TAIL]; +} + +/* + * Disable the specified rcu_segcblist structure, so that callbacks can + * no longer be posted to it. This structure must be empty. + */ +static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp) +{ + WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); + WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); + WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); + rsclp->tails[RCU_NEXT_TAIL] = NULL; +} + +/* + * Is the specified segment of the specified rcu_segcblist structure + * empty of callbacks? + */ +static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) +{ + if (seg == RCU_DONE_TAIL) + return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; + return rsclp->tails[seg - 1] == rsclp->tails[seg]; +} + +/* + * Are all segments following the specified segment of the specified + * rcu_segcblist structure empty of callbacks? (The specified + * segment might well contain callbacks.)
+ */ +static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) +{ + return !*rsclp->tails[seg]; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * are ready to be invoked? + */ +static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * are still pending, that is, not yet ready to be invoked? + */ +static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); +} + +/* + * Dequeue and return the first ready-to-invoke callback. If there + * are no ready-to-invoke callbacks, return NULL. Disables interrupts + * to avoid interference. Does not protect from interference from other + * CPUs or tasks. + */ +static inline struct rcu_head * +rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) +{ + unsigned long flags; + int i; + struct rcu_head *rhp; + + local_irq_save(flags); + if (!rcu_segcblist_ready_cbs(rsclp)) { + local_irq_restore(flags); + return NULL; + } + rhp = rsclp->head; + BUG_ON(!rhp); + rsclp->head = rhp->next; + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { + if (rsclp->tails[i] != &rhp->next) + break; + rsclp->tails[i] = &rsclp->head; + } + smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ + WRITE_ONCE(rsclp->len, rsclp->len - 1); + local_irq_restore(flags); + return rhp; +} + +/* + * Account for the fact that a previously dequeued callback turned out + * to be marked as lazy. + */ +static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) +{ + unsigned long flags; + + local_irq_save(flags); + rsclp->len_lazy--; + local_irq_restore(flags); +} + +/* + * Return a pointer to the first callback in the specified rcu_segcblist + * structure. This is useful for diagnostics. 
+ */ +static inline struct rcu_head * +rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) +{ + if (rcu_segcblist_is_enabled(rsclp)) + return rsclp->head; + return NULL; +} + +/* + * Return a pointer to the first pending callback in the specified + * rcu_segcblist structure. This is useful just after posting a given + * callback -- if that callback is the first pending callback, then + * you cannot rely on someone else having already started up the required + * grace period. + */ +static inline struct rcu_head * +rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) +{ + if (rcu_segcblist_is_enabled(rsclp)) + return *rsclp->tails[RCU_DONE_TAIL]; + return NULL; +} + +/* + * Does the specified rcu_segcblist structure contain callbacks that + * have not yet been processed beyond having been posted, that is, + * does it contain callbacks in its last segment? + */ +static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) +{ + return rcu_segcblist_is_enabled(rsclp) && + !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); +} + +/* + * Enqueue the specified callback onto the specified rcu_segcblist + * structure, updating accounting as needed. Note that the ->len + * field may be accessed locklessly, hence the WRITE_ONCE(). + * The ->len field is used by rcu_barrier() and friends to determine + * if it must post a callback on this structure, and it is OK + * for rcu_barrier() to sometimes post callbacks needlessly, but + * absolutely not OK for it to ever miss posting a callback. + */ +static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy) +{ + WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ + if (lazy) + rsclp->len_lazy++; + smp_mb(); /* Ensure counts are updated before callback is enqueued. 
*/ + rhp->next = NULL; + *rsclp->tails[RCU_NEXT_TAIL] = rhp; + rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; +} + +/* + * Extract only the counts from the specified rcu_segcblist structure, + * and place them in the specified rcu_cblist structure. This function + * supports both callback orphaning and invocation, hence the separation + * of counts and callbacks. (Callbacks ready for invocation must be + * orphaned and adopted separately from pending callbacks, but counts + * apply to all callbacks. Locking must be used to make sure that + * both orphaned-callbacks lists are consistent.) + */ +static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rclp->len_lazy += rsclp->len_lazy; + rclp->len += rsclp->len; + rsclp->len_lazy = 0; + WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ +} + +/* + * Extract only those callbacks ready to be invoked from the specified + * rcu_segcblist structure and place them in the specified rcu_cblist + * structure. + */ +static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rcu_segcblist_ready_cbs(rsclp)) + return; /* Nothing to do. */ + *rclp->tail = rsclp->head; + rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; + *rsclp->tails[RCU_DONE_TAIL] = NULL; + rclp->tail = rsclp->tails[RCU_DONE_TAIL]; + for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) + if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) + rsclp->tails[i] = &rsclp->head; +} + +/* + * Extract only those callbacks still pending (not yet ready to be + * invoked) from the specified rcu_segcblist structure and place them in + * the specified rcu_cblist structure. Note that this loses information + * about any callbacks that might have been partway done waiting for + * their grace period. Too bad! They will have to start over. 
+ */ +static inline void +rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rcu_segcblist_pend_cbs(rsclp)) + return; /* Nothing to do. */ + *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; + rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; + *rsclp->tails[RCU_DONE_TAIL] = NULL; + for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) + rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; +} + +/* + * Move the entire contents of the specified rcu_segcblist structure, + * counts, callbacks, and all, to the specified rcu_cblist structure. + * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists? + * @@@ Memory barrier needed? (Not if only used at boot time...) + */ +static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rcu_segcblist_extract_done_cbs(rsclp, rclp); + rcu_segcblist_extract_pend_cbs(rsclp, rclp); + rcu_segcblist_extract_count(rsclp, rclp); +} + +/* + * Insert counts from the specified rcu_cblist structure in the + * specified rcu_segcblist structure. + */ +static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + rsclp->len_lazy += rclp->len_lazy; + /* ->len sampled locklessly. */ + WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); + rclp->len_lazy = 0; + rclp->len = 0; +} + +/* + * Move callbacks from the specified rcu_cblist to the beginning of the + * done-callbacks segment of the specified rcu_segcblist. + */ +static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + int i; + + if (!rclp->head) + return; /* No callbacks to move. 
*/ + *rclp->tail = rsclp->head; + rsclp->head = rclp->head; + for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) + if (&rsclp->head == rsclp->tails[i]) + rsclp->tails[i] = rclp->tail; + else + break; + rclp->head = NULL; + rclp->tail = &rclp->head; +} + +/* + * Move callbacks from the specified rcu_cblist to the end of the + * new-callbacks segment of the specified rcu_segcblist. + */ +static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, + struct rcu_cblist *rclp) +{ + if (!rclp->head) + return; /* Nothing to do. */ + *rsclp->tails[RCU_NEXT_TAIL] = rclp->head; + rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; + rclp->head = NULL; + rclp->tail = &rclp->head; +} + +/* + * Advance the callbacks in the specified rcu_segcblist structure based + * on the current value passed in for the grace-period counter. + */ +static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i, j; + + WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); + WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); + + /* + * Find all callbacks whose ->gp_seq numbers indicate that they + * are ready to invoke, and put them into the RCU_DONE_TAIL segment. + */ + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { + if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) + break; + rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; + } + + /* If no callbacks moved, nothing more need be done. */ + if (i == RCU_WAIT_TAIL) + return; + + /* Clean up tail pointers that might have been misordered above. */ + for (j = RCU_WAIT_TAIL; j < i; j++) + rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; + + /* + * Callbacks moved, so clean up the misordered ->tails[] pointers + * that now point into the middle of the list of ready-to-invoke + * callbacks. The overall effect is to copy down the later pointers + * into the gap that was created by the now-ready segments. 
+ */ + for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { + if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) + break; /* No more callbacks. */ + rsclp->tails[j] = rsclp->tails[i]; + rsclp->gp_seq[j] = rsclp->gp_seq[i]; + } +} + +/* + * "Accelerate" callbacks based on more-accurate grace-period information. + * The reason for this is that RCU does not synchronize the beginnings and + * ends of grace periods, and that callbacks are posted locally. This in + * turn means that the callbacks must be labelled conservatively early + * on, as getting exact information would degrade both performance and + * scalability. When more accurate grace-period information becomes + * available, previously posted callbacks can be "accelerated", marking + * them to complete at the end of the earlier grace period. + * + * This function operates on an rcu_segcblist structure, and also the + * grace-period sequence number at which new callbacks would become + * ready to invoke. + */ +static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i; + + WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); + WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); + + /* + * Find the segment preceding the oldest segment of callbacks + * whose ->gp_seq[] completion is at or after that passed in via + * "seq", skipping any empty segments. This oldest segment, along + * with any later segments, can be merged in with any newly arrived + * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" + * as their ->gp_seq[] grace-period completion sequence number. + */ + for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) + if (rsclp->tails[i] != rsclp->tails[i - 1] && + ULONG_CMP_LT(rsclp->gp_seq[i], seq)) + break; + + /* + * If all the segments contain callbacks that correspond to + * earlier grace-period sequence numbers than "seq", leave. 
+ * Assuming that the rcu_segcblist structure has enough + * segments in its arrays, this can only happen if some of + * the non-done segments contain callbacks that really are + * ready to invoke. This situation will get straightened + * out by the next call to rcu_segcblist_advance(). + * + * Also advance to the oldest segment of callbacks whose + * ->gp_seq[] completion is at or after that passed in via "seq", + * skipping any empty segments. + */ + if (++i >= RCU_NEXT_TAIL) + return false; + + /* + * Merge all later callbacks, including newly arrived callbacks, + * into the segment located by the for-loop above. Assign "seq" + * as the ->gp_seq[] value in order to correctly handle the case + * where there were no pending callbacks in the rcu_segcblist + * structure other than in the RCU_NEXT_TAIL segment. + */ + for (; i < RCU_NEXT_TAIL; i++) { + rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; + rsclp->gp_seq[i] = seq; + } + return true; +} + +/* + * Scan the specified rcu_segcblist structure for callbacks that need + * a grace period later than the one specified by "seq". We don't look + * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't + * have a grace-period sequence number. + */ +static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, + unsigned long seq) +{ + int i; + + for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) + if (rsclp->tails[i - 1] != rsclp->tails[i] && + ULONG_CMP_LT(seq, rsclp->gp_seq[i])) + return true; + return false; +} + +/* + * Interim function to return rcu_segcblist head pointer. Longer term, the + * rcu_segcblist will be used more pervasively, removing the need for this + * function. + */ +static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) +{ + return rsclp->head; +} + +/* + * Interim function to return rcu_segcblist tail pointer. Longer term, the + * rcu_segcblist will be used more pervasively, removing the need for this + * function.
+ */ +static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) +{ + WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); + return rsclp->tails[RCU_NEXT_TAIL]; +} + +#endif /* __KERNEL_RCU_SEGCBLIST_H */ diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 047ac8c28a4e..ad154a7bc114 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -22,7 +22,7 @@ * Lai Jiangshan * * For detailed explanation of Read-Copy Update mechanism see - - * Documentation/RCU/ *.txt + * Documentation/RCU/ *.txt * */ @@ -32,31 +32,20 @@ #include #include #include +#include struct srcu_array { unsigned long lock_count[2]; unsigned long unlock_count[2]; }; -struct rcu_batch { - struct rcu_head *head, **tail; -}; - -#define RCU_BATCH_INIT(name) { NULL, &(name.head) } - struct srcu_struct { unsigned long completed; unsigned long srcu_gp_seq; struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->batch_queue, ->running */ + spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */ int srcu_state; - /* callbacks just queued */ - struct rcu_batch batch_queue; - /* callbacks try to do the first check_zero */ - struct rcu_batch batch_check0; - /* callbacks done with the first check_zero and the flip */ - struct rcu_batch batch_check1; - struct rcu_batch batch_done; + struct rcu_segcblist srcu_cblist; struct delayed_work work; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; @@ -97,10 +86,7 @@ void process_srcu(struct work_struct *work); .per_cpu_ref = &name##_srcu_array, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ .srcu_state = SRCU_STATE_IDLE, \ - .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ - .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ - .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ - .batch_done = RCU_BATCH_INIT(name.batch_done), \ + .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ __SRCU_DEP_MAP_INIT(name) \ } -- 
cgit v1.2.3 From f2425b4efb0c69e77c0b9666b605ae4a1ecaae47 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Mar 2017 12:42:30 -0700 Subject: srcu: Move combining-tree definitions for SRCU's benefit This commit moves the C preprocessor code that defines the default shape of the rcu_node combining tree to a new include/linux/rcu_node_tree.h file as a first step towards enabling SRCU to create its own combining tree, which in turn enables SRCU to implement per-CPU callback handling, thus avoiding contention on the lock currently guarding the single list of callbacks. Note that users of SRCU still need to know the size of the srcu_struct structure, hence include/linux rather than kernel/rcu. This commit is code-movement only. Signed-off-by: Paul E. McKenney --- include/linux/rcu_node_tree.h | 102 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 102 insertions(+) create mode 100644 include/linux/rcu_node_tree.h (limited to 'include/linux') diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h new file mode 100644 index 000000000000..b7eb97096b1c --- /dev/null +++ b/include/linux/rcu_node_tree.h @@ -0,0 +1,102 @@ +/* + * RCU node combining tree definitions. These are used to compute + * global attributes while avoiding common-case global contention. A key + * property that these computations rely on is a tournament-style approach + * where only one of the tasks contending a lower level in the tree need + * advance to the next higher level. If properly configured, this allows + * unlimited scalability while maintaining a constant level of contention + * on the root node. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. 
+ * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright IBM Corporation, 2017 + * + * Author: Paul E. McKenney + */ + +#ifndef __LINUX_RCU_NODE_TREE_H +#define __LINUX_RCU_NODE_TREE_H + +/* + * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and + * CONFIG_RCU_FANOUT_LEAF. + * In theory, it should be possible to add more levels straightforwardly. + * In practice, this did work well going from three levels to four. + * Of course, your mileage may vary. + */ + +#ifdef CONFIG_RCU_FANOUT +#define RCU_FANOUT CONFIG_RCU_FANOUT +#else /* #ifdef CONFIG_RCU_FANOUT */ +# ifdef CONFIG_64BIT +# define RCU_FANOUT 64 +# else +# define RCU_FANOUT 32 +# endif +#endif /* #else #ifdef CONFIG_RCU_FANOUT */ + +#ifdef CONFIG_RCU_FANOUT_LEAF +#define RCU_FANOUT_LEAF CONFIG_RCU_FANOUT_LEAF +#else /* #ifdef CONFIG_RCU_FANOUT_LEAF */ +#define RCU_FANOUT_LEAF 16 +#endif /* #else #ifdef CONFIG_RCU_FANOUT_LEAF */ + +#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) +#define RCU_FANOUT_2 (RCU_FANOUT_1 * RCU_FANOUT) +#define RCU_FANOUT_3 (RCU_FANOUT_2 * RCU_FANOUT) +#define RCU_FANOUT_4 (RCU_FANOUT_3 * RCU_FANOUT) + +#if NR_CPUS <= RCU_FANOUT_1 +# define RCU_NUM_LVLS 1 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_NODES NUM_RCU_LVL_0 +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0 } +# define RCU_NODE_NAME_INIT { "rcu_node_0" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0" } +#elif NR_CPUS <= RCU_FANOUT_2 +# define RCU_NUM_LVLS 2 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1) +# define NUM_RCU_LVL_INIT { 
NUM_RCU_LVL_0, NUM_RCU_LVL_1 } +# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1" } +#elif NR_CPUS <= RCU_FANOUT_3 +# define RCU_NUM_LVLS 3 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2) +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2 } +# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2" } +#elif NR_CPUS <= RCU_FANOUT_4 +# define RCU_NUM_LVLS 4 +# define NUM_RCU_LVL_0 1 +# define NUM_RCU_LVL_1 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_3) +# define NUM_RCU_LVL_2 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_2) +# define NUM_RCU_LVL_3 DIV_ROUND_UP(NR_CPUS, RCU_FANOUT_1) +# define NUM_RCU_NODES (NUM_RCU_LVL_0 + NUM_RCU_LVL_1 + NUM_RCU_LVL_2 + NUM_RCU_LVL_3) +# define NUM_RCU_LVL_INIT { NUM_RCU_LVL_0, NUM_RCU_LVL_1, NUM_RCU_LVL_2, NUM_RCU_LVL_3 } +# define RCU_NODE_NAME_INIT { "rcu_node_0", "rcu_node_1", "rcu_node_2", "rcu_node_3" } +# define RCU_FQS_NAME_INIT { "rcu_node_fqs_0", "rcu_node_fqs_1", "rcu_node_fqs_2", "rcu_node_fqs_3" } +#else +# error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" +#endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ + +extern int rcu_num_lvls; +extern int rcu_num_nodes; + +#endif /* __LINUX_RCU_NODE_TREE_H */ -- cgit v1.2.3 From 2b34c43cc1671c59bad6dd1682ae3ee4f0919eb7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 14 Mar 2017 14:29:53 -0700 Subject: srcu: Move rcu_init_levelspread() to rcu_tree_node.h This commit moves the rcu_init_levelspread() function from kernel/rcu/tree.c to kernel/rcu/rcu.h so that SRCU can access it. This is another step towards enabling SRCU to create its own combining tree. This commit is code-movement only, give or take knock-on adjustments. 
Signed-off-by: Paul E. McKenney --- include/linux/rcu_node_tree.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_node_tree.h b/include/linux/rcu_node_tree.h index b7eb97096b1c..4b766b61e1a0 100644 --- a/include/linux/rcu_node_tree.h +++ b/include/linux/rcu_node_tree.h @@ -96,7 +96,4 @@ # error "CONFIG_RCU_FANOUT insufficient for NR_CPUS" #endif /* #if (NR_CPUS) <= RCU_FANOUT_1 */ -extern int rcu_num_lvls; -extern int rcu_num_nodes; - #endif /* __LINUX_RCU_NODE_TREE_H */ -- cgit v1.2.3 From 80a7956fe36c2ee40c6ff12c77926d267802b7c8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 22 Mar 2017 15:26:18 -0700 Subject: srcu: Merge ->srcu_state into ->srcu_gp_seq Updating ->srcu_state and ->srcu_gp_seq will lead to extremely complex race conditions given multiple callback queues, so this commit takes advantage of the two-bit state now available in rcu_seq counters to store the state in the bottom two bits of ->srcu_gp_seq. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index ad154a7bc114..e7dbc01b61a1 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -43,8 +43,7 @@ struct srcu_struct { unsigned long completed; unsigned long srcu_gp_seq; struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->srcu_cblist, ->srcu_state */ - int srcu_state; + spinlock_t queue_lock; /* protect ->srcu_cblist */ struct rcu_segcblist srcu_cblist; struct delayed_work work; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -56,7 +55,6 @@ struct srcu_struct { #define SRCU_STATE_IDLE 0 #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 -#define SRCU_STATE_DONE 3 #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -85,7 +83,6 @@ void process_srcu(struct work_struct *work); .completed = -300, \ .per_cpu_ref = &name##_srcu_array, \ .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .srcu_state = SRCU_STATE_IDLE, \ .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ __SRCU_DEP_MAP_INIT(name) \ -- cgit v1.2.3 From f60d231a87c5c9f23f10e69996f396d46f5bf901 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 24 Mar 2017 13:46:33 -0700 Subject: srcu: Crude control of expedited grace periods SRCU's implementation of expedited grace periods has always assumed that the SRCU instance is idle when the expedited request arrives. This commit improves this a bit by maintaining a count of the number of outstanding expedited requests, thus allowing prior non-expedited grace periods accommodate these requests by shifting to expedited mode. However, any non-expedited wait already in progress will still wait for the full duration. Improved control of expedited grace periods is planned, but one step at a time. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index e7dbc01b61a1..73a1b6296224 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -42,6 +42,7 @@ struct srcu_array { struct srcu_struct { unsigned long completed; unsigned long srcu_gp_seq; + atomic_t srcu_exp_cnt; struct srcu_array __percpu *per_cpu_ref; spinlock_t queue_lock; /* protect ->srcu_cblist */ struct rcu_segcblist srcu_cblist; -- cgit v1.2.3 From d8be81735aa89413b333de488251f0e64e2be591 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Mar 2017 09:59:38 -0700 Subject: srcu: Create a tiny SRCU In response to automated complaints about modifications to SRCU increasing its size, this commit creates a tiny SRCU that is used in SMP=n && PREEMPT=n builds. Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 69 +++++------------------------------- include/linux/srcutiny.h | 81 ++++++++++++++++++++++++++++++++++++++++++ include/linux/srcutree.h | 91 ++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 180 insertions(+), 61 deletions(-) create mode 100644 include/linux/srcutiny.h create mode 100644 include/linux/srcutree.h (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 73a1b6296224..907f09b14eda 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -34,28 +34,7 @@ #include #include -struct srcu_array { - unsigned long lock_count[2]; - unsigned long unlock_count[2]; -}; - -struct srcu_struct { - unsigned long completed; - unsigned long srcu_gp_seq; - atomic_t srcu_exp_cnt; - struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->srcu_cblist */ - struct rcu_segcblist srcu_cblist; - struct delayed_work work; -#ifdef CONFIG_DEBUG_LOCK_ALLOC - struct lockdep_map dep_map; -#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -}; - -/* Values for -> state variable. 
*/ -#define SRCU_STATE_IDLE 0 -#define SRCU_STATE_SCAN1 1 -#define SRCU_STATE_SCAN2 2 +struct srcu_struct; #ifdef CONFIG_DEBUG_LOCK_ALLOC @@ -77,42 +56,13 @@ int init_srcu_struct(struct srcu_struct *sp); #define __SRCU_DEP_MAP_INIT(srcu_name) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -void process_srcu(struct work_struct *work); - -#define __SRCU_STRUCT_INIT(name) \ - { \ - .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ - .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ - .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ - __SRCU_DEP_MAP_INIT(name) \ - } - -/* - * Define and initialize a srcu struct at build time. - * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. - * - * Note that although DEFINE_STATIC_SRCU() hides the name from other - * files, the per-CPU variable rules nevertheless require that the - * chosen name be globally unique. These rules also prohibit use of - * DEFINE_STATIC_SRCU() within a function. If these rules are too - * restrictive, declare the srcu_struct manually. For example, in - * each file: - * - * static struct srcu_struct my_srcu; - * - * Then, before the first use of each my_srcu, manually initialize it: - * - * init_srcu_struct(&my_srcu); - * - * See include/linux/percpu-defs.h for the rules on per-CPU variables. 
- */ -#define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ - is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) -#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) -#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) +#ifdef CONFIG_TINY_SRCU +#include <linux/srcutiny.h> +#elif defined(CONFIG_TREE_SRCU) +#include <linux/srcutree.h> +#else +#error "Unknown SRCU implementation specified to kernel configuration" +#endif /** * call_srcu() - Queue a callback for invocation after an SRCU grace period @@ -138,9 +88,6 @@ void cleanup_srcu_struct(struct srcu_struct *sp); int __srcu_read_lock(struct srcu_struct *sp) __acquires(sp); void __srcu_read_unlock(struct srcu_struct *sp, int idx) __releases(sp); void synchronize_srcu(struct srcu_struct *sp); -void synchronize_srcu_expedited(struct srcu_struct *sp); -unsigned long srcu_batches_completed(struct srcu_struct *sp); -void srcu_barrier(struct srcu_struct *sp); #ifdef CONFIG_DEBUG_LOCK_ALLOC diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h new file mode 100644 index 000000000000..4f284e4f4d8c --- /dev/null +++ b/include/linux/srcutiny.h @@ -0,0 +1,81 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * tiny variant. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#ifndef _LINUX_SRCU_TINY_H +#define _LINUX_SRCU_TINY_H + +#include <linux/swait.h> + +struct srcu_struct { + int srcu_lock_nesting[2]; /* srcu_read_lock() nesting depth. */ + struct swait_queue_head srcu_wq; + /* Last srcu_read_unlock() wakes GP. */ + unsigned long srcu_gp_seq; /* GP seq # for callback tagging. */ + struct rcu_segcblist srcu_cblist; + /* Pending SRCU callbacks. */ + int srcu_idx; /* Current reader array element. */ + bool srcu_gp_running; /* GP workqueue running? */ + bool srcu_gp_waiting; /* GP waiting for readers? */ + struct work_struct srcu_work; /* For driving grace periods. */ +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +}; + +void srcu_drive_gp(struct work_struct *wp); + +#define __SRCU_STRUCT_INIT(name) \ +{ \ + .srcu_wq = __SWAIT_QUEUE_HEAD_INITIALIZER(name.srcu_wq), \ + .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist), \ + .srcu_work = __WORK_INITIALIZER(name.srcu_work, srcu_drive_gp), \ + __SRCU_DEP_MAP_INIT(name) \ +} + +/* + * This odd _STATIC_ arrangement is needed for API compatibility with + * Tree SRCU, which needs some per-CPU data.
+ */ +#define DEFINE_SRCU(name) \ + struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_STATIC_SRCU(name) \ + static struct srcu_struct name = __SRCU_STRUCT_INIT(name) + +void synchronize_srcu(struct srcu_struct *sp); + +static inline void synchronize_srcu_expedited(struct srcu_struct *sp) +{ + synchronize_srcu(sp); +} + +static inline void srcu_barrier(struct srcu_struct *sp) +{ + synchronize_srcu(sp); +} + +static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) +{ + return 0; +} + +#endif diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h new file mode 100644 index 000000000000..f2b3bd6c6bc2 --- /dev/null +++ b/include/linux/srcutree.h @@ -0,0 +1,91 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * tree variant. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. 
+ * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#ifndef _LINUX_SRCU_TREE_H +#define _LINUX_SRCU_TREE_H + +struct srcu_array { + unsigned long lock_count[2]; + unsigned long unlock_count[2]; +}; + +struct srcu_struct { + unsigned long completed; + unsigned long srcu_gp_seq; + atomic_t srcu_exp_cnt; + struct srcu_array __percpu *per_cpu_ref; + spinlock_t queue_lock; /* protect ->srcu_cblist */ + struct rcu_segcblist srcu_cblist; + struct delayed_work work; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +}; + +/* Values for -> state variable. */ +#define SRCU_STATE_IDLE 0 +#define SRCU_STATE_SCAN1 1 +#define SRCU_STATE_SCAN2 2 + +void process_srcu(struct work_struct *work); + +#define __SRCU_STRUCT_INIT(name) \ + { \ + .completed = -300, \ + .per_cpu_ref = &name##_srcu_array, \ + .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ + .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ + .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ + __SRCU_DEP_MAP_INIT(name) \ + } + +/* + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. 
+ */ +#define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) +#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) + +void synchronize_srcu_expedited(struct srcu_struct *sp); +void srcu_barrier(struct srcu_struct *sp); +unsigned long srcu_batches_completed(struct srcu_struct *sp); + +#endif -- cgit v1.2.3 From dad81a2026841b5e2651aab58a7398c13cc05847 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sat, 25 Mar 2017 17:23:44 -0700 Subject: srcu: Introduce CLASSIC_SRCU Kconfig option The TREE_SRCU rewrite is large and a bit on the non-simple side, so this commit helps reduce risk by allowing the old v4.11 SRCU algorithm to be selected using a new CLASSIC_SRCU Kconfig option that depends on RCU_EXPERT. The default is to use the new TREE_SRCU and TINY_SRCU algorithms, in order to help get these the testing that they need. However, if your users do not require the update-side scalability that is to be provided by TREE_SRCU, select RCU_EXPERT and then CLASSIC_SRCU to revert back to the old classic SRCU algorithm. Signed-off-by: Paul E. 
McKenney --- include/linux/srcu.h | 2 + include/linux/srcuclassic.h | 101 ++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 103 insertions(+) create mode 100644 include/linux/srcuclassic.h (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 907f09b14eda..167ad8831aaf 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -60,6 +60,8 @@ int init_srcu_struct(struct srcu_struct *sp); #include <linux/srcutiny.h> #elif defined(CONFIG_TREE_SRCU) #include <linux/srcutree.h> +#elif defined(CONFIG_CLASSIC_SRCU) +#include <linux/srcuclassic.h> #else #error "Unknown SRCU implementation specified to kernel configuration" #endif diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h new file mode 100644 index 000000000000..41cf99930f34 --- /dev/null +++ b/include/linux/srcuclassic.h @@ -0,0 +1,101 @@ +/* + * Sleepable Read-Copy Update mechanism for mutual exclusion, + * classic v4.11 variant. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html.
+ * + * Copyright (C) IBM Corporation, 2017 + * + * Author: Paul McKenney + */ + +#ifndef _LINUX_SRCU_CLASSIC_H +#define _LINUX_SRCU_CLASSIC_H + +struct srcu_array { + unsigned long lock_count[2]; + unsigned long unlock_count[2]; +}; + +struct rcu_batch { + struct rcu_head *head, **tail; +}; + +#define RCU_BATCH_INIT(name) { NULL, &(name.head) } + +struct srcu_struct { + unsigned long completed; + struct srcu_array __percpu *per_cpu_ref; + spinlock_t queue_lock; /* protect ->batch_queue, ->running */ + bool running; + /* callbacks just queued */ + struct rcu_batch batch_queue; + /* callbacks try to do the first check_zero */ + struct rcu_batch batch_check0; + /* callbacks done with the first check_zero and the flip */ + struct rcu_batch batch_check1; + struct rcu_batch batch_done; + struct delayed_work work; +#ifdef CONFIG_DEBUG_LOCK_ALLOC + struct lockdep_map dep_map; +#endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ +}; + +void process_srcu(struct work_struct *work); + +#define __SRCU_STRUCT_INIT(name) \ + { \ + .completed = -300, \ + .per_cpu_ref = &name##_srcu_array, \ + .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ + .running = false, \ + .batch_queue = RCU_BATCH_INIT(name.batch_queue), \ + .batch_check0 = RCU_BATCH_INIT(name.batch_check0), \ + .batch_check1 = RCU_BATCH_INIT(name.batch_check1), \ + .batch_done = RCU_BATCH_INIT(name.batch_done), \ + .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ + __SRCU_DEP_MAP_INIT(name) \ + } + +/* + * Define and initialize a srcu struct at build time. + * Do -not- call init_srcu_struct() nor cleanup_srcu_struct() on it. + * + * Note that although DEFINE_STATIC_SRCU() hides the name from other + * files, the per-CPU variable rules nevertheless require that the + * chosen name be globally unique. These rules also prohibit use of + * DEFINE_STATIC_SRCU() within a function. If these rules are too + * restrictive, declare the srcu_struct manually. 
For example, in + * each file: + * + * static struct srcu_struct my_srcu; + * + * Then, before the first use of each my_srcu, manually initialize it: + * + * init_srcu_struct(&my_srcu); + * + * See include/linux/percpu-defs.h for the rules on per-CPU variables. + */ +#define __DEFINE_SRCU(name, is_static) \ + static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ + is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) +#define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) +#define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) + +void synchronize_srcu_expedited(struct srcu_struct *sp); +void srcu_barrier(struct srcu_struct *sp); +unsigned long srcu_batches_completed(struct srcu_struct *sp); + +#endif -- cgit v1.2.3 From 25ce4be72411867e0471908ee9319599035cc624 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2017 09:06:41 +0200 Subject: genirq: Return the IRQ name from free_irq() This allows callers to get back at them instead of having to store it in another variable. 
Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner --- include/linux/interrupt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 53144e78a369..a6fba4804672 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -155,7 +155,7 @@ extern int __must_check request_percpu_irq(unsigned int irq, irq_handler_t handler, const char *devname, void __percpu *percpu_dev_id); -extern void free_irq(unsigned int, void *); +extern const void *free_irq(unsigned int, void *); extern void free_percpu_irq(unsigned int, void __percpu *); struct device; -- cgit v1.2.3 From 704e8953d3e9db29d5d93c0bf6973d86fe15e679 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 13 Apr 2017 09:06:42 +0200 Subject: PCI/irq: Add pci_request_irq() and pci_free_irq() helpers These are small wrappers around request_threaded_irq() and free_irq(), which dynamically allocate space for the device name so that drivers don't need to keep static buffers for these around. Additionally it works with device-relative vector numbers to make the usage easier, and force the IRQF_SHARED flag on given that it has no runtime overhead and should be supported by all PCI devices. 
Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas Reviewed-by: Thomas Gleixner --- include/linux/pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..b23f81b583ab 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -28,6 +28,7 @@ #include #include #include +#include #include #include #include @@ -1072,6 +1073,11 @@ int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); void pci_ignore_hotplug(struct pci_dev *dev); +int __printf(6, 7) pci_request_irq(struct pci_dev *dev, unsigned int nr, + irq_handler_t handler, irq_handler_t thread_fn, void *dev_id, + const char *fmt, ...); +void pci_free_irq(struct pci_dev *dev, unsigned int nr, void *dev_id); + /* ROM control related routines */ int pci_enable_rom(struct pci_dev *pdev); void pci_disable_rom(struct pci_dev *pdev); -- cgit v1.2.3 From f318dd083c8128c50e48ceb8c3e812e52800fc4f Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 18 Apr 2017 11:27:03 -0700 Subject: cma: Store a name in the cma structure Frameworks that may want to enumerate CMA heaps (e.g. Ion) will find it useful to have an explicit name attached to each region. Store the name in each CMA structure. 
Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- include/linux/cma.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index 03f32d0bd1d8..d41d1f8d1e28 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -21,13 +21,15 @@ struct cma; extern unsigned long totalcma_pages; extern phys_addr_t cma_get_base(const struct cma *cma); extern unsigned long cma_get_size(const struct cma *cma); +extern const char *cma_get_name(const struct cma *cma); extern int __init cma_declare_contiguous(phys_addr_t base, phys_addr_t size, phys_addr_t limit, phys_addr_t alignment, unsigned int order_per_bit, - bool fixed, struct cma **res_cma); + bool fixed, const char *name, struct cma **res_cma); extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, + const char *name, struct cma **res_cma); extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask); -- cgit v1.2.3 From e4231bcda72daef497af45e195a33daa0f9357d0 Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Tue, 18 Apr 2017 11:27:04 -0700 Subject: cma: Introduce cma_for_each_area Frameworks (e.g. Ion) may want to iterate over each possible CMA area to allow for enumeration. Introduce a function to allow a callback. 
Signed-off-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- include/linux/cma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index d41d1f8d1e28..3e8fbf5a5c73 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -34,4 +34,6 @@ extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align, gfp_t gfp_mask); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); + +extern int cma_for_each_area(int (*it)(struct cma *cma, void *data), void *data); #endif -- cgit v1.2.3 From 5f0d5a3ae7cff0d7fa943c199c3a2e44f23e1fac Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Wed, 18 Jan 2017 02:53:44 -0800 Subject: mm: Rename SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU A group of Linux kernel hackers reported chasing a bug that resulted from their assumption that SLAB_DESTROY_BY_RCU provided an existence guarantee, that is, that no block from such a slab would be reallocated during an RCU read-side critical section. Of course, that is not the case. Instead, SLAB_DESTROY_BY_RCU only prevents freeing of an entire slab of blocks. However, there is a phrase for this, namely "type safety". This commit therefore renames SLAB_DESTROY_BY_RCU to SLAB_TYPESAFE_BY_RCU in order to avoid future instances of this sort of confusion. Signed-off-by: Paul E. McKenney Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Cc: Andrew Morton Cc: Acked-by: Johannes Weiner Acked-by: Vlastimil Babka [ paulmck: Add comments mentioning the old name, as requested by Eric Dumazet, in order to help people familiar with the old name find the new one. 
] Acked-by: David Rientjes --- include/linux/dma-fence.h | 4 ++-- include/linux/slab.h | 6 ++++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 6048fa404e57..a5195a7d6f77 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -229,7 +229,7 @@ static inline struct dma_fence *dma_fence_get_rcu(struct dma_fence *fence) * * Function returns NULL if no refcount could be obtained, or the fence. * This function handles acquiring a reference to a fence that may be - * reallocated within the RCU grace period (such as with SLAB_DESTROY_BY_RCU), + * reallocated within the RCU grace period (such as with SLAB_TYPESAFE_BY_RCU), * so long as the caller is using RCU on the pointer to the fence. * * An alternative mechanism is to employ a seqlock to protect a bunch of @@ -257,7 +257,7 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) * have successfully acquire a reference to it. If it no * longer matches, we are holding a reference to some other * reallocated pointer. This is possible if the allocator - * is using a freelist like SLAB_DESTROY_BY_RCU where the + * is using a freelist like SLAB_TYPESAFE_BY_RCU where the * fence remains valid for the RCU grace period, but it * may be reallocated. When using such allocators, we are * responsible for ensuring the reference we get is to diff --git a/include/linux/slab.h b/include/linux/slab.h index 3c37a8c51921..04a7f7993e67 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -28,7 +28,7 @@ #define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ #define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ /* - * SLAB_DESTROY_BY_RCU - **WARNING** READ THIS! + * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * * This delays freeing the SLAB page by a grace period, it does _NOT_ * delay object freeing. 
This means that if you do kmem_cache_free() @@ -61,8 +61,10 @@ * * rcu_read_lock before reading the address, then rcu_read_unlock after * taking the spinlock within the structure expected at that address. + * + * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -#define SLAB_DESTROY_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ +#define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ #define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ #define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ -- cgit v1.2.3 From de5bbdd01cf9ee3cd4586b5a970d3ea015c6d7e3 Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 18 Apr 2017 14:21:04 -0500 Subject: PCI: Change pci_host_common_probe() visibility pci_host_common_probe() is defined when CONFIG_PCI_HOST_COMMON=y; therefore the function declaration should match that. drivers/pci/host/pcie-tango.c:300:9: error: implicit declaration of function 'pci_host_common_probe' Signed-off-by: Marc Gonzalez Signed-off-by: Bjorn Helgaas --- include/linux/pci-ecam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci-ecam.h b/include/linux/pci-ecam.h index b8f11d783a11..809c2f1873ac 100644 --- a/include/linux/pci-ecam.h +++ b/include/linux/pci-ecam.h @@ -69,7 +69,7 @@ extern struct pci_ecam_ops xgene_v1_pcie_ecam_ops; /* APM X-Gene PCIe v1 */ extern struct pci_ecam_ops xgene_v2_pcie_ecam_ops; /* APM X-Gene PCIe v2.x */ #endif -#ifdef CONFIG_PCI_HOST_GENERIC +#ifdef CONFIG_PCI_HOST_COMMON /* for DT-based PCI controllers that support ECAM */ int pci_host_common_probe(struct platform_device *pdev, struct pci_ecam_ops *ops); -- cgit v1.2.3 From f49c3f90a31f6e19ef3343dcc8809dac1019b59e Mon Sep 17 00:00:00 2001 From: Baoquan He Date: Fri, 7 Apr 2017 16:22:47 +0800 Subject: ACPI / tables: Drop acpi_parse_entries() which is not used Function acpi_parse_entries() is not used any more and if necessary, 
acpi_table_parse_entries() can be used instead of it, so drop it. Signed-off-by: Baoquan He [ rjw: Subject / changelog ] Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773..83abbfceabad 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -233,10 +233,6 @@ int acpi_numa_init (void); int acpi_table_init (void); int acpi_table_parse(char *id, acpi_tbl_table_handler handler); -int __init acpi_parse_entries(char *id, unsigned long table_size, - acpi_tbl_entry_handler handler, - struct acpi_table_header *table_header, - int entry_id, unsigned int max_entries); int __init acpi_table_parse_entries(char *id, unsigned long table_size, int entry_id, acpi_tbl_entry_handler handler, -- cgit v1.2.3 From 139c279fb9423833fb730ccb07e549b5a9183f44 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 08:53:42 +0200 Subject: quota: Remove dquot_quotactl_ops Nobody uses them anymore. Signed-off-by: Jan Kara --- include/linux/quotaops.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/quotaops.h b/include/linux/quotaops.h index 799a63d0e1a8..9c6f768b7d32 100644 --- a/include/linux/quotaops.h +++ b/include/linux/quotaops.h @@ -162,7 +162,6 @@ static inline bool sb_has_quota_active(struct super_block *sb, int type) * Operations supported for diskquotas. */ extern const struct dquot_operations dquot_operations; -extern const struct quotactl_ops dquot_quotactl_ops; extern const struct quotactl_ops dquot_quotactl_sysfile_ops; #else -- cgit v1.2.3 From e21b7a0b988772e82e7147e1c659a5afe2ae003c Mon Sep 17 00:00:00 2001 From: Arianna Avanzini Date: Wed, 12 Apr 2017 18:23:08 +0200 Subject: block, bfq: add full hierarchical scheduling and cgroups support Add complete support for full hierarchical scheduling, with a cgroups interface. 
Full hierarchical scheduling is implemented through the 'entity' abstraction: both bfq_queues, i.e., the internal BFQ queues associated with processes, and groups are represented in general by entities. Given the bfq_queues associated with the processes belonging to a given group, the entities representing these queues are sons of the entity representing the group. At higher levels, if a group, say G, contains other groups, then the entity representing G is the parent entity of the entities representing the groups in G. Hierarchical scheduling is performed as follows: if the timestamps of a leaf entity (i.e., of a bfq_queue) change, and such a change lets the entity become the next-to-serve entity for its parent entity, then the timestamps of the parent entity are recomputed as a function of the budget of its new next-to-serve leaf entity. If the parent entity belongs, in its turn, to a group, and its new timestamps let it become the next-to-serve for its parent entity, then the timestamps of the latter parent entity are recomputed as well, and so on. When a new bfq_queue must be set in service, the reverse path is followed: the next-to-serve highest-level entity is chosen, then its next-to-serve child entity, and so on, until the next-to-serve leaf entity is reached, and the bfq_queue that this entity represents is set in service. Writeback is accounted for on a per-group basis, i.e., for each group, the async I/O requests of the processes of the group are enqueued in a distinct bfq_queue, and the entity associated with this queue is a child of the entity associated with the group. Weights can be assigned explicitly to groups and processes through the cgroups interface, differently from what happens, for single processes, if the cgroups interface is not used (as explained in the description of the previous patch). In particular, since each node has a full scheduler, each group can be assigned its own weight. 
Signed-off-by: Fabio Checconi Signed-off-by: Paolo Valente Signed-off-by: Arianna Avanzini Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ec993573e0a8..fe9c512cc6fa 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -50,7 +50,7 @@ struct blk_stat_callback; * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. */ -#define BLKCG_MAX_POLS 2 +#define BLKCG_MAX_POLS 3 typedef void (rq_end_io_fn)(struct request *, int); -- cgit v1.2.3 From 5f1ae4ebe578319a0cd5dae9591dd426070be106 Mon Sep 17 00:00:00 2001 From: Fu Wei Date: Sat, 1 Apr 2017 01:51:01 +0800 Subject: acpi/arm64: Add GTDT table parse driver This patch adds support for parsing arch timer info in GTDT, provides some kernel APIs to parse all the PPIs and always-on info in GTDT and export them. By this driver, we can simplify arm_arch_timer drivers, and separate the ACPI GTDT knowledge from it. Signed-off-by: Fu Wei Signed-off-by: Hanjun Guo Acked-by: Rafael J. 
Wysocki Tested-by: Xiongfeng Wang Reviewed-by: Hanjun Guo Tested-by: Hanjun Guo Acked-by: Lorenzo Pieralisi Signed-off-by: Mark Rutland --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773..4b5c146fba97 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -595,6 +595,12 @@ enum acpi_reconfig_event { int acpi_reconfig_notifier_register(struct notifier_block *nb); int acpi_reconfig_notifier_unregister(struct notifier_block *nb); +#ifdef CONFIG_ACPI_GTDT +int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); +int acpi_gtdt_map_ppi(int type); +bool acpi_gtdt_c3stop(int type); +#endif + #else /* !CONFIG_ACPI */ #define acpi_disabled 1 -- cgit v1.2.3 From a712c3ed9b8a4565a200f3e8e09c42079b1666b3 Mon Sep 17 00:00:00 2001 From: Fu Wei Date: Sat, 1 Apr 2017 01:51:03 +0800 Subject: acpi/arm64: Add memory-mapped timer support in GTDT driver On platforms booting with ACPI, architected memory-mapped timers' configuration data is provided by firmware through the ACPI GTDT static table. The clocksource architected timer kernel driver requires a firmware interface to collect timer configuration and configure its driver. This infrastructure is present for device tree systems, but it is missing on systems booting with ACPI. Implement the kernel infrastructure required to parse the static ACPI GTDT table so that the architected timer clocksource driver can make use of it on systems booting with ACPI, therefore enabling the corresponding timers configuration.
Signed-off-by: Fu Wei Signed-off-by: Hanjun Guo Acked-by: Lorenzo Pieralisi [Mark: restructure error handling] Signed-off-by: Mark Rutland --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 4b5c146fba97..31937249f8cc 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -599,6 +599,7 @@ int acpi_reconfig_notifier_unregister(struct notifier_block *nb); int acpi_gtdt_init(struct acpi_table_header *table, int *platform_timer_count); int acpi_gtdt_map_ppi(int type); bool acpi_gtdt_c3stop(int type); +int acpi_arch_timer_mem_init(struct arch_timer_mem *timer_mem, int *timer_count); #endif #else /* !CONFIG_ACPI */ -- cgit v1.2.3 From 314fe91b4a99949bb720501ba74d2228093bbf47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Apr 2017 12:13:57 +0200 Subject: block: remove blk_end_request_err and __blk_end_request_err Both functions are entirely unused. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fe9c512cc6fa..cca704c80b01 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1127,12 +1127,10 @@ extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); extern bool blk_end_request_cur(struct request *rq, int error); -extern bool blk_end_request_err(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, int error); extern bool __blk_end_request_cur(struct request *rq, int error); -extern bool __blk_end_request_err(struct request *rq, int error); extern void blk_complete_request(struct request *); extern void 
__blk_complete_request(struct request *); -- cgit v1.2.3 From fa1a15c08e23cb89c5837915b1989909bce47456 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 12 Apr 2017 12:13:58 +0200 Subject: block: remove blk_end_request_cur This function is not used anywhere in the kernel. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cca704c80b01..5b52b3d7818c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1126,7 +1126,6 @@ extern void blk_finish_request(struct request *rq, int error); extern bool blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, int error); -extern bool blk_end_request_cur(struct request *rq, int error); extern bool __blk_end_request(struct request *rq, int error, unsigned int nr_bytes); extern void __blk_end_request_all(struct request *rq, int error); -- cgit v1.2.3 From 468d01bec544286bb5283f012b95b5b84636565b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 2 Feb 2017 11:40:15 -0800 Subject: types: Update obsolete callback_head comment The comment header for callback_head (and thus for rcu_head) states that the bottom two bits of a pointer to these structures must be zero. This is obsolete: The new requirement is that only the bottom bit need be zero. This commit therefore updates this comment. Signed-off-by: Paul E. McKenney --- include/linux/types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index 1e7bd24848fc..258099a4ed82 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -209,7 +209,7 @@ struct ustat { * naturally due ABI requirements, but some architectures (like CRIS) have * weird ABI and we need to ask it explicitly. 
* - * The alignment is required to guarantee that bits 0 and 1 of @next will be + * The alignment is required to guarantee that bit 0 of @next will be * clear under normal conditions -- as long as we use call_rcu(), * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. * -- cgit v1.2.3 From 48ac34666ff76843d8743db1cc78b303759916f1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 27 Feb 2017 21:14:19 +0200 Subject: hlist_add_tail_rcu disable sparse warning sparse is unhappy about this code in hlist_add_tail_rcu: struct hlist_node *i, *last = NULL; for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) last = i; This is because hlist_first_rcu and hlist_next_rcu return __rcu pointers. It's a false positive - it's a write side primitive and so does not need to be called in a read side critical section. The following trivial patch disables the warning without changing the behaviour in any way. Note: __hlist_for_each_rcu would also remove the warning but it would be confusing since it calls rcu_dereference and is designed to run in the rcu read side critical section. Signed-off-by: Michael S. Tsirkin Reviewed-by: Steven Rostedt (VMware) Signed-off-by: Paul E. McKenney --- include/linux/rculist.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 4f7a9561b8c4..b1fd8bf85fdc 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -509,7 +509,8 @@ static inline void hlist_add_tail_rcu(struct hlist_node *n, { struct hlist_node *i, *last = NULL; - for (i = hlist_first_rcu(h); i; i = hlist_next_rcu(i)) + /* Note: write side code, so rcu accessors are not needed.
*/ + for (i = h->first; i; i = i->next) last = i; if (last) { -- cgit v1.2.3 From 28c5fe99016d28f15d1b825df8acb1558a3a63a1 Mon Sep 17 00:00:00 2001 From: Felix Brack Date: Thu, 13 Apr 2017 09:51:38 +0200 Subject: leds: pca9532: Extend pca9532 device tree support This patch extends the device tree support for the pca9532 by adding the leds 'default-state' property. Signed-off-by: Felix Brack Signed-off-by: Jacek Anaszewski --- include/linux/leds-pca9532.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds-pca9532.h b/include/linux/leds-pca9532.h index d215b4561180..5e240b2b4d58 100644 --- a/include/linux/leds-pca9532.h +++ b/include/linux/leds-pca9532.h @@ -22,7 +22,8 @@ enum pca9532_state { PCA9532_OFF = 0x0, PCA9532_ON = 0x1, PCA9532_PWM0 = 0x2, - PCA9532_PWM1 = 0x3 + PCA9532_PWM1 = 0x3, + PCA9532_KEEP = 0xff, }; struct pca9532_led { @@ -44,4 +45,3 @@ struct pca9532_platform_data { }; #endif /* __LINUX_PCA9532_H */ - -- cgit v1.2.3 From 4a67c9fde04fc1b6752fa68c495310ca3ed29eeb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Fri, 31 Mar 2017 11:11:48 +0200 Subject: mtd: use dev_of_node helper in mtd_get_of_node MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This allows better compile-time optimizations with CONFIG_OF disabled. 
Signed-off-by: Rafał Miłecki Acked-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eebdc63cf6af..f8db5b2e4028 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -393,7 +393,7 @@ static inline void mtd_set_of_node(struct mtd_info *mtd, static inline struct device_node *mtd_get_of_node(struct mtd_info *mtd) { - return mtd->dev.of_node; + return dev_of_node(&mtd->dev); } static inline int mtd_oobavail(struct mtd_info *mtd, struct mtd_oob_ops *ops) -- cgit v1.2.3 From cf9ea8ca4a0bea7eda12f8fb04dc34146839a215 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 19 Apr 2017 17:48:51 +0100 Subject: linux/io.h: Add pci_remap_cfgspace() interface The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and Posting") mandate non-posted configuration transactions. As further highlighted in the PCIe specifications (4.0 - Rev0.3, "Ordering Considerations for the Enhanced Configuration Access Mechanism"), through ECAM and ECAM-derivative configuration mechanism, the memory mapped transactions from the host CPU into Configuration Requests on the PCI express fabric may create ordering problems for software because writes to memory address are typically posted transactions (unless the architecture can enforce through virtual address mapping non-posted write transactions behaviour) but writes to Configuration Space are not posted on the PCI express fabric. 
Current DT and ACPI host bridge controllers map PCI configuration space (ECAM and ECAM-derivative) into the virtual address space through ioremap() calls, that are non-cacheable device accesses on most architectures, but may provide "bufferable" or "posted" write semantics in architecture like eg ARM/ARM64 that allow ioremap'ed regions writes to be buffered in the bus connecting the host CPU to the PCI fabric; this behaviour, as underlined in the PCIe specifications, may trigger transactions ordering rules and must be prevented. Introduce a new generic and explicit API to create a memory mapping for ECAM and ECAM-derivative config space area that defaults to ioremap_nocache() (which should provide a sane default behaviour) but still allowing architectures on which ioremap_nocache() results in posted write transactions to override the function call with an arch specific implementation that complies with the PCI specifications for configuration transactions. [bhelgaas: fold in #ifdef CONFIG_PCI wrapper] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Arnd Bergmann Cc: Will Deacon Cc: Russell King Cc: Catalin Marinas --- include/linux/io.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 82ef36eac8a1..2195d9ea4aaa 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -90,6 +90,27 @@ void devm_memunmap(struct device *dev, void *addr); void *__devm_memremap_pages(struct device *dev, struct resource *res); +#ifdef CONFIG_PCI +/* + * The PCI specifications (Rev 3.0, 3.2.5 "Transaction Ordering and + * Posting") mandate non-posted configuration transactions. 
There is + * no ioremap API in the kernel that can guarantee non-posted write + * semantics across arches so provide a default implementation for + * mapping PCI config space that defaults to ioremap_nocache(); arches + * should override it if they have memory mapping implementations that + * guarantee non-posted writes semantics to make the memory mapping + * compliant with the PCI specification. + */ +#ifndef pci_remap_cfgspace +#define pci_remap_cfgspace pci_remap_cfgspace +static inline void __iomem *pci_remap_cfgspace(phys_addr_t offset, + size_t size) +{ + return ioremap_nocache(offset, size); +} +#endif +#endif + /* * Some systems do not have legacy ISA devices. * /dev/port is not a valid interface on these systems. -- cgit v1.2.3 From 8661423eea1a1b58417014716e3f1ba286072379 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 19 Apr 2017 14:02:08 +0200 Subject: ACPI / utils: Add new acpi_dev_present helper acpi_dev_found just iterates over all ACPI-ids and sees if one matches. This means that it will return true for devices which are in the DSDT but disabled (their _STA method returns 0). For some drivers it is useful to be able to check if a certain HID is not only present in the namespace, but also actually present as in acpi_device_is_present() will return true for the device. For example because if a certain device is present then the driver will want to use an extcon or IIO ADC channel provided by that device. This commit adds a new acpi_dev_present helper which drivers can use to this end. Like acpi_dev_found, acpi_dev_present takes a HID as argument, but it also has 2 extra optional arguments to only check for an ACPI device with a specific UID and/or HRV value. This makes it more generic and allows it to replace custom code doing similar checks in several places. Arguably acpi_dev_present is what acpi_dev_found should have been, but there are too many users to just change acpi_dev_found without the risk of breaking something.
Signed-off-by: Hans de Goede Reviewed-by: Lukas Wunner Reviewed-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773..841a8dc55ade 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -611,6 +611,11 @@ static inline bool acpi_dev_found(const char *hid) return false; } +static inline bool acpi_dev_present(const char *hid, const char *uid, s64 hrv) +{ + return false; +} + static inline bool is_acpi_node(struct fwnode_handle *fwnode) { return false; -- cgit v1.2.3 From 72058005411ffddcae6c06f7b691d635489132af Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 19 Apr 2017 15:14:31 -0700 Subject: dax: add a facility to lookup a dax device by 'host' device name For the current block_device based filesystem-dax path, we need a way for it to lookup the dax_device associated with a block_device. Add a 'host' property of a dax_device that can be used for this purpose. It is a free form string, but for a dax_device associated with a block device it is the bdev name. This is a stop-gap until filesystems are able to mount on a dax-inode directly. Signed-off-by: Dan Williams --- include/linux/dax.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 5b62f5d19aea..9b2d5ba10d7d 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -10,6 +10,7 @@ struct iomap_ops; int dax_read_lock(void); void dax_read_unlock(int id); +struct dax_device *dax_get_by_host(const char *host); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From 6568b08b77816cda2a95919c7494108d983d5941 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 24 Jan 2017 18:44:18 -0800 Subject: dax: introduce dax_operations Track a set of dax_operations per dax_device that can be set at alloc_dax() time. 
These operations will be used to stop the abuse of block_device_operations for communicating dax capabilities to filesystems. It will also be used to replace the "pmem api" and move pmem-specific cache maintenance, and other dax-driver-specific filesystem-dax operations, to dax device methods. In particular this allows us to stop abusing __copy_user_nocache(), via memcpy_to_pmem(), with a driver specific replacement. This is a standalone introduction of the operations. Follow on patches convert each dax-driver and teach fs/dax.c to use ->direct_access() from dax_operations instead of block_device_operations. Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 9b2d5ba10d7d..74ebb92b625a 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -7,6 +7,16 @@ #include struct iomap_ops; +struct dax_device; +struct dax_operations { + /* + * direct_access: translate a device-relative + * logical-page-offset into an absolute physical pfn. Return the + * number of pages available for DAX at that pfn. + */ + long (*direct_access)(struct dax_device *, pgoff_t, long, + void **, pfn_t *); +}; int dax_read_lock(void); void dax_read_unlock(int id); -- cgit v1.2.3 From c1d6e828a35df524df2af277eedd1471d05e4f4c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 24 Jan 2017 23:02:09 -0800 Subject: pmem: add dax_operations support Setup a dax_device to have the same lifetime as the pmem block device and add a ->direct_access() method that is equivalent to pmem_direct_access(). Once fs/dax.c has been converted to use dax_operations the old pmem_direct_access() will be removed. 
Signed-off-by: Dan Williams --- include/linux/dax.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 74ebb92b625a..39a0312c45c3 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -21,6 +21,12 @@ struct dax_operations { int dax_read_lock(void); void dax_read_unlock(int id); struct dax_device *dax_get_by_host(const char *host); +struct dax_device *alloc_dax(void *private, const char *host, + const struct dax_operations *ops); +void put_dax(struct dax_device *dax_dev); +bool dax_alive(struct dax_device *dax_dev); +void kill_dax(struct dax_device *dax_dev); +void *dax_get_private(struct dax_device *dax_dev); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From da8d7f079b868ceab830309f80efc69d350576f3 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 19 Apr 2017 14:01:24 -0700 Subject: block: Export blk_init_request_from_bio() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Export this function such that it becomes available to block drivers. 
Signed-off-by: Bart Van Assche Reviewed-by: Christoph Hellwig Cc: Matias Bjørling Cc: Adam Manzanares Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5b52b3d7818c..3470375952a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -924,6 +924,7 @@ extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); +extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, int, gfp_t); -- cgit v1.2.3 From 0be0dee64eacd950f8e4b6c45adb5a92392eaaaf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 19 Apr 2017 14:01:27 -0700 Subject: block: Inline blk_rq_set_prio() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since only a single caller remains, inline blk_rq_set_prio(). Initialize req->ioprio even if no I/O priority has been set in the bio nor in the I/O context. 
Signed-off-by: Bart Van Assche Reviewed-by: Adam Manzanares Tested-by: Adam Manzanares Reviewed-by: Christoph Hellwig Cc: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3470375952a1..51c9e391798e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1087,20 +1087,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq) return nr_bios; } -/* - * blk_rq_set_prio - associate a request with prio from ioc - * @rq: request of interest - * @ioc: target iocontext - * - * Assocate request prio with ioc prio so request based drivers - * can leverage priority information. - */ -static inline void blk_rq_set_prio(struct request *rq, struct io_context *ioc) -{ - if (ioc) - rq->ioprio = ioc->ioprio; -} - /* * Request issue related functions. */ -- cgit v1.2.3 From f9b67f0014cba18f1aabb6fa9272335a043eb6fd Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Wed, 19 Apr 2017 13:36:10 -0600 Subject: dma-buf: Rename dma-ops to prevent conflict with kunmap_atomic macro Seeing the kunmap_atomic dma_buf_ops share the same name with a macro in highmem.h, the former can be aliased if any dma-buf user includes that header. I'm personally trying to include highmem.h inside scatterlist.h and this breaks the dma-buf code proper. Christoph Hellwig suggested [1] renaming it and pushing this patch ASAP. To maintain consistency I've renamed all four of kmap* and kunmap* to be map* and unmap*. (Even though only kmap_atomic presently conflicts.) 
[1] https://www.spinics.net/lists/target-devel/msg15070.html Signed-off-by: Logan Gunthorpe Reviewed-by: Sinclair Yeh Acked-by: Daniel Vetter Acked-by: Sumit Semwal Signed-off-by: Sumit Semwal Link: http://patchwork.freedesktop.org/patch/msgid/1492630570-879-1-git-send-email-logang@deltatee.com --- include/linux/dma-buf.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-buf.h b/include/linux/dma-buf.h index bfb3704fc6fc..79f27d60ec66 100644 --- a/include/linux/dma-buf.h +++ b/include/linux/dma-buf.h @@ -39,13 +39,13 @@ struct dma_buf_attachment; /** * struct dma_buf_ops - operations possible on struct dma_buf - * @kmap_atomic: maps a page from the buffer into kernel address - * space, users may not block until the subsequent unmap call. - * This callback must not sleep. - * @kunmap_atomic: [optional] unmaps a atomically mapped page from the buffer. - * This Callback must not sleep. - * @kmap: maps a page from the buffer into kernel address space. - * @kunmap: [optional] unmaps a page from the buffer. + * @map_atomic: maps a page from the buffer into kernel address + * space, users may not block until the subsequent unmap call. + * This callback must not sleep. + * @unmap_atomic: [optional] unmaps a atomically mapped page from the buffer. + * This Callback must not sleep. + * @map: maps a page from the buffer into kernel address space. + * @unmap: [optional] unmaps a page from the buffer. * @vmap: [optional] creates a virtual mapping for the buffer into kernel * address space. Same restrictions as for vmap and friends apply. * @vunmap: [optional] unmaps a vmap from the buffer @@ -206,10 +206,10 @@ struct dma_buf_ops { * to be restarted. 
*/ int (*end_cpu_access)(struct dma_buf *, enum dma_data_direction); - void *(*kmap_atomic)(struct dma_buf *, unsigned long); - void (*kunmap_atomic)(struct dma_buf *, unsigned long, void *); - void *(*kmap)(struct dma_buf *, unsigned long); - void (*kunmap)(struct dma_buf *, unsigned long, void *); + void *(*map_atomic)(struct dma_buf *, unsigned long); + void (*unmap_atomic)(struct dma_buf *, unsigned long, void *); + void *(*map)(struct dma_buf *, unsigned long); + void (*unmap)(struct dma_buf *, unsigned long, void *); /** * @mmap: -- cgit v1.2.3 From 0773cea37470f8e080c510fe720fc356cf35df3a Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Tue, 18 Apr 2017 16:30:37 -0700 Subject: clocksource: Use GENMASK_ULL in definition of CLOCKSOURCE_MASK Besides reusing existing code this removes the special case handling for 64-bit masks, which causes clang to raise a shift count overflow warning due to https://bugs.llvm.org//show_bug.cgi?id=10030. Suggested-by: Dmitry Torokhov Signed-off-by: Matthias Kaehlcke Cc: Grant Grundler Cc: Greg Hackmann Cc: Michael Davidson Cc: John Stultz Link: http://lkml.kernel.org/r/20170418233037.70990-1-mka@chromium.org Signed-off-by: Thomas Gleixner --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index cfc75848a35d..f2b10d9ebd04 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -120,7 +120,7 @@ struct clocksource { #define CLOCK_SOURCE_RESELECT 0x100 /* simplify initialization of mask field */ -#define CLOCKSOURCE_MASK(bits) (u64)((bits) < 64 ? 
((1ULL<<(bits))-1) : -1) +#define CLOCKSOURCE_MASK(bits) GENMASK_ULL((bits) - 1, 0) static inline u32 clocksource_freq2mult(u32 freq, u32 shift_constant, u64 from) { -- cgit v1.2.3 From 58bb100a9de10329ca0d63484e76f27c257e9a2e Mon Sep 17 00:00:00 2001 From: Matt Redfearn Date: Wed, 19 Apr 2017 13:26:46 +0100 Subject: Clocksource/mips-gic: Remove redundant non devicetree init Malta was the only platform probing this driver from platform code without using device tree. With that code removed, gic_clocksource_init is redundant so remove it. Signed-off-by: Matt Redfearn Cc: linux-mips@linux-mips.org Cc: Jason Cooper Cc: Paul Burton Cc: Daniel Lezcano Cc: Ralf Baechle Link: http://lkml.kernel.org/r/1492604806-23420-2-git-send-email-matt.redfearn@imgtec.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/mips-gic.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 7b49c71c968b..2b0e56619e53 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -258,7 +258,6 @@ extern unsigned int gic_present; extern void gic_init(unsigned long gic_base_addr, unsigned long gic_addrspace_size, unsigned int cpu_vec, unsigned int irqbase); -extern void gic_clocksource_init(unsigned int); extern u64 gic_read_count(void); extern unsigned int gic_get_count_width(void); extern u64 gic_read_compare(void); -- cgit v1.2.3 From 49e0b4658fe6aab5bf6bfe0738a86c1895930ad1 Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 19 Apr 2017 18:21:00 +0530 Subject: kprobes: Convert kprobe_lookup_name() to a function The macro is now pretty long and ugly on powerpc. In the light of further changes needed here, convert it to a __weak variant to be over-ridden with a nicer looking function. Suggested-by: Masami Hiramatsu Acked-by: Masami Hiramatsu Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman --- include/linux/kprobes.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index c328e4f7dcad..16f153c84646 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -379,6 +379,7 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } +kprobe_opcode_t *kprobe_lookup_name(const char *name); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); -- cgit v1.2.3 From 290e3070762ac80e5fc4087d8c4de7e3f1d90aca Mon Sep 17 00:00:00 2001 From: "Naveen N. Rao" Date: Wed, 19 Apr 2017 18:21:01 +0530 Subject: powerpc/kprobes: Fix handling of function offsets on ABIv2 commit 239aeba76409 ("perf powerpc: Fix kprobe and kretprobe handling with kallsyms on ppc64le") changed how we use the offset field in struct kprobe on ABIv2. perf now offsets from the global entry point if an offset is specified and otherwise chooses the local entry point. Fix the same in kernel for kprobe API users. We do this by extending kprobe_lookup_name() to accept an additional parameter to indicate the offset specified with the kprobe registration. If offset is 0, we return the local function entry and return the global entry point otherwise. With: # cd /sys/kernel/debug/tracing/ # echo "p _do_fork" >> kprobe_events # echo "p _do_fork+0x10" >> kprobe_events before this patch: # cat ../kprobes/list c0000000000d0748 k _do_fork+0x8 [DISABLED] c0000000000d0758 k _do_fork+0x18 [DISABLED] c0000000000412b0 k kretprobe_trampoline+0x0 [OPTIMIZED] and after: # cat ../kprobes/list c0000000000d04c8 k _do_fork+0x8 [DISABLED] c0000000000d04d0 k _do_fork+0x10 [DISABLED] c0000000000412b0 k kretprobe_trampoline+0x0 [OPTIMIZED] Acked-by: Ananth N Mavinakayanahalli Signed-off-by: Naveen N. 
Rao Signed-off-by: Michael Ellerman --- include/linux/kprobes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 16f153c84646..1f82a3db00b1 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -379,7 +379,7 @@ static inline struct kprobe_ctlblk *get_kprobe_ctlblk(void) return this_cpu_ptr(&kprobe_ctlblk); } -kprobe_opcode_t *kprobe_lookup_name(const char *name); +kprobe_opcode_t *kprobe_lookup_name(const char *name, unsigned int offset); int register_kprobe(struct kprobe *p); void unregister_kprobe(struct kprobe *p); int register_kprobes(struct kprobe **kps, int num); -- cgit v1.2.3 From f66e225828c1b046c7db1db65b0dd2d135f6a2da Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:58 +0100 Subject: PCI: Add BAR index argument to pci_mmap_page_range() In all cases we know which BAR it is. Passing it in means that arch code (or generic code; watch this space) won't have to go looking for it again. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 590cfcf6acf5..7173a677d6dd 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1630,7 +1630,8 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } * Architectures provide this function if they set HAVE_PCI_MMAP, and * it accepts the 'write_combine' argument when arch_can_pci_mmap_wc() * evaluates to nonzero. 
*/ -int pci_mmap_page_range(struct pci_dev *pdev, struct vm_area_struct *vma, +int pci_mmap_page_range(struct pci_dev *pdev, int bar, + struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); #ifndef arch_can_pci_mmap_wc -- cgit v1.2.3 From f719582435afe9c7985206e42d804ea6aa315d33 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:25:59 +0100 Subject: PCI: Add pci_mmap_resource_range() and use it for ARM64 Starting to leave behind the legacy of the pci_mmap_page_range() interface which takes "user-visible" BAR addresses. This takes just the resource and offset. For now, both APIs coexist and depending on the platform, one is implemented as a wrapper around the other. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 7173a677d6dd..98a72abcf361 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1626,10 +1626,21 @@ static inline int pci_get_new_domain_nr(void) { return -ENOSYS; } #include -/* Map a range of PCI memory or I/O space for a device into user space. - * Architectures provide this function if they set HAVE_PCI_MMAP, and - * it accepts the 'write_combine' argument when arch_can_pci_mmap_wc() - * evaluates to nonzero. */ +/* These two functions provide almost identical functionality. Depennding + * on the architecture, one will be implemented as a wrapper around the + * other (in drivers/pci/mmap.c). + * + * pci_mmap_resource_range() maps a specific BAR, and vm->vm_pgoff + * is expected to be an offset within that region. + * + * pci_mmap_page_range() is the legacy architecture-specific interface, + * which accepts a "user visible" resource address converted by + * pci_resource_to_user(), as used in the legacy mmap() interface in + * /proc/bus/pci/. 
+ */ +int pci_mmap_resource_range(struct pci_dev *dev, int bar, + struct vm_area_struct *vma, + enum pci_mmap_state mmap_state, int write_combine); int pci_mmap_page_range(struct pci_dev *pdev, int bar, struct vm_area_struct *vma, enum pci_mmap_state mmap_state, int write_combine); -- cgit v1.2.3 From 2bea36fd1af440e1853e431459a0bf929266cd52 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 12 Apr 2017 13:26:08 +0100 Subject: PCI: Add I/O BAR support to generic pci_mmap_resource_range() This will need to call into an arch-provided pci_iobar_pfn() function. Signed-off-by: David Woodhouse Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 98a72abcf361..9f302444d4ac 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1648,8 +1648,12 @@ int pci_mmap_page_range(struct pci_dev *pdev, int bar, #ifndef arch_can_pci_mmap_wc #define arch_can_pci_mmap_wc() 0 #endif + #ifndef arch_can_pci_mmap_io #define arch_can_pci_mmap_io() 0 +#define pci_iobar_pfn(pdev, bar, vma) (-EINVAL) +#else +int pci_iobar_pfn(struct pci_dev *pdev, int bar, struct vm_area_struct *vma); #endif #ifndef pci_root_bus_fwnode -- cgit v1.2.3 From a60a2b73ba69abca26653fff157b0fd8947bc498 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 14 Apr 2017 21:11:25 +0200 Subject: PCI: Export pcie_flr() Currently we open-code the FLR sequence in lots of places; export a core helper instead. We split out the probing for FLR support as all the non-core callers already know their hardware. Note that in the new pci_has_flr() function the quirk check has been moved before the capability check as there is no point in reading the capability in this case.
Signed-off-by: Christoph Hellwig Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 22cad2c66d59..a9ff99c91601 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1054,6 +1054,7 @@ int pcie_get_mps(struct pci_dev *dev); int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); +void pcie_flr(struct pci_dev *dev); int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); -- cgit v1.2.3 From 3f1866779cf8338e1c8bd32e5f6f5424795ef191 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Apr 2017 16:50:58 +0530 Subject: of: dma: Make of_dma_deconfigure() public As part of moving DMA initializing to probe time the of_dma_deconfigure() function will need to be called from different source files. Make it public and move it to drivers/of/device.c where the of_dma_configure() function is. 
Tested-by: Marek Szyprowski Reviewed-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Laurent Pinchart Signed-off-by: Joerg Roedel --- include/linux/of_device.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index c12dace043f3..af984551cc2b 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -56,6 +56,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } void of_dma_configure(struct device *dev, struct device_node *np); +void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ static inline int of_driver_match_device(struct device *dev, @@ -105,6 +106,8 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) } static inline void of_dma_configure(struct device *dev, struct device_node *np) {} +static inline void of_dma_deconfigure(struct device *dev) +{} #endif /* CONFIG_OF */ #endif /* _LINUX_OF_DEVICE_H */ -- cgit v1.2.3 From 09515ef5ddad71c7820e5e428da418b709feeb26 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 10 Apr 2017 16:51:01 +0530 Subject: of/acpi: Configure dma operations at probe time for platform/amba/pci bus devices Configuring DMA ops at probe time will allow deferring device probe when the IOMMU isn't available yet. The dma_configure for the device is now called from the generic device_attach callback just before the bus/driver probe is called. This way, configuring the DMA ops for the device would be called at the same place for all bus_types, hence the deferred probing mechanism should work for all buses as well. pci_bus_add_devices (platform/amba)(_device_create/driver_register) | | pci_bus_add_device (device_add/driver_register) | | device_attach device_initial_probe | | __device_attach_driver __device_attach_driver | driver_probe_device | really_probe | dma_configure Similarly on the device/driver_unregister path __device_release_driver is called which in turn calls dma_deconfigure.
This patch changes the dma ops configuration to probe time for both OF and ACPI based platform/amba/pci bus devices. Tested-by: Marek Szyprowski Tested-by: Hanjun Guo Reviewed-by: Robin Murphy Acked-by: Rob Herring Acked-by: Bjorn Helgaas (drivers/pci part) Acked-by: Rafael J. Wysocki Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- include/linux/dma-mapping.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 0977317c6835..4f3eecedca2d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -728,6 +728,18 @@ dma_mark_declared_memory_occupied(struct device *dev, } #endif /* CONFIG_HAVE_GENERIC_DMA_COHERENT */ +#ifdef CONFIG_HAS_DMA +int dma_configure(struct device *dev); +void dma_deconfigure(struct device *dev); +#else +static inline int dma_configure(struct device *dev) +{ + return 0; +} + +static inline void dma_deconfigure(struct device *dev) {} +#endif + /* * Managed DMA API */ -- cgit v1.2.3 From 7b07cbefb68d486febf47e13b570fed53d9296b4 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Mon, 10 Apr 2017 16:51:02 +0530 Subject: iommu: of: Handle IOMMU lookup failure with deferred probing or error Failures to look up an IOMMU when parsing the DT iommus property need to be handled separately from the .of_xlate() failures to support deferred probing. The lack of a registered IOMMU can be caused by the lack of a driver for the IOMMU, the IOMMU device probe not having been performed yet, having been deferred, or having failed. The first case occurs when the device tree describes the bus master and IOMMU topology correctly but no device driver exists for the IOMMU yet or the device driver has not been compiled in. Return NULL, the caller will configure the device without an IOMMU. The second and third cases are handled by deferring the probe of the bus master device which will eventually get reprobed after the IOMMU. 
The last case is currently handled by deferring the probe of the bus master device as well. A mechanism to either configure the bus master device without an IOMMU or to fail the bus master device probe depending on whether the IOMMU is optional or mandatory would be a good enhancement. Tested-by: Marek Szyprowski Reviewed-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Laurent Pichart Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- include/linux/of_device.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index af984551cc2b..2cacdd81062e 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -55,7 +55,7 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) return of_node_get(cpu_dev->of_node); } -void of_dma_configure(struct device *dev, struct device_node *np); +int of_dma_configure(struct device *dev, struct device_node *np); void of_dma_deconfigure(struct device *dev); #else /* CONFIG_OF */ @@ -104,8 +104,11 @@ static inline struct device_node *of_cpu_device_node_get(int cpu) { return NULL; } -static inline void of_dma_configure(struct device *dev, struct device_node *np) -{} + +static inline int of_dma_configure(struct device *dev, struct device_node *np) +{ + return 0; +} static inline void of_dma_deconfigure(struct device *dev) {} #endif /* CONFIG_OF */ -- cgit v1.2.3 From 5a1bb638d5677053c7addcb228b56da6fccb5d68 Mon Sep 17 00:00:00 2001 From: Sricharan R Date: Mon, 10 Apr 2017 16:51:03 +0530 Subject: drivers: acpi: Handle IOMMU lookup failure with deferred probing or error This is an equivalent to the DT's handling of the iommu master's probe with deferred probing when the corresponding iommu is not probed yet. The lack of a registered IOMMU can be caused by the lack of a driver for the IOMMU, the IOMMU device probe not having been performed yet, having been deferred, or having failed.
The first case occurs when the firmware describes the bus master and IOMMU topology correctly but no device driver exists for the IOMMU yet or the device driver has not been compiled in. Return NULL, the caller will configure the device without an IOMMU. The second and third cases are handled by deferring the probe of the bus master device which will eventually get reprobed after the IOMMU. The last case is currently handled by deferring the probe of the bus master device as well. A mechanism to either configure the bus master device without an IOMMU or to fail the bus master device probe depending on whether the IOMMU is optional or mandatory would be a good enhancement. Tested-by: Hanjun Guo Reviewed-by: Robin Murphy [Lorenzo: Added fixes for dma_coherent_mask overflow, acpi_dma_configure called multiple times for same device] Signed-off-by: Lorenzo Pieralisi Signed-off-by: Sricharan R Signed-off-by: Joerg Roedel --- include/linux/acpi.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 9b05886f9773..79d06ef654c9 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -762,8 +762,11 @@ static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) return DEV_DMA_NOT_SUPPORTED; } -static inline void acpi_dma_configure(struct device *dev, - enum dev_dma_attr attr) { } +static inline int acpi_dma_configure(struct device *dev, + enum dev_dma_attr attr) +{ + return 0; +} static inline void acpi_dma_deconfigure(struct device *dev) { } -- cgit v1.2.3 From 316ca8804ea84a782d5ba2163711ebb22116ff5a Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Mon, 10 Apr 2017 16:51:06 +0530 Subject: ACPI/IORT: Remove linker section for IORT entries probing The IORT linker section introduced by commit 34ceea275f62 ("ACPI/IORT: Introduce linker section for IORT entries probing") was needed to make sure SMMU drivers are registered (and therefore probed) in the 
kernel before devices using the SMMU have a chance to probe in turn. Through the introduction of deferred IOMMU configuration the linker section based IORT probing infrastructure is not needed any longer, in that device/SMMU probe dependencies are managed through the probe deferral mechanism, making the IORT linker section infrastructure unused, so that it can be removed. Remove the unused IORT linker section probing infrastructure from the kernel to complete the ACPI IORT IOMMU configure probe deferral mechanism implementation. Tested-by: Hanjun Guo Reviewed-by: Robin Murphy Signed-off-by: Lorenzo Pieralisi Cc: Sricharan R Signed-off-by: Joerg Roedel --- include/linux/acpi_iort.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 77e08099e554..f167e1d045ff 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -52,7 +52,4 @@ const struct iommu_ops *iort_iommu_configure(struct device *dev) { return NULL; } #endif -#define IORT_ACPI_DECLARE(name, table_id, fn) \ - ACPI_DECLARE_PROBE_ENTRY(iort, name, table_id, 0, NULL, 0, fn) - #endif /* __ACPI_IORT_H__ */ -- cgit v1.2.3 From 49a57ef7f8492ef985ee1ecdb927ca78a6b2f308 Mon Sep 17 00:00:00 2001 From: Suman Anna Date: Wed, 12 Apr 2017 00:21:27 -0500 Subject: iommu/omap: Drop legacy-style device support All the supported boards that have OMAP IOMMU devices do support DT boot only now. So, drop the support for the non-DT legacy-style devices from the OMAP IOMMU driver. Couple of the fields from the iommu platform data would no longer be required, so they have also been cleaned up. The IOMMU platform data is still needed though for performing reset management properly in a multi-arch environment. 
Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index 0496d171700a..a40fc0f4f9de 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -30,10 +30,7 @@ struct omap_iommu_arch_data { }; struct iommu_platform_data { - const char *name; const char *reset_name; - int nr_tlb_entries; - int (*assert_reset)(struct platform_device *pdev, const char *name); int (*deassert_reset)(struct platform_device *pdev, const char *name); }; -- cgit v1.2.3 From e73b7afe4e8ca5ec4304a9e1d5009755a85fff91 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 12 Apr 2017 00:21:28 -0500 Subject: iommu/omap: Move data structures to omap-iommu.h The internal data-structures are scattered over various header and C files. Consolidate them in omap-iommu.h. While at this, add the kerneldoc comment for the missing iommu domain variable and revise the iommu_arch_data name. Signed-off-by: Joerg Roedel [s-anna@ti.com: revise kerneldoc comments] Signed-off-by: Suman Anna Signed-off-by: Joerg Roedel --- include/linux/platform_data/iommu-omap.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/iommu-omap.h b/include/linux/platform_data/iommu-omap.h index a40fc0f4f9de..e8b12dbf6170 100644 --- a/include/linux/platform_data/iommu-omap.h +++ b/include/linux/platform_data/iommu-omap.h @@ -12,23 +12,6 @@ #include -#define MMU_REG_SIZE 256 - -/** - * struct iommu_arch_data - omap iommu private data - * @name: name of the iommu device - * @iommu_dev: handle of the iommu device - * - * This is an omap iommu private data object, which binds an iommu user - * to its iommu device. 
This object should be placed at the iommu user's - * dev_archdata so generic IOMMU API can be used without having to - * utilize omap-specific plumbing anymore. - */ -struct omap_iommu_arch_data { - const char *name; - struct omap_iommu *iommu_dev; -}; - struct iommu_platform_data { const char *reset_name; int (*assert_reset)(struct platform_device *pdev, const char *name); -- cgit v1.2.3 From 518662e0fcb9fa241fe90a337b59bc5066b2a930 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Mon, 10 Apr 2017 12:22:09 +1000 Subject: NFS: fix usage of mempools. When passed GFP flags that allow sleeping (such as GFP_NOIO), mempool_alloc() will never return NULL, it will wait until memory is available. This means that we don't need to handle failure, but that we do need to ensure one thread doesn't call mempool_alloc() twice on the one pool without queuing or freeing the first allocation. If multiple threads did this during times of high memory pressure, the pool could be exhausted and a deadlock could result. pnfs_generic_alloc_ds_commits() attempts to allocate from the nfs_commit_mempool while already holding an allocation from that pool. This is not safe. So change nfs_commitdata_alloc() to take a flag that indicates whether failure is acceptable. In pnfs_generic_alloc_ds_commits(), accept failure and handle it as we currently do. Elsewhere, do not accept failure, and do not handle it. Even when failure is acceptable, we want to succeed if possible. That means both - using an entry from the pool if there is one - waiting for direct reclaim if there isn't. We call mempool_alloc(GFP_NOWAIT) to achieve the first, then kmem_cache_alloc(GFP_NOIO|__GFP_NORETRY) to achieve the second. Each of these can fail, but together they do the best they can without blocking indefinitely. The objects returned by kmem_cache_alloc() will still be freed by mempool_free().
This is safe as mempool_alloc() uses exactly the same function to allocate objects (since the mempool was created with mempool_create_slab_pool()). The object returned by mempool_alloc() and kmem_cache_alloc() are indistinguishable so mempool_free() will handle both identically, either adding to the pool or calling kmem_cache_free(). Also, don't test for failure when allocating from nfs_wdata_mempool. Signed-off-by: NeilBrown Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 287f34161086..1b29915247b2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -502,7 +502,7 @@ extern int nfs_wb_all(struct inode *inode); extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); -extern struct nfs_commit_data *nfs_commitdata_alloc(void); +extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); static inline int -- cgit v1.2.3 From fbe77c30e9abcb3429380dec622439991a718e31 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Wed, 19 Apr 2017 10:11:35 -0400 Subject: NFS: move rw_mode to nfs_pageio_header Let's try to have it in a cacheline in nfs4_proc_pgio_rpc_prepare(). 
Signed-off-by: Benjamin Coddington Signed-off-by: Trond Myklebust --- include/linux/nfs_page.h | 4 ++-- include/linux/nfs_xdr.h | 1 + 2 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 957049f72290..6f01e28bba27 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -64,7 +64,6 @@ struct nfs_pageio_ops { }; struct nfs_rw_ops { - const fmode_t rw_mode; struct nfs_pgio_header *(*rw_alloc_header)(void); void (*rw_free_header)(struct nfs_pgio_header *); int (*rw_done)(struct rpc_task *, struct nfs_pgio_header *, @@ -124,7 +123,8 @@ extern void nfs_pageio_init(struct nfs_pageio_descriptor *desc, const struct nfs_pgio_completion_ops *compl_ops, const struct nfs_rw_ops *rw_ops, size_t bsize, - int how); + int how, + gfp_t gfp_flags); extern int nfs_pageio_add_request(struct nfs_pageio_descriptor *, struct nfs_page *); extern int nfs_pageio_resend(struct nfs_pageio_descriptor *, diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 348f7c158084..51e27f9746ee 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1427,6 +1427,7 @@ struct nfs_pgio_header { struct list_head pages; struct nfs_page *req; struct nfs_writeverf verf; /* Used for writes */ + fmode_t rw_mode; struct pnfs_layout_segment *lseg; loff_t io_start; const struct rpc_call_ops *mds_ops; -- cgit v1.2.3 From baf7a616d537f577d33b7d9986f40532e2bd9f66 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:25 +0200 Subject: bdi: Provide bdi_register_va() and bdi_alloc() Add function that registers bdi and takes va_list instead of variable number of arguments. Add bdi_alloc() as simple wrapper for NUMA-unaware users allocating BDI. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index c52a48cb9a66..47a98e6e2a65 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -30,6 +30,8 @@ void bdi_put(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); +int bdi_register_va(struct backing_dev_info *bdi, struct device *parent, + const char *fmt, va_list args); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); @@ -37,6 +39,10 @@ void bdi_unregister(struct backing_dev_info *bdi); int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); void bdi_destroy(struct backing_dev_info *bdi); struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); +static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask) +{ + return bdi_alloc_node(gfp_mask, NUMA_NO_NODE); +} void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); -- cgit v1.2.3 From fca39346a55bb7196888ffc77d9e3557340d1d0b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:28 +0200 Subject: fs: Provide infrastructure for dynamic BDIs in filesystems Provide helper functions for setting up dynamically allocated backing_dev_info structures for filesystems and cleaning them up on superblock destruction. 
CC: linux-mtd@lists.infradead.org CC: linux-nfs@vger.kernel.org CC: Petr Vandrovec CC: linux-nilfs@vger.kernel.org CC: cluster-devel@redhat.com CC: osd-dev@open-osd.org CC: codalist@coda.cs.cmu.edu CC: linux-afs@lists.infradead.org CC: ecryptfs@vger.kernel.org CC: linux-cifs@vger.kernel.org CC: ceph-devel@vger.kernel.org CC: linux-btrfs@vger.kernel.org CC: v9fs-developer@lists.sourceforge.net CC: lustre-devel@lists.lustre.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 2 +- include/linux/fs.h | 6 ++++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index e66d4722db8e..866c433e7d32 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -146,7 +146,7 @@ struct backing_dev_info { congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ - char *name; + const char *name; struct kref refcnt; /* Reference counter for the structure */ unsigned int capabilities; /* Device capabilities */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..98cf14ea78c0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1272,6 +1272,9 @@ struct mm_struct; /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ +/* Temporary flag until all filesystems are converted to dynamic bdis */ +#define SB_I_DYNBDI 0x00000100 + /* Possible states of 'frozen' field */ enum { SB_UNFROZEN = 0, /* FS is unfrozen */ @@ -2121,6 +2124,9 @@ extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); +extern __printf(2, 3) +int super_setup_bdi_name(struct super_block *sb, char *fmt, ...); +extern 
int super_setup_bdi(struct super_block *sb); extern int current_umask(void); -- cgit v1.2.3 From fa06052d637bf3a76f18cd2304048b866af4096e Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:37 +0200 Subject: mtd: Convert to dynamically allocated bdi infrastructure MTD already allocates backing_dev_info dynamically. Convert it to use generic infrastructure for this including proper refcounting. We drop mtd->backing_dev_info as its only use was to pass mtd_bdi pointer from one file into another and if we wanted to keep that in a clean way, we'd have to make mtd hold and drop bdi reference as needed which seems pointless for passing one global pointer... CC: David Woodhouse CC: Brian Norris CC: linux-mtd@lists.infradead.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/mtd/mtd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index eebdc63cf6af..79b176eca04a 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -334,11 +334,6 @@ struct mtd_info { int (*_get_device) (struct mtd_info *mtd); void (*_put_device) (struct mtd_info *mtd); - /* Backing device capabilities for this device - * - provides mmap capabilities - */ - struct backing_dev_info *backing_dev_info; - struct notifier_block reboot_notifier; /* default mode before reboot */ /* ECC status information */ -- cgit v1.2.3 From a5695a79088653c73c92ae8d48658cbc49f31884 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:38 +0200 Subject: coda: Convert to separately allocated bdi Allocate struct backing_dev_info separately instead of embedding it inside the superblock. This unifies handling of bdi among users. 
CC: Jan Harkes CC: coda@cs.cmu.edu CC: codalist@coda.cs.cmu.edu Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/coda_psdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coda_psdev.h b/include/linux/coda_psdev.h index 5b8721efa948..31e4e1f1547c 100644 --- a/include/linux/coda_psdev.h +++ b/include/linux/coda_psdev.h @@ -15,7 +15,6 @@ struct venus_comm { struct list_head vc_processing; int vc_inuse; struct super_block *vc_sb; - struct backing_dev_info bdi; struct mutex vc_mutex; }; -- cgit v1.2.3 From 0db10944a76ba09f37d43b99d0fe085a18307f22 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:45 +0200 Subject: nfs: Convert to separately allocated bdi Allocate struct backing_dev_info separately instead of embedding it inside the superblock. This unifies handling of bdi among users. CC: Anna Schumaker CC: linux-nfs@vger.kernel.org Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Acked-by: Trond Myklebust Signed-off-by: Jens Axboe --- include/linux/nfs_fs_sb.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b34097c67848..e1502c55741e 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -133,7 +133,6 @@ struct nfs_server { struct rpc_clnt * client_acl; /* ACL RPC client handle */ struct nlm_host *nlm_host; /* NLM client handle */ struct nfs_iostats __percpu *io_stats; /* I/O statistics */ - struct backing_dev_info backing_dev_info; atomic_long_t writeback; /* number of writeback pages */ int flags; /* various flags */ unsigned int caps; /* server capabilities */ -- cgit v1.2.3 From c1844d536dafa5f2cddf4b4841a3634f80a27666 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:47 +0200 Subject: fs: Remove SB_I_DYNBDI flag Now that all bdi structures filesystems use are properly refcounted, we can remove the SB_I_DYNBDI flag. 
Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/fs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 98cf14ea78c0..30e5c14bd743 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1272,9 +1272,6 @@ struct mm_struct; /* sb->s_iflags to limit user namespace mounts */ #define SB_I_USERNS_VISIBLE 0x00000010 /* fstype already mounted */ -/* Temporary flag until all filesystems are converted to dynamic bdis */ -#define SB_I_DYNBDI 0x00000100 - /* Possible states of 'frozen' field */ enum { SB_UNFROZEN = 0, /* FS is unfrozen */ -- cgit v1.2.3 From 2e82b84c01d9438d86079980e22e036eee71e754 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:48 +0200 Subject: block: Remove unused functions Now that all backing_dev_info structure are allocated separately, we can drop some unused functions. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 47a98e6e2a65..aaeb2ec5d33c 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -17,8 +17,6 @@ #include #include -int __must_check bdi_init(struct backing_dev_info *bdi); - static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) { kref_get(&bdi->refcnt); @@ -32,12 +30,9 @@ int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_va(struct backing_dev_info *bdi, struct device *parent, const char *fmt, va_list args); -int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); -int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); -void 
bdi_destroy(struct backing_dev_info *bdi); struct backing_dev_info *bdi_alloc_node(gfp_t gfp_mask, int node_id); static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask) { -- cgit v1.2.3 From 7c4cc30024946dae9530cd6dc0d8d4eb40fca173 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 12 Apr 2017 12:24:49 +0200 Subject: bdi: Drop 'parent' argument from bdi_register[_va]() Drop 'parent' argument of bdi_register() and bdi_register_va(). It is always NULL. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index aaeb2ec5d33c..557d84063934 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -25,11 +25,10 @@ static inline struct backing_dev_info *bdi_get(struct backing_dev_info *bdi) void bdi_put(struct backing_dev_info *bdi); -__printf(3, 4) -int bdi_register(struct backing_dev_info *bdi, struct device *parent, - const char *fmt, ...); -int bdi_register_va(struct backing_dev_info *bdi, struct device *parent, - const char *fmt, va_list args); +__printf(2, 3) +int bdi_register(struct backing_dev_info *bdi, const char *fmt, ...); +int bdi_register_va(struct backing_dev_info *bdi, const char *fmt, + va_list args); int bdi_register_owner(struct backing_dev_info *bdi, struct device *owner); void bdi_unregister(struct backing_dev_info *bdi); -- cgit v1.2.3 From b7819b9259185dcdcc81eb32182a4dc13d695738 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Apr 2017 16:02:55 +0200 Subject: block: remove the blk_execute_rq return value The function only returns -EIO if rq->errors is non-zero, which is not very useful and lets a large number of callers ignore the return value. Just let the callers figure out their error themselves. 
Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 51c9e391798e..e2064ed3c703 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -970,7 +970,7 @@ extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, uns extern int blk_rq_map_user_iov(struct request_queue *, struct request *, struct rq_map_data *, const struct iov_iter *, gfp_t); -extern int blk_execute_rq(struct request_queue *, struct gendisk *, +extern void blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -- cgit v1.2.3 From 17d5363b83f8c73ef9109f75a4a9b578f31d842f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Apr 2017 16:03:01 +0200 Subject: scsi: introduce a result field in struct scsi_request This passes on the scsi_cmnd result field to users of passthrough requests. Currently we abuse req->errors for this purpose, but that field will go away in its current form. Note that the old IDE code abuses the errors field in very creative ways and stores all kinds of different values in it. I didn't dare to touch this magic, so the abuses are brought forward 1:1. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. 
Petersen Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 2f51c1724b5a..6980ca322074 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -88,7 +88,7 @@ static inline bool ata_pm_request(struct request *rq) ide_req(rq)->type == ATA_PRIV_PM_RESUME); } -/* Error codes returned in rq->errors to the higher part of the driver. */ +/* Error codes returned in result to the higher part of the driver. */ enum { IDE_DRV_ERROR_GENERAL = 101, IDE_DRV_ERROR_FILEMARK = 102, -- cgit v1.2.3 From 08e0029aa2a4acdd365613ce88a1184e5351a8a1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Apr 2017 16:03:09 +0200 Subject: blk-mq: remove the error argument to blk_mq_complete_request Now that all drivers that call blk_mq_complete_requests have a ->complete callback we can remove the direct call to blk_mq_end_request, as well as the error argument to blk_mq_complete_request. 
Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d75de612845d..0c4dadb85f62 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -228,7 +228,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); -void blk_mq_complete_request(struct request *rq, int error); +void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From e26738e037f34aedfe05e412f442833f44f4a6e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Apr 2017 16:03:11 +0200 Subject: block: add a error_count field to struct request This is for the legacy floppy and ataflop drivers that currently abuse ->errors for this purpose. It's stashed away in a union to not grow the struct size, the other fields are either used by modern drivers for different purposes or the I/O scheduler before queuing the I/O to drivers.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e2064ed3c703..a3dcee624de3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -175,6 +175,7 @@ struct request { struct rb_node rb_node; /* sort/lookup */ struct bio_vec special_vec; void *completion_data; + int error_count; /* for legacy drivers, don't use */ }; /* -- cgit v1.2.3 From caf7df12272118e0274c8353bcfeaf60c7743a47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 20 Apr 2017 16:03:16 +0200 Subject: block: remove the errors field from struct request MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Acked-by: Roger Pau Monné Reviewed-by: Konrad Rzeszutek Wilk Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a3dcee624de3..6c4ab0d4a160 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -220,8 +220,6 @@ struct request { void *special; /* opaque pointer available for LLD use */ - int errors; - unsigned int extra_len; /* length of alignment and padding */ unsigned long deadline; -- cgit v1.2.3 From d8f07aee3f2fd959878bf614d4e984900018eb9e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Jan 2017 23:30:05 -0800 Subject: block: kill bdev_dax_capable() This is leftover dead code that has since been replaced by bdev_dax_supported(). 
Signed-off-by: Dan Williams --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5a7da607ca04..f72708399b83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1958,7 +1958,6 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); extern int bdev_dax_supported(struct super_block *, int); -extern bool bdev_dax_capable(struct block_device *); #else /* CONFIG_BLOCK */ struct block_device; -- cgit v1.2.3 From b0686260fecaa924d8eff2ace94bee70506bc308 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 26 Jan 2017 20:37:35 -0800 Subject: dax: introduce dax_direct_access() Replace bdev_direct_access() with dax_direct_access() that uses dax_device and dax_operations instead of a block_device and block_device_operations for dax. Once all consumers of the old api have been converted bdev_direct_access() will be deleted. Given that block device partitioning decisions can cause dax page alignment constraints to be violated this also introduces the bdev_dax_pgoff() helper. It handles calculating a logical pgoff relative to the dax_device and also checks for page alignment. 
Signed-off-by: Dan Williams --- include/linux/blkdev.h | 1 + include/linux/dax.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f72708399b83..612c497d1461 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1958,6 +1958,7 @@ extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); extern int bdev_dax_supported(struct super_block *, int); +int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/dax.h b/include/linux/dax.h index 39a0312c45c3..7e62e280c11f 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -27,6 +27,8 @@ void put_dax(struct dax_device *dax_dev); bool dax_alive(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); +long dax_direct_access(struct dax_device *dax_dev, pgoff_t pgoff, long nr_pages, + void **kaddr, pfn_t *pfn); /* * We use lowest available bit in exceptional entry for locking, one bit for -- cgit v1.2.3 From f26c5719b2d7b00de69eb83eb1c1c831759fdc9b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 12 Apr 2017 12:35:44 -0700 Subject: dm: add dax_device and dax_operations support Allocate a dax_device to represent the capacity of a device-mapper instance. Provide a ->direct_access() method via the new dax_operations indirection that mirrors the functionality of the current direct_access support via block_device_operations. Once fs/dax.c has been converted to use dax_operations the old dm_blk_direct_access() will be removed. A new helper dm_dax_get_live_target() is introduced to separate some of the dm-specifics from the direct_access implementation. This enabling is only for the top-level dm representation to upper layers. 
Converting target direct_access implementations is deferred to a separate patch. Cc: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index a7e6903866fd..bcba4d89089c 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -130,6 +130,7 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); */ typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, void **kaddr, pfn_t *pfn, long size); +#define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); -- cgit v1.2.3 From 5be661412762bbef45a55eaf1e6847258d69b3a4 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Tue, 18 Apr 2017 16:55:36 +0200 Subject: net: add netif_is_ovs_port helper To find out if a netdev is an OVS port. Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b0aa089ce67f..0f3c38ce5417 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4171,6 +4171,11 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } +static inline bool netif_is_ovs_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_OVS_DATAPATH; +} + static inline bool netif_is_team_master(const struct net_device *dev) { return dev->priv_flags & IFF_TEAM; -- cgit v1.2.3 From 0206319fdfee7c36b97aa6c0561bab206132f813 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Thu, 20 Apr 2017 16:59:11 -0600 Subject: blk-mq: Fix poll_stat for new size-based bucketing. 
Fixes an issue where the size of the poll_stat array in request_queue does not match the size expected by the new size based bucketing for IO completion polling. Fixes: 720b8ccc4500 ("blk-mq: Add a polling specific stats function") Signed-off-by: Stephen Bates Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6c4ab0d4a160..6c247861cb66 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -46,6 +46,9 @@ struct blk_stat_callback; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ +/* Must be consisitent with blk_mq_poll_stats_bkt() */ +#define BLK_MQ_POLL_STATS_BKTS 16 + /* * Maximum number of blkcg policies allowed to be registered concurrently. * Defined here to simplify include dependency. @@ -517,7 +520,7 @@ struct request_queue { int poll_nsec; struct blk_stat_callback *poll_cb; - struct blk_rq_stat poll_stat[2]; + struct blk_rq_stat poll_stat[BLK_MQ_POLL_STATS_BKTS]; struct timer_list timeout; struct work_struct timeout_work; -- cgit v1.2.3 From 92a68fa047ca5b8e1991af2d50b23ad9452613cd Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 31 Mar 2017 19:21:41 -0400 Subject: ftrace: Move the function commands into the tracing directory As nothing outside the tracing directory uses the function command mechanism, I'm moving the prototypes out of the include/linux/ftrace.h and into the local kernel/trace/trace.h header. I plan on making them hook to the trace_array structure which is local to kernel/trace, and I do not want to expose it to the rest of the kernel. This requires that the command functions must also be local to tracing. But luckily nothing else uses them. 
Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 3e790ff1c501..774e7a95c201 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -326,14 +326,6 @@ static inline void stack_tracer_disable(void) { } static inline void stack_tracer_enable(void) { } #endif -struct ftrace_func_command { - struct list_head list; - char *name; - int (*func)(struct ftrace_hash *hash, - char *func, char *cmd, - char *params, int enable); -}; - #ifdef CONFIG_DYNAMIC_FTRACE int ftrace_arch_code_modify_prepare(void); @@ -421,9 +413,6 @@ void ftrace_set_global_notrace(unsigned char *buf, int len, int reset); void ftrace_free_filter(struct ftrace_ops *ops); void ftrace_ops_set_global_filter(struct ftrace_ops *ops); -int register_ftrace_command(struct ftrace_func_command *cmd); -int unregister_ftrace_command(struct ftrace_func_command *cmd); - enum { FTRACE_UPDATE_CALLS = (1 << 0), FTRACE_DISABLE_CALLS = (1 << 1), @@ -639,14 +628,6 @@ static inline void ftrace_enable_daemon(void) { } static inline void ftrace_module_init(struct module *mod) { } static inline void ftrace_module_enable(struct module *mod) { } static inline void ftrace_release_mod(struct module *mod) { } -static inline __init int register_ftrace_command(struct ftrace_func_command *cmd) -{ - return -EINVAL; -} -static inline __init int unregister_ftrace_command(char *cmd_name) -{ - return -EINVAL; -} static inline int ftrace_text_reserved(const void *start, const void *end) { return 0; -- cgit v1.2.3 From eee8ded131f15e0f5b1897c9c4a7687fabd28822 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 4 Apr 2017 21:31:28 -0400 Subject: ftrace: Have the function probes call their own function Now that the function probes have their own ftrace_ops, there's no reason to continue using the ftrace_func_hash to find which probe to call in the 
function callback. The ops that is passed in to the function callback is part of the probe_ops to call. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 774e7a95c201..6d2a63e4ea52 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -443,8 +443,8 @@ enum { FTRACE_ITER_FILTER = (1 << 0), FTRACE_ITER_NOTRACE = (1 << 1), FTRACE_ITER_PRINTALL = (1 << 2), - FTRACE_ITER_DO_HASH = (1 << 3), - FTRACE_ITER_HASH = (1 << 4), + FTRACE_ITER_DO_PROBES = (1 << 3), + FTRACE_ITER_PROBE = (1 << 4), FTRACE_ITER_ENABLED = (1 << 5), }; -- cgit v1.2.3 From ed067d4a859ff696373324c5061392e013a7561a Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 11 Apr 2017 20:08:34 +0200 Subject: linux/kernel.h: Add ALIGN_DOWN macro Few parts of kernel define their own macro for aligning down so provide a common define for this, with the same usage and assumptions as existing ALIGN. Convert also three existing implementations to this one. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Herbert Xu --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4c26dc3a8295..3d9f8420f973 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -47,6 +47,7 @@ /* @a is a power of 2 value */ #define ALIGN(x, a) __ALIGN_KERNEL((x), (a)) +#define ALIGN_DOWN(x, a) __ALIGN_KERNEL((x) - ((a) - 1), (a)) #define __ALIGN_MASK(x, mask) __ALIGN_KERNEL_MASK((x), (mask)) #define PTR_ALIGN(p, a) ((typeof(p))ALIGN((unsigned long)(p), (a))) #define IS_ALIGNED(x, a) (((x) & ((typeof(x))(a) - 1)) == 0) -- cgit v1.2.3 From 6ade8694f471d847500c7cec152cc15171cef5d5 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Thu, 20 Apr 2017 17:30:06 -0700 Subject: kvm: Move srcu_struct fields to end of struct kvm Parallelizing SRCU callback handling will increase the size of srcu_struct, which will move the kvm structure's kvm_arch field out of reach of powerpc's current assembly code, which will result in the following sort of build error: arch/powerpc/kvm/book3s_hv_rmhandlers.S:617: Error: operand out of range (0x000000000000b328 is not between 0xffffffffffff8000 and 0x0000000000007fff) This commit moves the srcu_struct fields in the kvm structure to follow the kvm_arch field, which will allow powerpc's assembly code to continue to be able to reach the kvm_arch field. Reported-by: Stephen Rothwell Reported-by: Michael Ellerman Reported-by: kbuild test robot Suggested-by: Paolo Bonzini Signed-off-by: Paul E. McKenney Tested-by: Michael Ellerman Acked-by: Paolo Bonzini [ paulmck: Moved this commit to precede SRCU callback parallelization, and reworded the commit log into future tense, all in the name of bisectability. ] --- include/linux/kvm_host.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c14ad9809da..96c8e29c6442 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -375,8 +375,6 @@ struct kvm { struct mutex slots_lock; struct mm_struct *mm; /* userspace tied to this vm */ struct kvm_memslots *memslots[KVM_ADDRESS_SPACE_NUM]; - struct srcu_struct srcu; - struct srcu_struct irq_srcu; struct kvm_vcpu *vcpus[KVM_MAX_VCPUS]; /* @@ -429,6 +427,8 @@ struct kvm { struct list_head devices; struct dentry *debugfs_dentry; struct kvm_stat_data **debugfs_stat_data; + struct srcu_struct srcu; + struct srcu_struct irq_srcu; }; #define kvm_err(fmt, ...) \ -- cgit v1.2.3 From da915ad5cf25b5f5d358dd3670c3378d8ae8c03e Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Wed, 5 Apr 2017 09:01:53 -0700 Subject: srcu: Parallelize callback handling Peter Zijlstra proposed using SRCU to reduce mmap_sem contention [1,2], however, there are workloads that could result in a high volume of concurrent invocations of call_srcu(), which with current SRCU would result in excessive lock contention on the srcu_struct structure's ->queue_lock, which protects SRCU's callback lists. This commit therefore moves SRCU to per-CPU callback lists, thus greatly reducing contention. Because a given SRCU instance no longer has a single centralized callback list, starting grace periods and invoking callbacks are both more complex than in the single-list Classic SRCU implementation. Starting grace periods and handling callbacks are now handled using an srcu_node tree that is in some ways similar to the rcu_node trees used by RCU-bh, RCU-preempt, and RCU-sched (for example, the srcu_node tree shape is controlled by exactly the same Kconfig options and boot parameters that control the shape of the rcu_node tree). In addition, the old per-CPU srcu_array structure is now named srcu_data and contains an rcu_segcblist structure named ->srcu_cblist for its callbacks (and a spinlock to protect this). The srcu_struct gets an srcu_gp_seq that is used to associate callback segments with the corresponding completion-time grace-period number. These completion-time grace-period numbers are propagated up the srcu_node tree so that the grace-period workqueue handler can determine whether additional grace periods are needed on the one hand and where to look for callbacks that are ready to be invoked. The srcu_barrier() function must now wait on all instances of the per-CPU ->srcu_cblist. Because each ->srcu_cblist is protected by ->lock, srcu_barrier() can remotely add the needed callbacks. In theory, it could also remotely start grace periods, but in practice doing so is complex and racy. 
And interestingly enough, it is never necessary for srcu_barrier() to start a grace period because srcu_barrier() only enqueues a callback when a callback is already present--and it turns out that a grace period has to have already been started for this pre-existing callback. Furthermore, it is only the callback that srcu_barrier() needs to wait on, not any particular grace period. Therefore, a new rcu_segcblist_entrain() function enqueues the srcu_barrier() function's callback into the same segment occupied by the last pre-existing callback in the list. The special case where all the pre-existing callbacks are on a different list (because they are in the process of being invoked) is handled by enqueuing srcu_barrier()'s callback into the RCU_DONE_TAIL segment, relying on the done-callbacks check that takes place after all callbacks are invoked. Note that the readers use the same algorithm as before. Note that there is a separate srcu_idx that tells the readers what counter to increment. This unfortunately cannot be combined with srcu_gp_seq because they need to be incremented at different times. This commit introduces some ugly #ifdefs in rcutorture. These will go away when I feel good enough about Tree SRCU to ditch Classic SRCU. Some crude performance comparisons, courtesy of a quickly hacked rcuperf asynchronous-grace-period capability: Callback Queuing Overhead ------------------------- # CPUS Classic SRCU Tree SRCU ------ ------------ --------- 2 0.349 us 0.342 us 16 31.66 us 0.4 us 41 --------- 0.417 us The times are the 90th percentiles, a statistic that was chosen to reject the overheads of the occasional srcu_barrier() call needed to avoid OOMing the test machine. The rcuperf test hangs when running Classic SRCU at 41 CPUs, hence the line of dashes. Despite the hacks to both the rcuperf code and the statistics, this is a convincing demonstration of Tree SRCU's performance and scalability advantages.
[1] https://lwn.net/Articles/309030/ [2] https://patchwork.kernel.org/patch/5108281/ Signed-off-by: Paul E. McKenney [ paulmck: Fix initialization if synchronize_srcu_expedited() called first. ] --- include/linux/rcu_segcblist.h | 42 ++++++++++++++++++++--- include/linux/srcutree.h | 80 ++++++++++++++++++++++++++++++++++--------- 2 files changed, 102 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index 74b1e7243955..ced8f313fd05 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -401,6 +401,37 @@ static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; } +/* + * Entrain the specified callback onto the specified rcu_segcblist at + * the end of the last non-empty segment. If the entire rcu_segcblist + * is empty, make no change, but return false. + * + * This is intended for use by rcu_barrier()-like primitives, -not- + * for normal grace-period use. IMPORTANT: The callback you enqueue + * will wait for all prior callbacks, NOT necessarily for a grace + * period. You have been warned. + */ +static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, + struct rcu_head *rhp, bool lazy) +{ + int i; + + if (rcu_segcblist_n_cbs(rsclp) == 0) + return false; + WRITE_ONCE(rsclp->len, rsclp->len + 1); + if (lazy) + rsclp->len_lazy++; + smp_mb(); /* Ensure counts are updated before callback is entrained. */ + rhp->next = NULL; + for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) + if (rsclp->tails[i] != rsclp->tails[i - 1]) + break; + *rsclp->tails[i] = rhp; + for (; i <= RCU_NEXT_TAIL; i++) + rsclp->tails[i] = &rhp->next; + return true; +} + /* * Extract only the counts from the specified rcu_segcblist structure, * and place them in the specified rcu_cblist structure. 
This function @@ -537,7 +568,8 @@ static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, int i, j; WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); - WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); + if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) + return; /* * Find all callbacks whose ->gp_seq numbers indicate that they @@ -582,8 +614,9 @@ static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, * them to complete at the end of the earlier grace period. * * This function operates on an rcu_segcblist structure, and also the - * grace-period sequence number at which new callbacks would become - * ready to invoke. + * grace-period sequence number seq at which new callbacks would become + * ready to invoke. Returns true if there are callbacks that won't be + * ready to invoke until seq, false otherwise. */ static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, unsigned long seq) @@ -591,7 +624,8 @@ static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, int i; WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); - WARN_ON_ONCE(rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)); + if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) + return false; /* * Find the segment preceding the oldest segment of callbacks diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index f2b3bd6c6bc2..0400e211aa44 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -24,25 +24,75 @@ #ifndef _LINUX_SRCU_TREE_H #define _LINUX_SRCU_TREE_H -struct srcu_array { - unsigned long lock_count[2]; - unsigned long unlock_count[2]; +#include +#include + +struct srcu_node; +struct srcu_struct; + +/* + * Per-CPU structure feeding into leaf srcu_node, similar in function + * to rcu_node. + */ +struct srcu_data { + /* Read-side state. */ + unsigned long srcu_lock_count[2]; /* Locks per CPU. */ + unsigned long srcu_unlock_count[2]; /* Unlocks per CPU. */ + + /* Update-side state. 
*/ + spinlock_t lock ____cacheline_internodealigned_in_smp; + struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ + unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. */ + bool srcu_cblist_invoking; /* Invoking these CBs? */ + struct delayed_work work; /* Context for CB invoking. */ + struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */ + struct srcu_node *mynode; /* Leaf srcu_node. */ + int cpu; + struct srcu_struct *sp; }; +/* + * Node in SRCU combining tree, similar in function to rcu_data. + */ +struct srcu_node { + spinlock_t lock; + unsigned long srcu_have_cbs[4]; /* GP seq for children */ + /* having CBs, but only */ + /* is > ->srcu_gq_seq. */ + struct srcu_node *srcu_parent; /* Next up in tree. */ + int grplo; /* Least CPU for node. */ + int grphi; /* Biggest CPU for node. */ +}; + +/* + * Per-SRCU-domain structure, similar in function to rcu_state. + */ struct srcu_struct { - unsigned long completed; - unsigned long srcu_gp_seq; - atomic_t srcu_exp_cnt; - struct srcu_array __percpu *per_cpu_ref; - spinlock_t queue_lock; /* protect ->srcu_cblist */ - struct rcu_segcblist srcu_cblist; + struct srcu_node node[NUM_RCU_NODES]; /* Combining tree. */ + struct srcu_node *level[RCU_NUM_LVLS + 1]; + /* First node at each level. */ + struct mutex srcu_cb_mutex; /* Serialize CB preparation. */ + spinlock_t gp_lock; /* protect ->srcu_cblist */ + struct mutex srcu_gp_mutex; /* Serialize GP work. */ + unsigned int srcu_idx; /* Current rdr array element. */ + unsigned long srcu_gp_seq; /* Grace-period seq #. */ + unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ + atomic_t srcu_exp_cnt; /* # ongoing expedited GPs. */ + struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ + unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ + struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ + struct completion srcu_barrier_completion; + /* Awaken barrier rq at end. 
*/ + atomic_t srcu_barrier_cpu_cnt; /* # CPUs not yet posting a */ + /* callback for the barrier */ + /* operation. */ struct delayed_work work; #ifdef CONFIG_DEBUG_LOCK_ALLOC struct lockdep_map dep_map; #endif /* #ifdef CONFIG_DEBUG_LOCK_ALLOC */ }; -/* Values for -> state variable. */ +/* Values for state variable (bottom bits of ->srcu_gp_seq). */ #define SRCU_STATE_IDLE 0 #define SRCU_STATE_SCAN1 1 #define SRCU_STATE_SCAN2 2 @@ -51,11 +101,9 @@ void process_srcu(struct work_struct *work); #define __SRCU_STRUCT_INIT(name) \ { \ - .completed = -300, \ - .per_cpu_ref = &name##_srcu_array, \ - .queue_lock = __SPIN_LOCK_UNLOCKED(name.queue_lock), \ - .srcu_cblist = RCU_SEGCBLIST_INITIALIZER(name.srcu_cblist),\ - .work = __DELAYED_WORK_INITIALIZER(name.work, process_srcu, 0),\ + .sda = &name##_srcu_data, \ + .gp_lock = __SPIN_LOCK_UNLOCKED(name.gp_lock), \ + .srcu_gp_seq_needed = 0 - 1, \ __SRCU_DEP_MAP_INIT(name) \ } @@ -79,7 +127,7 @@ void process_srcu(struct work_struct *work); * See include/linux/percpu-defs.h for the rules on per-CPU variables. */ #define __DEFINE_SRCU(name, is_static) \ - static DEFINE_PER_CPU(struct srcu_array, name##_srcu_array);\ + static DEFINE_PER_CPU(struct srcu_data, name##_srcu_data);\ is_static struct srcu_struct name = __SRCU_STRUCT_INIT(name) #define DEFINE_SRCU(name) __DEFINE_SRCU(name, /* not static */) #define DEFINE_STATIC_SRCU(name) __DEFINE_SRCU(name, static) -- cgit v1.2.3 From bcbfdd01dce5556a952fae84ef16fd0f12525e7b Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 11 Apr 2017 15:50:41 -0700 Subject: rcu: Make non-preemptive schedule be Tasks RCU quiescent state Currently, a call to schedule() acts as a Tasks RCU quiescent state only if a context switch actually takes place. However, just the call to schedule() guarantees that the calling task has moved off of whatever tracing trampoline that it might have been on previously.
This commit therefore plumbs schedule()'s "preempt" parameter into rcu_note_context_switch(), which then records the Tasks RCU quiescent state, but only if this call to schedule() was -not- due to a preemption. To avoid adding overhead to the common-case context-switch path, this commit hides the rcu_note_context_switch() check under an existing non-common-case check. Suggested-by: Steven Rostedt Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 ++++++++--- include/linux/rcutiny.h | 13 +++++++++---- include/linux/rcutree.h | 5 +++-- 3 files changed, 20 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index e6146d0074f8..f531b29207da 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -363,15 +363,20 @@ static inline void rcu_init_nohz(void) #ifdef CONFIG_TASKS_RCU #define TASKS_RCU(x) x extern struct srcu_struct tasks_rcu_exit_srcu; -#define rcu_note_voluntary_context_switch(t) \ +#define rcu_note_voluntary_context_switch_lite(t) \ do { \ - rcu_all_qs(); \ if (READ_ONCE((t)->rcu_tasks_holdout)) \ WRITE_ONCE((t)->rcu_tasks_holdout, false); \ } while (0) +#define rcu_note_voluntary_context_switch(t) \ + do { \ + rcu_all_qs(); \ + rcu_note_voluntary_context_switch_lite(t); \ + } while (0) #else /* #ifdef CONFIG_TASKS_RCU */ #define TASKS_RCU(x) do { } while (0) -#define rcu_note_voluntary_context_switch(t) rcu_all_qs() +#define rcu_note_voluntary_context_switch_lite(t) do { } while (0) +#define rcu_note_voluntary_context_switch(t) rcu_all_qs() #endif /* #else #ifdef CONFIG_TASKS_RCU */ /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index 5219be250f00..74d9c3a1feee 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -92,10 +92,11 @@ static inline void kfree_call_rcu(struct rcu_head *head, call_rcu(head, func); } -static inline void rcu_note_context_switch(void) -{ - rcu_sched_qs(); -} +#define 
rcu_note_context_switch(preempt) \ + do { \ + rcu_sched_qs(); \ + rcu_note_voluntary_context_switch_lite(current); \ + } while (0) /* * Take advantage of the fact that there is only one CPU, which @@ -242,6 +243,10 @@ static inline bool rcu_is_watching(void) #endif /* #else defined(CONFIG_DEBUG_LOCK_ALLOC) || defined(CONFIG_RCU_TRACE) */ +static inline void rcu_request_urgent_qs_task(struct task_struct *t) +{ +} + static inline void rcu_all_qs(void) { barrier(); /* Avoid RCU read-side critical sections leaking across. */ diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 63a4e4cf40a5..0bacb6b2af69 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -30,7 +30,7 @@ #ifndef __LINUX_RCUTREE_H #define __LINUX_RCUTREE_H -void rcu_note_context_switch(void); +void rcu_note_context_switch(bool preempt); int rcu_needs_cpu(u64 basem, u64 *nextevt); void rcu_cpu_stall_reset(void); @@ -41,7 +41,7 @@ void rcu_cpu_stall_reset(void); */ static inline void rcu_virt_note_context_switch(int cpu) { - rcu_note_context_switch(); + rcu_note_context_switch(false); } void synchronize_rcu_bh(void); @@ -108,6 +108,7 @@ void rcu_scheduler_starting(void); extern int rcu_scheduler_active __read_mostly; bool rcu_is_watching(void); +void rcu_request_urgent_qs_task(struct task_struct *t); void rcu_all_qs(void); -- cgit v1.2.3 From f9f38e33389c019ec880f6825119c94867c1fde0 Mon Sep 17 00:00:00 2001 From: Helen Koike Date: Mon, 10 Apr 2017 12:51:07 -0300 Subject: nvme: improve performance for virtual NVMe devices This change provides a mechanism to reduce the number of MMIO doorbell writes for the NVMe driver. When running in a virtualized environment like QEMU, the cost of an MMIO is quite hefty here. The main idea for the patch is to provide the device with two memory locations: 1) to store the doorbell values so they can be looked up without the doorbell MMIO write 2) to store an event index. I believe the doorbell value is obvious, the event index not so much.
Similar to the virtio specification, the virtual device can tell the driver (guest OS) not to write MMIO unless you are writing past this value. FYI: doorbell values are written by the nvme driver (guest OS) and the event index is written by the virtual device (host OS). The patch implements a new admin command that will communicate where these two memory locations reside. If the command fails, the nvme driver will work as before without any optimizations. Contributions: Eric Northup Frank Swiderski Ted Tso Keith Busch Just to give an idea on the performance boost with the vendor extension: Running fio [1], a stock NVMe driver I get about 200K read IOPs with my vendor patch I get about 1000K read IOPs. This was running with a null device i.e. the backing device simply returned success on every read IO request. [1] Running on a 4 core machine: fio --time_based --name=benchmark --runtime=30 --filename=/dev/nvme0n1 --nrfiles=1 --ioengine=libaio --iodepth=32 --direct=1 --invalidate=1 --verify=0 --verify_fatal=0 --numjobs=4 --rw=randread --blocksize=4k --randrepeat=false Signed-off-by: Rob Nelson [mlin: port for upstream] Signed-off-by: Ming Lin [koike: updated for upstream] Signed-off-by: Helen Koike Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch --- include/linux/nvme.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9061780b141f..b625bacf37ef 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -245,6 +245,7 @@ enum { NVME_CTRL_ONCS_WRITE_ZEROES = 1 << 3, NVME_CTRL_VWC_PRESENT = 1 << 0, NVME_CTRL_OACS_SEC_SUPP = 1 << 0, + NVME_CTRL_OACS_DBBUF_SUPP = 1 << 7, }; struct nvme_lbaf { @@ -603,6 +604,7 @@ enum nvme_admin_opcode { nvme_admin_download_fw = 0x11, nvme_admin_ns_attach = 0x15, nvme_admin_keep_alive = 0x18, + nvme_admin_dbbuf = 0x7C, nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, nvme_admin_security_recv = 0x82, @@ -874,6 +876,16 
@@ struct nvmf_property_get_command { __u8 resv4[16]; }; +struct nvme_dbbuf { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __u32 rsvd12[6]; +}; + struct nvme_command { union { struct nvme_common_command common; @@ -893,6 +905,7 @@ struct nvme_command { struct nvmf_connect_command connect; struct nvmf_property_set_command prop_set; struct nvmf_property_get_command prop_get; + struct nvme_dbbuf dbbuf; }; }; -- cgit v1.2.3 From 39498faef7c02f9f6de4060ccdc7e8975a6e690b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:28 -0700 Subject: nvmet_fc: add target feature flags for upcall isr contexts Two new feature flags were added to control whether upcalls to the transport result in context switches or stay in the calling context. NVMET_FCTGTFEAT_CMD_IN_ISR: By default, if the flag is not set, the transport assumes the lldd is in a non-isr context and in the cpu context it should be for the io queue. As such, the cmd handler is called directly in the calling context. If the flag is set, indicating the upcall is an isr context, the transport mandates a transition to a workqueue. The workqueue assigned to the queue is used for the context. NVMET_FCTGTFEAT_OPDONE_IN_ISR By default, if the flag is not set, the transport assumes the lldd is in a non-isr context and in the cpu context it should be for the io queue. As such, the fcp operation done callback is called directly in the calling context. If the flag is set, indicating the upcall is an isr context, the transport mandates a transition to a workqueue. The workqueue assigned to the queue is used for the context. 
Updated lpfc for flags Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- include/linux/nvme-fc-driver.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 16eb264980c2..d70a9c98bc23 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -655,6 +655,22 @@ enum { * on. The transport should pick a cpu to schedule the work * on. */ + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2), + /* Bit 2: When 0, the LLDD is calling the cmd rcv handler + * in a non-isr context, allowing the transport to finish + * op completion in the calling context. When 1, the LLDD + * is calling the cmd rcv handler in an ISR context, + * requiring the transport to transition to a workqueue + * for op completion. + */ + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3), + /* Bit 3: When 0, the LLDD is calling the op done handler + * in a non-isr context, allowing the transport to finish + * op completion in the calling context. When 1, the LLDD + * is calling the op done handler in an ISR context, + * requiring the transport to transition to a workqueue + * for op completion. + */ }; -- cgit v1.2.3 From 19b58d9473e8e3d38e7f3602a07c8febfbd07bc1 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:29 -0700 Subject: nvmet_fc: add req_release to lldd api With the advent of the opdone calls changing context, the lldd can no longer assume that once the op->done call returns for RSP operations that the request struct is no longer being accessed. As such, revise the lldd api for a req_release callback that the transport will call when the job is complete. This will also be used with abort cases. Fixed text in api header for change in io complete semantics. Revised lpfc to support the new req_release api. 
Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- include/linux/nvme-fc-driver.h | 57 ++++++++++++++++++++++++++---------------- 1 file changed, 35 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index d70a9c98bc23..d98ddb2feabc 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -741,12 +741,12 @@ struct nvmet_fc_target_port { * be freed/released. * Entrypoint is Mandatory. * - * @fcp_op: Called to perform a data transfer, transmit a response, or - * abort an FCP opertion. The nvmefc_tgt_fcp_req structure is the same - * LLDD-supplied exchange structure specified in the - * nvmet_fc_rcv_fcp_req() call made when the FCP CMD IU was received. - * The op field in the structure shall indicate the operation for - * the LLDD to perform relative to the io. + * @fcp_op: Called to perform a data transfer or transmit a response. + * The nvmefc_tgt_fcp_req structure is the same LLDD-supplied + * exchange structure specified in the nvmet_fc_rcv_fcp_req() call + * made when the FCP CMD IU was received. The op field in the + * structure shall indicate the operation for the LLDD to perform + * relative to the io. * NVMET_FCOP_READDATA operation: the LLDD is to send the * payload data (described by sglist) to the host in 1 or * more FC sequences (preferrably 1). Note: the fc-nvme layer @@ -768,29 +768,35 @@ struct nvmet_fc_target_port { * successfully, the LLDD is to update the nvmefc_tgt_fcp_req * transferred_length field and may subsequently transmit the * FCP_RSP iu payload (described by rspbuf, rspdma, rsplen). - * The LLDD is to await FCP_CONF reception to confirm the RSP - * reception by the host. The LLDD may retramsit the FCP_RSP iu - * if necessary per FC-NVME. Upon reception of FCP_CONF, or upon - * FCP_CONF failure, the LLDD is to set the nvmefc_tgt_fcp_req - * fcp_error field and consider the operation complete.. 
+ * If FCP_CONF is supported, the LLDD is to await FCP_CONF + * reception to confirm the RSP reception by the host. The LLDD + * may retramsit the FCP_RSP iu if necessary per FC-NVME. Upon + * transmission of the FCP_RSP iu if FCP_CONF is not supported, + * or upon success/failure of FCP_CONF if it is supported, the + * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and + * consider the operation complete. * NVMET_FCOP_RSP: the LLDD is to transmit the FCP_RSP iu payload - * (described by rspbuf, rspdma, rsplen). The LLDD is to await - * FCP_CONF reception to confirm the RSP reception by the host. - * The LLDD may retramsit the FCP_RSP iu if necessary per FC-NVME. - * Upon reception of FCP_CONF, or upon FCP_CONF failure, the + * (described by rspbuf, rspdma, rsplen). If FCP_CONF is + * supported, the LLDD is to await FCP_CONF reception to confirm + * the RSP reception by the host. The LLDD may retramsit the + * FCP_RSP iu if FCP_CONF is not received per FC-NVME. Upon + * transmission of the FCP_RSP iu if FCP_CONF is not supported, + * or upon success/failure of FCP_CONF if it is supported, the * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and - * consider the operation complete.. + * consider the operation complete. * NVMET_FCOP_ABORT: the LLDD is to terminate the exchange * corresponding to the fcp operation. The LLDD shall send * ABTS and follow FC exchange abort-multi rules, including * ABTS retries and possible logout. * Upon completing the indicated operation, the LLDD is to set the * status fields for the operation (tranferred_length and fcp_error - * status) in the request, then all the "done" routine - * indicated in the fcp request. Upon return from the "done" - * routine for either a NVMET_FCOP_RSP or NVMET_FCOP_ABORT operation - * the fc-nvme layer will not longer reference the fcp request, - * allowing the LLDD to free/release the fcp request. + * status) in the request, then call the "done" routine + * indicated in the fcp request. 
After the operation completes, + * regardless of whether the FCP_RSP iu was successfully transmit, + * the LLDD-supplied exchange structure must remain valid until the + * transport calls the fcp_req_release() callback to return ownership + * of the exchange structure back to the LLDD so that it may be used + * for another fcp command. * Note: when calling the done routine for READDATA or WRITEDATA * operations, the fc-nvme layer may immediate convert, in the same * thread and before returning to the LLDD, the fcp operation to @@ -802,6 +808,11 @@ struct nvmet_fc_target_port { * Returns 0 on success, - on failure (Ex: -EIO) * Entrypoint is Mandatory. * + * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req + * to the LLDD after all operations on the fcp operation are complete. + * This may be due to the command completing or upon completion of + * abort cleanup. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -836,7 +847,9 @@ struct nvmet_fc_target_template { int (*xmt_ls_rsp)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_ls_req *tls_req); int (*fcp_op)(struct nvmet_fc_target_port *tgtport, - struct nvmefc_tgt_fcp_req *); + struct nvmefc_tgt_fcp_req *fcpreq); + void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From a97ec51b37efacb84f286979876675a8143035b0 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 11 Apr 2017 11:32:31 -0700 Subject: nvmet_fc: Rework target side abort handling target transport: ---------------------- There are cases when there is a need to abort in-progress target operations (writedata) so that controller termination or errors can clean up. That can't happen currently as the abort is another target op type, so it can't be used till the running one finishes (and it may not). 
Solve by removing the abort op type and creating a separate downcall from the transport to the lldd to request an io to be aborted. The transport will abort ios on queue teardown or io errors. In general the transport tries to call the lldd abort only when the io state is idle. Meaning: ops that transmit data (readdata or rsp) will always finish their transmit (or the lldd will see a state on the link or initiator port that fails the transmit) and the done call for the operation will occur. The transport will wait for the op done upcall before calling the abort function, and as the io is idle, the io can be cleaned up immediately after the abort call; Similarly, ios that are not waiting for data or transmitting data must be in the nvmet layer being processed. The transport will wait for the nvmet layer completion before calling the abort function, and as the io is idle, the io can be cleaned up immediately after the abort call; As for ops that are waiting for data (writedata), they may be outstanding indefinitely if the lldd doesn't see a condition where the initiator port or link is bad. In those cases, the transport will call the abort function and wait for the lldd's op done upcall for the operation, where it will then clean up the io. Additionally, if a lldd receives an ABTS and matches it to an outstanding request in the transport, a new transport upcall was created to abort the outstanding request in the transport. The transport expects any outstanding op call (readdata or writedata) will be completed by the lldd and the operation upcall made. The transport doesn't act on the reported abort (e.g. clean up the io) until an op done upcall occurs, a new op is attempted, or the nvmet layer completes the io processing. fcloop: ---------------------- Updated to support the new target apis. On fcp io aborts from the initiator, the loopback context is updated to NULL out the half that has completed.
The initiator side is immediately called after the abort request with an io completion (abort status). On fcp io aborts from the target, the io is stopped and the initiator side sees it as an aborted io. Target side ops, perhaps in progress while the initiator side is done, continue but noop the data movement as there's no structure on the initiator side to reference. patch also contains: ---------------------- Revised lpfc to support the new abort api commonized rsp buffer syncing and nulling of private data based on calling paths. errors in op done calls don't take action on the fod. They're bad operations which implies the fod may be bad. Signed-off-by: James Smart Signed-off-by: Sagi Grimberg --- include/linux/nvme-fc-driver.h | 25 ++++++++++++++++--------- 1 file changed, 16 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index d98ddb2feabc..0db37158a61d 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -533,9 +533,6 @@ enum { * rsp as well */ NVMET_FCOP_RSP = 4, /* send rsp frame */ - NVMET_FCOP_ABORT = 5, /* abort exchange via ABTS */ - NVMET_FCOP_BA_ACC = 6, /* send BA_ACC */ - NVMET_FCOP_BA_RJT = 7, /* send BA_RJT */ }; /** @@ -572,8 +569,6 @@ enum { * upon compeletion of the operation. The nvmet-fc layer will also set a * private pointer for its own use in the done routine. * - * Note: the LLDD must never fail a NVMET_FCOP_ABORT request !! - * * Values set by the NVMET-FC layer prior to calling the LLDD fcp_op * entrypoint. * @op: Indicates the FCP IU operation to perform (see NVMET_FCOP_xxx) @@ -784,10 +779,6 @@ struct nvmet_fc_target_port { * or upon success/failure of FCP_CONF if it is supported, the * LLDD is to set the nvmefc_tgt_fcp_req fcp_error field and * consider the operation complete. - * NVMET_FCOP_ABORT: the LLDD is to terminate the exchange - * corresponding to the fcp operation. 
The LLDD shall send - * ABTS and follow FC exchange abort-multi rules, including - * ABTS retries and possible logout. * Upon completing the indicated operation, the LLDD is to set the * status fields for the operation (tranferred_length and fcp_error * status) in the request, then call the "done" routine @@ -808,6 +799,17 @@ struct nvmet_fc_target_port { * Returns 0 on success, - on failure (Ex: -EIO) * Entrypoint is Mandatory. * + * @fcp_abort: Called by the transport to abort an active command. + * The command may be in-between operations (nothing active in LLDD) + * or may have an active WRITEDATA operation pending. The LLDD is to + * initiate the ABTS process for the command and return from the + * callback. The ABTS does not need to be complete on the command. + * The fcp_abort callback inherently cannot fail. After the + * fcp_abort() callback completes, the transport will wait for any + * outstanding operation (if there was one) to complete, then will + * call the fcp_req_release() callback to return the command's + * exchange context back to the LLDD. + * * @fcp_req_release: Called by the transport to return a nvmefc_tgt_fcp_req * to the LLDD after all operations on the fcp operation are complete. 
* This may be due to the command completing or upon completion of @@ -848,6 +850,8 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_ls_req *tls_req); int (*fcp_op)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*fcp_abort)(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); void (*fcp_req_release)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); @@ -877,4 +881,7 @@ int nvmet_fc_rcv_fcp_req(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq, void *cmdiubuf, u32 cmdiubuf_len); +void nvmet_fc_rcv_fcp_abort(struct nvmet_fc_target_port *tgtport, + struct nvmefc_tgt_fcp_req *fcpreq); + #endif /* _NVME_FC_DRIVER_H */ -- cgit v1.2.3 From 50f2112cf7a3e62a8d33838eb205d5fef306457a Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Apr 2017 12:50:09 -0400 Subject: locks: Set FL_CLOSE when removing flock locks on close() Set FL_CLOSE in fl_flags as in locks_remove_posix() when clearing locks. NFS will check for this flag to ensure an unlock is sent in a following patch. Fuse handles flock and posix locks differently for FL_CLOSE, and so requires a fixup to retain the existing behavior for flock. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Acked-by: Miklos Szeredi Signed-off-by: Trond Myklebust --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..72061aa65405 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -909,6 +909,8 @@ static inline struct file *get_file(struct file *f) #define FL_OFDLCK 1024 /* lock is "owned" by struct file */ #define FL_LAYOUT 2048 /* outstanding pNFS layout */ +#define FL_CLOSE_POSIX (FL_POSIX | FL_CLOSE) + /* * Special return value from posix_lock_file() and vfs_lock_file() for * asynchronous locking. 
-- cgit v1.2.3 From 7d6ddf88c4db372689c8aa65ea652d0514d66c06 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Apr 2017 12:50:10 -0400 Subject: NFS: Add an iocounter wait function for async RPC tasks By sleeping on a new NFS Unlock-On-Close waitqueue, rpc tasks may wait for a lock context's iocounter to reach zero. The rpc waitqueue is only woken when the open_context has the NFS_CONTEXT_UNLOCK flag set in order to mitigate spurious wake-ups for any iocounter reaching zero. Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_page.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 1b29915247b2..9aa044e76820 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -76,6 +76,7 @@ struct nfs_open_context { #define NFS_CONTEXT_ERROR_WRITE (0) #define NFS_CONTEXT_RESEND_WRITES (1) #define NFS_CONTEXT_BAD (2) +#define NFS_CONTEXT_UNLOCK (3) int error; struct list_head list; diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index b34097c67848..2a70f34dffe8 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -222,6 +222,7 @@ struct nfs_server { u32 mountd_version; unsigned short mountd_port; unsigned short mountd_protocol; + struct rpc_wait_queue uoc_rpcwaitq; }; /* Server capabilities */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index 6f01e28bba27..247cc3d3498f 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -141,6 +141,7 @@ extern int nfs_page_group_lock(struct nfs_page *, bool); extern void nfs_page_group_lock_wait(struct nfs_page *); extern void nfs_page_group_unlock(struct nfs_page *); extern bool nfs_page_group_sync_on_bit(struct nfs_page *, unsigned int); +extern bool nfs_async_iocounter_wait(struct rpc_task *, struct nfs_lock_context *); /* * Lock the 
page of an asynchronous request -- cgit v1.2.3 From b1ece737f44f91dca8f4829cf0b442e752e406db Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Tue, 11 Apr 2017 12:50:11 -0400 Subject: lockd: Introduce nlmclnt_operations NFS would enjoy the ability to modify the behavior of the NLM client's unlock RPC task in order to delay the transmission of the unlock until IO that was submitted under that lock has completed. This ability can ensure that the NLM client will always complete the transmission of an unlock even if the waiting caller has been interrupted with fatal signal. For this purpose, a pointer to a struct nlmclnt_operations can be assigned in a nfs_module's nfs_rpc_ops that will install those nlmclnt_operations on the nlm_host. The struct nlmclnt_operations defines three callback operations that will be used in a following patch: nlmclnt_alloc_call - used to call back after a successful allocation of a struct nlm_rqst in nlmclnt_proc(). nlmclnt_unlock_prepare - used to call back during NLM unlock's rpc_call_prepare. The NLM client defers calling rpc_call_start() until this callback returns false. nlmclnt_release_call - used to call back when the NLM client's struct nlm_rqst is freed. 
Signed-off-by: Benjamin Coddington Reviewed-by: Jeff Layton Signed-off-by: Trond Myklebust --- include/linux/lockd/bind.h | 24 ++++++++++++++++++++++-- include/linux/lockd/lockd.h | 2 ++ include/linux/nfs_xdr.h | 1 + 3 files changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/bind.h b/include/linux/lockd/bind.h index 140edab64446..05728396a1a1 100644 --- a/include/linux/lockd/bind.h +++ b/include/linux/lockd/bind.h @@ -18,6 +18,7 @@ /* Dummy declarations */ struct svc_rqst; +struct rpc_task; /* * This is the set of functions for lockd->nfsd communication @@ -43,6 +44,7 @@ struct nlmclnt_initdata { u32 nfs_version; int noresvport; struct net *net; + const struct nlmclnt_operations *nlmclnt_ops; }; /* @@ -52,8 +54,26 @@ struct nlmclnt_initdata { extern struct nlm_host *nlmclnt_init(const struct nlmclnt_initdata *nlm_init); extern void nlmclnt_done(struct nlm_host *host); -extern int nlmclnt_proc(struct nlm_host *host, int cmd, - struct file_lock *fl); +/* + * NLM client operations provide a means to modify RPC processing of NLM + * requests. Callbacks receive a pointer to data passed into the call to + * nlmclnt_proc(). + */ +struct nlmclnt_operations { + /* Called on successful allocation of nlm_rqst, use for allocation or + * reference counting. */ + void (*nlmclnt_alloc_call)(void *); + + /* Called in rpc_task_prepare for unlock. A return value of true + * indicates the callback has put the task to sleep on a waitqueue + * and NLM should not call rpc_call_start(). 
*/ + bool (*nlmclnt_unlock_prepare)(struct rpc_task*, void *); + + /* Called when the nlm_rqst is freed, callbacks should clean up here */ + void (*nlmclnt_release_call)(void *); +}; + +extern int nlmclnt_proc(struct nlm_host *host, int cmd, struct file_lock *fl, void *data); extern int lockd_up(struct net *net); extern void lockd_down(struct net *net); diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index b37dee3acaba..41f7b6a04d69 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -69,6 +69,7 @@ struct nlm_host { char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ char nodename[UNX_MAXNODENAME + 1]; + const struct nlmclnt_operations *h_nlmclnt_ops; /* Callback ops for NLM users */ }; /* @@ -142,6 +143,7 @@ struct nlm_rqst { struct nlm_block * a_block; unsigned int a_retries; /* Retry count */ u8 a_owner[NLMCLNT_OHSIZE]; + void * a_callback_data; /* sent to nlmclnt_operations callbacks */ }; /* diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 51e27f9746ee..677c6b91dfcd 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1551,6 +1551,7 @@ struct nfs_rpc_ops { const struct inode_operations *dir_inode_ops; const struct inode_operations *file_inode_ops; const struct file_operations *file_ops; + const struct nlmclnt_operations *nlmclnt_ops; int (*getroot) (struct nfs_server *, struct nfs_fh *, struct nfs_fsinfo *); -- cgit v1.2.3 From b62ea4112ce3746664dcc2f232d03461f0e6f3c7 Mon Sep 17 00:00:00 2001 From: Martin Kaiser Date: Fri, 21 Apr 2017 16:47:11 +0200 Subject: video: fbdev: imxfb: support AUS mode Some displays require setting AUS mode in the LDCD AUS Mode Control Register to work with the imxfb driver. Like the value of the Panel Configuration Register, the AUS mode setting depends on the display mode. Allow setting AUS mode from the device tree by adding a boolean property. Make this property optional to keep the DT ABI stable. 
AUS mode can be set only on imx21 and compatible chipsets. Signed-off-by: Martin Kaiser Cc: Sascha Hauer Cc: Rob Herring Cc: Mark Rutland Signed-off-by: Bartlomiej Zolnierkiewicz --- include/linux/platform_data/video-imxfb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/video-imxfb.h b/include/linux/platform_data/video-imxfb.h index a5c0a71ec914..cf9348b376ac 100644 --- a/include/linux/platform_data/video-imxfb.h +++ b/include/linux/platform_data/video-imxfb.h @@ -50,6 +50,7 @@ struct imx_fb_videomode { struct fb_videomode mode; u32 pcr; + bool aus_mode; unsigned char bpp; }; -- cgit v1.2.3 From dd77abf8a03a1ebd4dd3ddebecce312dcb0d1af1 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Sun, 19 Mar 2017 11:01:28 +0200 Subject: IB/mlx4: Support RAW Ethernet when RoCE is disabled On some environments, such as certain SR-IOV VF configurations, RoCE isn't supported for mlx4 Ethernet ports. Currently the driver will not open IB device on that port. This is problematic since we do want user-space RAW Ethernet QPs functionality to remain in place. For that end, enhance the relevant driver flows such that we do create a device instance in that case. 
Signed-off-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 1beb1ec2fbdf..74b765ce48ab 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -1011,8 +1011,7 @@ struct mlx4_mad_ifc { #define mlx4_foreach_ib_transport_port(port, dev) \ for ((port) = 1; (port) <= (dev)->caps.num_ports; (port)++) \ if (((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_IB) || \ - ((dev)->caps.flags & MLX4_DEV_CAP_FLAG_IBOE) || \ - ((dev)->caps.flags2 & MLX4_DEV_CAP_FLAG2_ROCE_V1_V2)) + ((dev)->caps.port_mask[port] == MLX4_PORT_TYPE_ETH)) #define MLX4_INVALID_SLAVE_ID 0xFF #define MLX4_SINK_COUNTER_INDEX(dev) (dev->caps.max_counters - 1) -- cgit v1.2.3 From 19cc75249adc61401aa4b21a6654e0a7f7ea8fe2 Mon Sep 17 00:00:00 2001 From: Ariel Levkovich Date: Mon, 3 Apr 2017 13:11:03 +0300 Subject: IB/mlx5: Use IP version matching to classify IP traffic This change adds the ability for flow steering to classify IPv4/6 packets with MPLS tag (Ethertype 0x8847 and 0x8848) as standard IP packets and hit IPv4/6 classified steering rules. When user added a flow rule with IP classification, driver was implicitly adding ethertype matching to the created rule in order to distinguish between IPv4 and IPv6 protocols. Since IP packets with MPLS tag header have MPLS ethertype, they missed the rule and ended up hitting the default filters. Such behavior prevented MPLS packets from undergoing inbound traffic load balancing flows (if such were defined by configuring RSS) to achieve higher throughput - the way that non-MPLS IP packets performed.
Since our device is able to look past the MPLS tag and identify the next protocol we introduce this solution which replaces Ethertype matching by the device's capability to perform IP version parsing and matching in order to distinguish between IPv4 and IPv6. Therefore, whenever a flow with IP spec is added and the device supports IP version matching, the driver will implicitly add IP version matching to the rule (Based on the IP spec type) without Ethertype matching which will cause relevant MPLS tagged packets to hit this rule as well. Otherwise (device doesn't support IP version matching), we fall back to setting Ethertype matching. If the user's filters specify an L2 ethertype and an IP spec the rule will then match both the ethertype and the IP version. The device's support for IP version matching is reported by the device via dedicated capability bit in query_device_cap and named outer/inner_ip_version. Signed-off-by: Ariel Levkovich Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 7c50bd39b297..4da6e803b627 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -236,7 +236,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 outer_dmac[0x1]; u8 outer_smac[0x1]; u8 outer_ether_type[0x1]; - u8 reserved_at_3[0x1]; + u8 outer_ip_version[0x1]; u8 outer_first_prio[0x1]; u8 outer_first_cfi[0x1]; u8 outer_first_vid[0x1]; @@ -265,7 +265,7 @@ struct mlx5_ifc_flow_table_fields_supported_bits { u8 inner_dmac[0x1]; u8 inner_smac[0x1]; u8 inner_ether_type[0x1]; - u8 reserved_at_23[0x1]; + u8 inner_ip_version[0x1]; u8 inner_first_prio[0x1]; u8 inner_first_cfi[0x1]; u8 inner_first_vid[0x1]; @@ -371,7 +371,7 @@ struct mlx5_ifc_fte_match_set_lyr_2_4_bits { u8 cvlan_tag[0x1]; u8 svlan_tag[0x1]; u8 frag[0x1]; - u8 reserved_at_93[0x4]; + u8
ip_version[0x4]; u8 tcp_flags[0x9]; u8 tcp_sport[0x10]; -- cgit v1.2.3 From e1f24a79f424ddb03828de7c0152668c9a30146e Mon Sep 17 00:00:00 2001 From: Parav Pandit Date: Sun, 16 Apr 2017 07:29:29 +0300 Subject: IB/mlx5: Support congestion related counters This patch adds support to query the congestion related hardware counters through new command and links them with other hw counters being available in hw_counters sysfs location. In order to reuse existing infrastructure it renames related q_counter data structures to more generic counters to reflect q_counters and congestion counters and maybe some other counters in the future. New hardware counters: * rp_cnp_handled - CNP packets handled by the reaction point * rp_cnp_ignored - CNP packets ignored by the reaction point * np_cnp_sent - CNP packets sent by notification point to respond to CE marked RoCE packets * np_ecn_marked_roce_packets - CE marked RoCE packets received by notification point It also avoids returning ENOSYS which is specific for invalid system call and produces the following checkpatch.pl warning. 
WARNING: ENOSYS means 'invalid syscall nr' and nothing else + return -ENOSYS; Signed-off-by: Parav Pandit Reviewed-by: Eli Cohen Reviewed-by: Daniel Jurgens Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 4da6e803b627..954f42c268a4 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -4735,17 +4735,17 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 reserved_at_40[0x40]; - u8 cur_flows[0x20]; + u8 rp_cur_flows[0x20]; u8 sum_flows[0x20]; - u8 cnp_ignored_high[0x20]; + u8 rp_cnp_ignored_high[0x20]; - u8 cnp_ignored_low[0x20]; + u8 rp_cnp_ignored_low[0x20]; - u8 cnp_handled_high[0x20]; + u8 rp_cnp_handled_high[0x20]; - u8 cnp_handled_low[0x20]; + u8 rp_cnp_handled_low[0x20]; u8 reserved_at_140[0x100]; @@ -4755,13 +4755,13 @@ struct mlx5_ifc_query_cong_statistics_out_bits { u8 accumulators_period[0x20]; - u8 ecn_marked_roce_packets_high[0x20]; + u8 np_ecn_marked_roce_packets_high[0x20]; - u8 ecn_marked_roce_packets_low[0x20]; + u8 np_ecn_marked_roce_packets_low[0x20]; - u8 cnps_sent_high[0x20]; + u8 np_cnp_sent_high[0x20]; - u8 cnps_sent_low[0x20]; + u8 np_cnp_sent_low[0x20]; u8 reserved_at_320[0x560]; }; -- cgit v1.2.3 From 2bca34455b257d75080d87e800ae14afe49001bf Mon Sep 17 00:00:00 2001 From: Vignesh R Date: Tue, 11 Apr 2017 17:22:24 +0530 Subject: spi: Add can_dma like interface for spi_flash_read Add an interface analogous to ->can_dma() for spi_flash_read() interface. This will enable SPI controller drivers to inform SPI core when not to do DMA mappings. 
Signed-off-by: Vignesh R Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 75c6bd0ac605..cd8ae65568e3 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -375,6 +375,8 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * @unprepare_message: undo any work done by prepare_message(). * @spi_flash_read: to support spi-controller hardwares that provide * accelerated interface to read from flash devices. + * @spi_flash_can_dma: analogous to can_dma() interface, but for + * controllers implementing spi_flash_read. * @flash_read_supported: spi device supports flash read * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that @@ -538,6 +540,8 @@ struct spi_master { struct spi_message *message); int (*spi_flash_read)(struct spi_device *spi, struct spi_flash_read_message *msg); + bool (*spi_flash_can_dma)(struct spi_device *spi, + struct spi_flash_read_message *msg); bool (*flash_read_supported)(struct spi_device *spi); /* -- cgit v1.2.3 From 7acf8a1e8a28b3d7407a8d8061a7d0766cfac2f4 Mon Sep 17 00:00:00 2001 From: Matthew Whitehead Date: Wed, 19 Apr 2017 12:37:10 -0400 Subject: Replace 2 jiffies with sysctl netdev_budget_usecs to enable softirq tuning Constants used for tuning are generally a bad idea, especially as hardware changes over time. Replace the constant 2 jiffies with sysctl variable netdev_budget_usecs to enable sysadmins to tune the softirq processing. Also document the variable. For example, a very fast machine might tune this to 1000 microseconds, while my regression testing 486DX-25 needs it to be 4000 microseconds on a nearly idle network to prevent time_squeeze from being incremented. Version 2: changed jiffies to microseconds for predictable units. Signed-off-by: Matthew Whitehead Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 0f3c38ce5417..c49cf21f2b31 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3296,6 +3296,7 @@ static __always_inline int ____dev_forward_skb(struct net_device *dev, void dev_queue_xmit_nit(struct sk_buff *skb, struct net_device *dev); extern int netdev_budget; +extern unsigned int netdev_budget_usecs; /* Called by rtnetlink.c:rtnl_unlock() */ void netdev_run_todo(void); -- cgit v1.2.3 From 3073f070a137e140e3faefa87f2446a8deffc07f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 17 Feb 2017 23:13:25 -0500 Subject: switch memcpy_from_msg() to copy_from_iter_full() Signed-off-by: Al Viro --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index c776abd86937..53383bce27f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3113,7 +3113,7 @@ struct sk_buff *pskb_extract(struct sk_buff *skb, int off, int to_copy, static inline int memcpy_from_msg(void *data, struct msghdr *msg, int len) { - return copy_from_iter(data, len, &msg->msg_iter) == len ? 0 : -EFAULT; + return copy_from_iter_full(data, len, &msg->msg_iter) ? 0 : -EFAULT; } static inline int memcpy_to_msg(struct msghdr *msg, void *data, int len) -- cgit v1.2.3 From 4f757f3cbf54edef7b75c68d6d6d2f1a0ca08d2e Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 15 Apr 2017 17:31:22 -0400 Subject: make sure that mntns_install() doesn't end up with referral for root new flag: LOOKUP_DOWN. If the starting point is overmounted, cross into whatever's mounted on top, triggering referrals et.al. Use that instead of follow_down_one() loop in mntns_install(), handle errors properly. 
Signed-off-by: Al Viro --- include/linux/namei.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/namei.h b/include/linux/namei.h index f29abda31e6d..8b4794e83196 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -44,6 +44,7 @@ enum {LAST_NORM, LAST_ROOT, LAST_DOT, LAST_DOTDOT, LAST_BIND}; #define LOOKUP_JUMPED 0x1000 #define LOOKUP_ROOT 0x2000 #define LOOKUP_EMPTY 0x4000 +#define LOOKUP_DOWN 0x8000 extern int path_pts(struct path *path); -- cgit v1.2.3 From 19b7ccf8651df09d274671b53039c672a52ad84d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 18 Apr 2017 18:43:20 +0200 Subject: block: get rid of blk_integrity_revalidate() Commit 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") introduced blk_integrity_revalidate(), which seems to assume ownership of the stable pages flag and unilaterally clears it if no blk_integrity profile is registered: if (bi->profile) disk->queue->backing_dev_info->capabilities |= BDI_CAP_STABLE_WRITES; else disk->queue->backing_dev_info->capabilities &= ~BDI_CAP_STABLE_WRITES; It's called from revalidate_disk() and rescan_partitions(), making it impossible to enable stable pages for drivers that support partitions and don't use blk_integrity: while the call in revalidate_disk() can be trivially worked around (see zram, which doesn't support partitions and hence gets away with zram_revalidate_disk()), rescan_partitions() can be triggered from userspace at any time. This breaks rbd, where the ceph messenger is responsible for generating/verifying CRCs. Since blk_integrity_{un,}register() "must" be used for (un)registering the integrity profile with the block layer, move BDI_CAP_STABLE_WRITES setting there. This way drivers that call blk_integrity_register() and use integrity infrastructure won't interfere with drivers that don't but still want stable pages. Fixes: 25520d55cdb6 ("block: Inline blk_integrity in struct gendisk") Cc: "Martin K. 
Petersen" Cc: Christoph Hellwig Cc: Mike Snitzer Cc: stable@vger.kernel.org # 4.4+, needs backporting Tested-by: Dan Williams Signed-off-by: Ilya Dryomov Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e11082c7f9b..acff9437e5c3 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -722,11 +722,9 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) #if defined(CONFIG_BLK_DEV_INTEGRITY) extern void blk_integrity_add(struct gendisk *); extern void blk_integrity_del(struct gendisk *); -extern void blk_integrity_revalidate(struct gendisk *); #else /* CONFIG_BLK_DEV_INTEGRITY */ static inline void blk_integrity_add(struct gendisk *disk) { } static inline void blk_integrity_del(struct gendisk *disk) { } -static inline void blk_integrity_revalidate(struct gendisk *disk) { } #endif /* CONFIG_BLK_DEV_INTEGRITY */ #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 6c478ae9204b489f6228e4b535c6ac72851e06d8 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 17 Apr 2017 22:10:04 -0500 Subject: signal: Make kill_proc_info static There are no users outside of signal.c so make the function static so the compiler and other developers have that information. Signed-off-by: "Eric W. 
Biederman" --- include/linux/sched/signal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/signal.h b/include/linux/sched/signal.h index 2cf446704cd4..c06d63b3a583 100644 --- a/include/linux/sched/signal.h +++ b/include/linux/sched/signal.h @@ -293,7 +293,6 @@ extern int kill_pid_info_as_cred(int, struct siginfo *, struct pid *, const struct cred *, u32); extern int kill_pgrp(struct pid *pid, int sig, int priv); extern int kill_pid(struct pid *pid, int sig, int priv); -extern int kill_proc_info(int, struct siginfo *, pid_t); extern __must_check bool do_notify_parent(struct task_struct *, int); extern void __wake_up_parent(struct task_struct *p, struct task_struct *parent); extern void force_sig(int, struct task_struct *); -- cgit v1.2.3 From cf0c3e68aa81f992b0301f62e341b710d385bf68 Mon Sep 17 00:00:00 2001 From: Jeroen Hofstee Date: Fri, 21 Apr 2017 15:21:11 +0900 Subject: kbuild: fix asm-offset generation to work with clang KBuild abuses the asm statement to write to a file and clang chokes about these invalid asm statements. Hack it even more by fooling this is actual valid asm code. 
[masahiro: Import Jeroen's work for U-Boot: http://patchwork.ozlabs.org/patch/375026/ Tweak sed script a little to avoid garbage '#' for GCC case, like #define NR_PAGEFLAGS 23 /* __NR_PAGEFLAGS # */ ] Signed-off-by: Jeroen Hofstee Signed-off-by: Masahiro Yamada Reviewed-by: Matthias Kaehlcke Tested-by: Matthias Kaehlcke --- include/linux/kbuild.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kbuild.h b/include/linux/kbuild.h index 22a72198c14b..4e80f3a9ad58 100644 --- a/include/linux/kbuild.h +++ b/include/linux/kbuild.h @@ -2,14 +2,14 @@ #define __LINUX_KBUILD_H #define DEFINE(sym, val) \ - asm volatile("\n->" #sym " %0 " #val : : "i" (val)) + asm volatile("\n.ascii \"->" #sym " %0 " #val "\"" : : "i" (val)) -#define BLANK() asm volatile("\n->" : : ) +#define BLANK() asm volatile("\n.ascii \"->\"" : : ) #define OFFSET(sym, str, mem) \ DEFINE(sym, offsetof(struct str, mem)) #define COMMENT(x) \ - asm volatile("\n->#" x) + asm volatile("\n.ascii \"->#" x "\"") #endif -- cgit v1.2.3 From 8ba4fcdf0f4068407e98cd9cc0f230c2dd8d56de Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Wed, 19 Apr 2017 09:47:22 +0800 Subject: module: Unify the return value type of try_module_get The prototypes of try_module_get are different with different macro. When enable module and module unload, it returns bool, but others not. Make the return type for try_module_get consistent across all module config options. 
Signed-off-by: Gao Feng [jeyu: slightly amended changelog to make it clearer] Signed-off-by: Jessica Yu --- include/linux/module.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index 0297c5cd7cdf..6b79eb76a523 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -582,7 +582,7 @@ extern bool try_module_get(struct module *module); extern void module_put(struct module *module); #else /*!CONFIG_MODULE_UNLOAD*/ -static inline int try_module_get(struct module *module) +static inline bool try_module_get(struct module *module) { return !module || module_is_live(module); } @@ -674,9 +674,9 @@ static inline void __module_get(struct module *module) { } -static inline int try_module_get(struct module *module) +static inline bool try_module_get(struct module *module) { - return 1; + return true; } static inline void module_put(struct module *module) -- cgit v1.2.3 From e2460f2a4bc740fae9e23f14d653cf53e90b3f9a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Tue, 18 Apr 2017 16:51:48 -0400 Subject: dm: mark targets that pass integrity data A dm-crypt on dm-integrity device incorrectly advertises an integrity profile on the DM crypt device. It can be seen in the files "/sys/block/dm-*/integrity/*" that both dm-integrity and dm-crypt target advertise the integrity profile. That is incorrect, only the dm-integrity target should advertise the integrity profile. A general problem in DM is that if we have a DM device that depends on another device with an integrity profile, the upper device will always advertise the integrity profile, even when the target driver doesn't support handling integrity data. Most targets don't support integrity data, so we provide a whitelist of targets that support it (linear, delay and striped). The targets that support passing integrity data to the lower device are marked with the flag DM_TARGET_PASSES_INTEGRITY. 
The DM core will now advertise integrity data on a DM device only if all the targets support the integrity data. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 874462153f14..98f981026e4e 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -227,6 +227,12 @@ typedef unsigned (*dm_num_write_bios_fn) (struct dm_target *ti, struct bio *bio) #define DM_TARGET_INTEGRITY 0x00000010 #define dm_target_has_integrity(type) ((type)->features & DM_TARGET_INTEGRITY) +/* + * A target passes integrity data to the lower device. + */ +#define DM_TARGET_PASSES_INTEGRITY 0x00000020 +#define dm_target_passes_integrity(type) ((type)->features & DM_TARGET_PASSES_INTEGRITY) + struct dm_target { struct dm_table *table; struct target_type *type; -- cgit v1.2.3 From 49632b5822ea2af0e9531f8d20dcd5fb786093a9 Mon Sep 17 00:00:00 2001 From: "sudarsana.kalluru@cavium.com" Date: Thu, 20 Apr 2017 22:31:20 -0700 Subject: qed: Add support for static dcbx. The patch adds driver support for static/local dcbx mode. In this mode adapter brings up the dcbx link with locally configured parameters instead of performing the dcbx negotiation with the peer. The feature is useful when peer device/switch doesn't support dcbx. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index d44933a058ee..9f966be89510 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -144,6 +144,7 @@ struct qed_dcbx_operational_params { bool enabled; bool ieee; bool cee; + bool local; u32 err; }; -- cgit v1.2.3 From 82dfb540aeb277d945bf646ff780493b8a520d8a Mon Sep 17 00:00:00 2001 From: Gerard Garcia Date: Fri, 21 Apr 2017 10:10:46 +0100 Subject: VSOCK: Add virtio vsock vsockmon hooks The virtio drivers deal with struct virtio_vsock_pkt. Add virtio_transport_deliver_tap_pkt(pkt) for handing packets to the vsockmon device. We call virtio_transport_deliver_tap_pkt(pkt) from net/vmw_vsock/virtio_transport.c and drivers/vhost/vsock.c instead of common code. This is because the drivers may drop packets before handing them to common code - we still want to capture them. Signed-off-by: Gerard Garcia Signed-off-by: Stefan Hajnoczi Reviewed-by: Jorgen Hansen Signed-off-by: David S. Miller --- include/linux/virtio_vsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/virtio_vsock.h b/include/linux/virtio_vsock.h index 584f9a647ad4..ab13f0743da8 100644 --- a/include/linux/virtio_vsock.h +++ b/include/linux/virtio_vsock.h @@ -153,5 +153,6 @@ void virtio_transport_free_pkt(struct virtio_vsock_pkt *pkt); void virtio_transport_inc_tx_pkt(struct virtio_vsock_sock *vvs, struct virtio_vsock_pkt *pkt); u32 virtio_transport_get_credit(struct virtio_vsock_sock *vvs, u32 wanted); void virtio_transport_put_credit(struct virtio_vsock_sock *vvs, u32 credit); +void virtio_transport_deliver_tap_pkt(struct virtio_vsock_pkt *pkt); #endif /* _LINUX_VIRTIO_VSOCK_H */ -- cgit v1.2.3 From 69226896ad636b94f6d2e55d75ff21a29c4de83b Mon Sep 17 00:00:00 2001 From: Roger Quadros Date: Fri, 21 Apr 2017 16:15:38 +0300 Subject: mdio_bus: Issue GPIO RESET to PHYs. 
Some boards [1] leave the PHYs at an invalid state during system power-up or reset thus causing unreliability issues with the PHY which manifests as PHY not being detected or link not functional. To fix this, these PHYs need to be RESET via a GPIO connected to the PHY's RESET pin. Some boards have a single GPIO controlling the PHY RESET pin of all PHYs on the bus whereas some others have separate GPIOs controlling individual PHY RESETs. In both cases, the RESET de-assertion cannot be done in the PHY driver as the PHY will not probe till its reset is de-asserted. So do the RESET de-assertion in the MDIO bus driver. [1] - am572x-idk, am571x-idk, a437x-idk Signed-off-by: Roger Quadros Signed-off-by: David S. Miller --- include/linux/phy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 624cecf69c28..37ca77d86983 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -217,6 +217,13 @@ struct mii_bus { * matching its address */ int irq[PHY_MAX_ADDR]; + + /* GPIO reset pulse width in microseconds */ + int reset_delay_us; + /* Number of reset GPIOs */ + int num_reset_gpios; + /* Array of RESET GPIO descriptors */ + struct gpio_desc **reset_gpiod; }; #define to_mii_bus(d) container_of(d, struct mii_bus, dev) -- cgit v1.2.3 From cf1ef3f0719b4dcb74810ed507e2a2540f9811b4 Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 20 Apr 2017 14:45:46 -0700 Subject: net/tcp_fastopen: Disable active side TFO in certain scenarios Middlebox firewall issues can potentially cause server's data being blackholed after a successful 3WHS using TFO. Following are the related reports from Apple: https://www.nanog.org/sites/default/files/Paasch_Network_Support.pdf Slide 31 identifies an issue where the client ACK to the server's data sent during a TFO'd handshake is dropped. 
C ---> syn-data ---> S C <--- syn/ack ----- S C (accept & write) C <---- data ------- S C ----- ACK -> X S [retry and timeout] https://www.ietf.org/proceedings/94/slides/slides-94-tcpm-13.pdf Slide 5 shows a similar situation that the server's data gets dropped after 3WHS. C ---- syn-data ---> S C <--- syn/ack ----- S C ---- ack --------> S S (accept & write) C? X <- data ------ S [retry and timeout] This is the worst failure b/c the client can not detect such behavior to mitigate the situation (such as disabling TFO). Failing to proceed, the application (e.g., SSL library) may simply timeout and retry with TFO again, and the process repeats indefinitely. The proposed solution is to disable active TFO globally under the following circumstances: 1. client side TFO socket detects out of order FIN 2. client side TFO socket receives out of order RST We disable active side TFO globally for 1hr at first. Then if it happens again, we disable it for 2h, then 4h, 8h, ... And we reset the timeout to 1hr if a client side TFO socket not opened on loopback has successfully received data segs from server. And we examine this condition during close(). The rationale behind it is that when such firewall issue happens, application running on the client should eventually close the socket as it is not able to get the data it is expecting. Or application running on the server should close the socket as it is not able to receive any response from client. In both cases, out of order FIN or RST will get received on the client given that the firewall will not block them as no data are in those frames. And we want to disable active TFO globally as it helps if the middle box is very close to the client and most of the connections are likely to fail. Also, add a debug sysctl: tcp_fastopen_blackhole_detect_timeout_sec: the initial timeout to use when firewall blackhole issue happens. This can be set and read. When setting it to 0, it means to disable the active disable logic.
Signed-off-by: Wei Wang Acked-by: Yuchung Cheng Acked-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index cfc2d9506ce8..cbe5b602a2d3 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -233,6 +233,7 @@ struct tcp_sock { u8 syn_data:1, /* SYN includes data */ syn_fastopen:1, /* SYN includes Fast Open option */ syn_fastopen_exp:1,/* SYN includes Fast Open exp. option */ + syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ save_syn:1, /* Save headers of SYN packet */ is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ -- cgit v1.2.3 From 029c1ecbb2429cf08c7bd2de81e929f81feea914 Mon Sep 17 00:00:00 2001 From: Benjamin LaHaise Date: Sat, 22 Apr 2017 16:52:46 -0400 Subject: flow_dissector: add mpls support (v2) Add support for parsing MPLS flows to the flow dissector in preparation for adding MPLS match support to cls_flower. Signed-off-by: Benjamin LaHaise Signed-off-by: Benjamin LaHaise Reviewed-by: Jakub Kicinski Cc: "David S. Miller" Cc: Simon Horman Cc: Jamal Hadi Salim Cc: Cong Wang Cc: Jiri Pirko Cc: Eric Dumazet Cc: Hadar Hen Zion Cc: Gao Feng Acked-by: Jiri Pirko Signed-off-by: David S. 
Miller --- include/linux/mpls.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpls.h b/include/linux/mpls.h index 9999145bc190..384fb22b6c43 100644 --- a/include/linux/mpls.h +++ b/include/linux/mpls.h @@ -3,4 +3,9 @@ #include +#define MPLS_TTL_MASK (MPLS_LS_TTL_MASK >> MPLS_LS_TTL_SHIFT) +#define MPLS_BOS_MASK (MPLS_LS_S_MASK >> MPLS_LS_S_SHIFT) +#define MPLS_TC_MASK (MPLS_LS_TC_MASK >> MPLS_LS_TC_SHIFT) +#define MPLS_LABEL_MASK (MPLS_LS_LABEL_MASK >> MPLS_LS_LABEL_SHIFT) + #endif /* _LINUX_MPLS_H */ -- cgit v1.2.3 From 490cb6ddb17df5ef5f5eb33c9a34f3033b31c204 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Wed, 19 Apr 2017 17:48:55 +0100 Subject: PCI: Implement devm_pci_remap_cfgspace() The introduction of the pci_remap_cfgspace() interface allows PCI host controller drivers to map PCI config space through a dedicated kernel interface. Current PCI host controller drivers use the devm_ioremap_*() devres interfaces to map PCI configuration space regions so in order to update them to the new pci_remap_cfgspace() mapping interface a new set of devres interfaces should be implemented so that PCI host controller drivers can make use of them. Introduce two new functions in the PCI kernel layer and Devres documentation: - devm_pci_remap_cfgspace() - devm_pci_remap_cfg_resource() so that PCI host controller drivers can make use of them to map PCI configuration space regions. 
Signed-off-by: Lorenzo Pieralisi Signed-off-by: Bjorn Helgaas Cc: Jonathan Corbet --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index eb3da1a04e6c..70534d66d18a 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1199,6 +1199,11 @@ unsigned long pci_address_to_pio(phys_addr_t addr); phys_addr_t pci_pio_to_address(unsigned long pio); int pci_remap_iospace(const struct resource *res, phys_addr_t phys_addr); void pci_unmap_iospace(struct resource *res); +void __iomem *devm_pci_remap_cfgspace(struct device *dev, + resource_size_t offset, + resource_size_t size); +void __iomem *devm_pci_remap_cfg_resource(struct device *dev, + struct resource *res); static inline pci_bus_addr_t pci_bus_address(struct pci_dev *pdev, int bar) { -- cgit v1.2.3 From 6335d68349a85382cc55a5260d5bfda85f8e24a8 Mon Sep 17 00:00:00 2001 From: Heiner Kallweit Date: Sun, 26 Mar 2017 20:41:32 +0200 Subject: mmc: core: add mmc_get_dma_dir Add function for determining DMA direction to core. Signed-off-by: Heiner Kallweit Reviewed-by: Shawn Lin Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 83f1c4a9f03b..21385ac0c9b1 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -17,6 +17,7 @@ #include #include #include +#include struct mmc_ios { unsigned int clock; /* clock rate */ @@ -499,6 +500,11 @@ static inline bool mmc_can_retune(struct mmc_host *host) return host->can_retune == 1; } +static inline enum dma_data_direction mmc_get_dma_dir(struct mmc_data *data) +{ + return data->flags & MMC_DATA_WRITE ? 
DMA_TO_DEVICE : DMA_FROM_DEVICE; +} + int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); int mmc_abort_tuning(struct mmc_host *host, u32 opcode); -- cgit v1.2.3 From 7b410d074b253a44624497a18e73f666a9574f37 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:36 +0200 Subject: mmc: queue: Share mmc request array between partitions eMMC can have multiple internal partitions that are represented as separate disks / queues. However switching between partitions is only done when the queue is empty. Consequently the array of mmc requests that are queued can be shared between partitions saving memory. Keep a pointer to the mmc request queue on the card, and use that instead of allocating a new one for each partition. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 77e61e0a216a..119ef8f0155c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -208,6 +208,7 @@ struct sdio_cis { struct mmc_host; struct sdio_func; struct sdio_func_tuple; +struct mmc_queue_req; #define SDIO_MAX_FUNCS 7 @@ -300,6 +301,10 @@ struct mmc_card { struct dentry *debugfs_root; struct mmc_part part[MMC_NUM_PHY_PARTITION]; /* physical partitions */ unsigned int nr_parts; + + struct mmc_queue_req *mqrq; /* Shared queue structure */ + unsigned int bouncesz; /* Bounce buffer size */ + int qdepth; /* Shared queue depth */ }; static inline bool mmc_large_sector(struct mmc_card *card) -- cgit v1.2.3 From b658af718465cd1e8011c8da281befdfc2debefd Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:37 +0200 Subject: mmc: mmc: Add functions to enable / disable the Command Queue Add helper functions to enable or disable the Command Queue. 
Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 119ef8f0155c..94637796b99c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -89,6 +89,7 @@ struct mmc_ext_csd { unsigned int boot_ro_lock; /* ro lock support */ bool boot_ro_lockable; bool ffu_capable; /* Firmware upgrade support */ + bool cmdq_en; /* Command Queue enabled */ bool cmdq_support; /* Command Queue supported */ unsigned int cmdq_depth; /* Command Queue depth */ #define MMC_FIRMWARE_LEN 8 -- cgit v1.2.3 From 9d4579a85c84340044b10ffa6cd576397f59dc93 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 13 Mar 2017 14:36:38 +0200 Subject: mmc: mmc_test: Disable Command Queue while mmc_test is used Normal read and write commands may not be used while the command queue is enabled. Disable the Command Queue when mmc_test is probed and re-enable it when it is removed. 
Signed-off-by: Adrian Hunter Reviewed-by: Harjani Ritesh Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/card.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 94637796b99c..85b5f2bc8bb9 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -269,6 +269,8 @@ struct mmc_card { #define MMC_QUIRK_TRIM_BROKEN (1<<12) /* Skip trim */ #define MMC_QUIRK_BROKEN_HPI (1<<13) /* Disable broken HPI support */ + bool reenable_cmdq; /* Re-enable Command Queue */ + unsigned int erase_size; /* erase size in sectors */ unsigned int erase_shift; /* if erase unit is power 2 */ unsigned int pref_erase; /* in sectors */ -- cgit v1.2.3 From 33e6d74d65c358270f00d228877178964aab84b3 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 24 Apr 2017 13:41:55 -0500 Subject: mmc: core: Export API to allow hosts to get the card address Some host controllers, like Cavium, need to know whether the card operates in byte- or block-address mode. Therefore export a new API, mmc_card_is_blockaddr(), which provides this information. Signed-off-by: Ulf Hansson Signed-off-by: Steven J.
Hill Acked-by: David Daney --- include/linux/mmc/card.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 85b5f2bc8bb9..aad015e0152b 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -315,6 +315,8 @@ static inline bool mmc_large_sector(struct mmc_card *card) return card->ext_csd.data_sector_size == 4096; } +bool mmc_card_is_blockaddr(struct mmc_card *card); + #define mmc_card_mmc(c) ((c)->type == MMC_TYPE_MMC) #define mmc_card_sd(c) ((c)->type == MMC_TYPE_SD) #define mmc_card_sdio(c) ((c)->type == MMC_TYPE_SDIO) -- cgit v1.2.3 From 29fa6c567855eb92d21122162029c5709892106b Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Wed, 19 Apr 2017 17:36:38 -0700 Subject: f2fs: add parentheses for macro variables more This patch adds parentheses for macro variables more in include/linux/f2fs_fs.h. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index e2d239ed4c60..639cbdf65e2b 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -32,9 +32,9 @@ /* 0, 1(node nid), 2(meta nid) are reserved node id */ #define F2FS_RESERVED_NODE_NUM 3 -#define F2FS_ROOT_INO(sbi) (sbi->root_ino_num) -#define F2FS_NODE_INO(sbi) (sbi->node_ino_num) -#define F2FS_META_INO(sbi) (sbi->meta_ino_num) +#define F2FS_ROOT_INO(sbi) ((sbi)->root_ino_num) +#define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) +#define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ @@ -161,7 +161,7 @@ struct f2fs_checkpoint { */ #define F2FS_ORPHANS_PER_BLOCK 1020 -#define GET_ORPHAN_BLOCKS(n) ((n + F2FS_ORPHANS_PER_BLOCK - 1) / \ +#define GET_ORPHAN_BLOCKS(n) (((n) + F2FS_ORPHANS_PER_BLOCK - 1) / \ 
F2FS_ORPHANS_PER_BLOCK) struct f2fs_orphan_block { @@ -449,7 +449,7 @@ typedef __le32 f2fs_hash_t; #define F2FS_SLOT_LEN 8 #define F2FS_SLOT_LEN_BITS 3 -#define GET_DENTRY_SLOTS(x) ((x + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) +#define GET_DENTRY_SLOTS(x) (((x) + F2FS_SLOT_LEN - 1) >> F2FS_SLOT_LEN_BITS) /* MAX level for dir lookup */ #define MAX_DIR_HASH_DEPTH 63 -- cgit v1.2.3 From e390b55d5aefe2b51569068b2a505d19d72afbf1 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 24 Apr 2017 22:14:35 +0200 Subject: bpf: make bpf_xdp_adjust_head support mandatory Now that also the last in-tree user of the xdp_adjust_head bit has been removed, we can remove the flag from struct bpf_prog altogether. This, at the same time, also makes sure that any future driver for XDP comes with bpf_xdp_adjust_head() support right away. A rejection based on this flag would also mean that tail calls couldn't be used with such driver as per c2002f983767 ("bpf: fix checking xdp_adjust_head on tail calls") fix, thus lets not allow for it in the first place. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 511fe910bf1d..9a7786db14fa 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -413,8 +413,7 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - xdp_adjust_head:1; /* Adjusting pkt head? */ + dst_needed:1; /* Do we need dst entry? 
*/ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ -- cgit v1.2.3 From c3df7c5755ee1a53cd56a4efcf3426334ab9eea4 Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 19 Jan 2017 16:31:06 +0100 Subject: can: peak: move header file to new can common subdir The CAN-FD IP from PEAK-System runs into several kinds of PC CAN-FD interfaces. Up to now, only the USB CAN-FD adapters were supported by the Kernel. In order to prepare the adding of some new non-USB CAN-FD interfaces, this patch moves - and rename - the IP definitions file from its private (usb) sub-directory into a - newly created - CAN specific one. Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev/peak_canfd.h | 243 +++++++++++++++++++++++++++++++++++++ 1 file changed, 243 insertions(+) create mode 100644 include/linux/can/dev/peak_canfd.h (limited to 'include/linux') diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h new file mode 100644 index 000000000000..25e20ef2fef8 --- /dev/null +++ b/include/linux/can/dev/peak_canfd.h @@ -0,0 +1,243 @@ +/* + * CAN driver for PEAK System micro-CAN based adapters + * + * Copyright (C) 2003-2011 PEAK System-Technik GmbH + * Copyright (C) 2011-2013 Stephane Grosjean + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published + * by the Free Software Foundation; version 2 of the License. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. 
+ */ +#ifndef PUCAN_H +#define PUCAN_H + +/* uCAN commands opcodes list (low-order 10 bits) */ +#define PUCAN_CMD_NOP 0x000 +#define PUCAN_CMD_RESET_MODE 0x001 +#define PUCAN_CMD_NORMAL_MODE 0x002 +#define PUCAN_CMD_LISTEN_ONLY_MODE 0x003 +#define PUCAN_CMD_TIMING_SLOW 0x004 +#define PUCAN_CMD_TIMING_FAST 0x005 +#define PUCAN_CMD_FILTER_STD 0x008 +#define PUCAN_CMD_TX_ABORT 0x009 +#define PUCAN_CMD_WR_ERR_CNT 0x00a +#define PUCAN_CMD_SET_EN_OPTION 0x00b +#define PUCAN_CMD_CLR_DIS_OPTION 0x00c +#define PUCAN_CMD_END_OF_COLLECTION 0x3ff + +/* uCAN received messages list */ +#define PUCAN_MSG_CAN_RX 0x0001 +#define PUCAN_MSG_ERROR 0x0002 +#define PUCAN_MSG_STATUS 0x0003 +#define PUCAN_MSG_BUSLOAD 0x0004 +#define PUCAN_MSG_CAN_TX 0x1000 + +/* uCAN command common header */ +struct __packed pucan_command { + __le16 opcode_channel; + u16 args[3]; +}; + +#define PUCAN_TSLOW_BRP_BITS 10 +#define PUCAN_TSLOW_TSGEG1_BITS 8 +#define PUCAN_TSLOW_TSGEG2_BITS 7 +#define PUCAN_TSLOW_SJW_BITS 7 + +#define PUCAN_TSLOW_BRP_MASK ((1 << PUCAN_TSLOW_BRP_BITS) - 1) +#define PUCAN_TSLOW_TSEG1_MASK ((1 << PUCAN_TSLOW_TSGEG1_BITS) - 1) +#define PUCAN_TSLOW_TSEG2_MASK ((1 << PUCAN_TSLOW_TSGEG2_BITS) - 1) +#define PUCAN_TSLOW_SJW_MASK ((1 << PUCAN_TSLOW_SJW_BITS) - 1) + +/* uCAN TIMING_SLOW command fields */ +#define PUCAN_TSLOW_SJW_T(s, t) (((s) & PUCAN_TSLOW_SJW_MASK) | \ + ((!!(t)) << 7)) +#define PUCAN_TSLOW_TSEG2(t) ((t) & PUCAN_TSLOW_TSEG2_MASK) +#define PUCAN_TSLOW_TSEG1(t) ((t) & PUCAN_TSLOW_TSEG1_MASK) +#define PUCAN_TSLOW_BRP(b) ((b) & PUCAN_TSLOW_BRP_MASK) + +struct __packed pucan_timing_slow { + __le16 opcode_channel; + + u8 ewl; /* Error Warning limit */ + u8 sjw_t; /* Sync Jump Width + Triple sampling */ + u8 tseg2; /* Timing SEGment 2 */ + u8 tseg1; /* Timing SEGment 1 */ + + __le16 brp; /* BaudRate Prescaler */ +}; + +#define PUCAN_TFAST_BRP_BITS 10 +#define PUCAN_TFAST_TSGEG1_BITS 5 +#define PUCAN_TFAST_TSGEG2_BITS 4 +#define PUCAN_TFAST_SJW_BITS 4 + +#define 
PUCAN_TFAST_BRP_MASK ((1 << PUCAN_TFAST_BRP_BITS) - 1) +#define PUCAN_TFAST_TSEG1_MASK ((1 << PUCAN_TFAST_TSGEG1_BITS) - 1) +#define PUCAN_TFAST_TSEG2_MASK ((1 << PUCAN_TFAST_TSGEG2_BITS) - 1) +#define PUCAN_TFAST_SJW_MASK ((1 << PUCAN_TFAST_SJW_BITS) - 1) + +/* uCAN TIMING_FAST command fields */ +#define PUCAN_TFAST_SJW(s) ((s) & PUCAN_TFAST_SJW_MASK) +#define PUCAN_TFAST_TSEG2(t) ((t) & PUCAN_TFAST_TSEG2_MASK) +#define PUCAN_TFAST_TSEG1(t) ((t) & PUCAN_TFAST_TSEG1_MASK) +#define PUCAN_TFAST_BRP(b) ((b) & PUCAN_TFAST_BRP_MASK) + +struct __packed pucan_timing_fast { + __le16 opcode_channel; + + u8 unused; + u8 sjw; /* Sync Jump Width */ + u8 tseg2; /* Timing SEGment 2 */ + u8 tseg1; /* Timing SEGment 1 */ + + __le16 brp; /* BaudRate Prescaler */ +}; + +/* uCAN FILTER_STD command fields */ +#define PUCAN_FLTSTD_ROW_IDX_BITS 6 + +struct __packed pucan_filter_std { + __le16 opcode_channel; + + __le16 idx; + __le32 mask; /* CAN-ID bitmask in idx range */ +}; + +/* uCAN WR_ERR_CNT command fields */ +#define PUCAN_WRERRCNT_TE 0x4000 /* Tx error cntr write Enable */ +#define PUCAN_WRERRCNT_RE 0x8000 /* Rx error cntr write Enable */ + +struct __packed pucan_wr_err_cnt { + __le16 opcode_channel; + + __le16 sel_mask; + u8 tx_counter; /* Tx error counter new value */ + u8 rx_counter; /* Rx error counter new value */ + + u16 unused; +}; + +/* uCAN SET_EN/CLR_DIS _OPTION command fields */ +#define PUCAN_OPTION_ERROR 0x0001 +#define PUCAN_OPTION_BUSLOAD 0x0002 +#define PUCAN_OPTION_CANDFDISO 0x0004 + +struct __packed pucan_options { + __le16 opcode_channel; + + __le16 options; + u32 unused; +}; + +/* uCAN received messages global format */ +struct __packed pucan_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; +}; + +/* uCAN flags for CAN/CANFD messages */ +#define PUCAN_MSG_SELF_RECEIVE 0x80 +#define PUCAN_MSG_ERROR_STATE_IND 0x40 /* error state indicator */ +#define PUCAN_MSG_BITRATE_SWITCH 0x20 /* bitrate switch */ +#define PUCAN_MSG_EXT_DATA_LEN 0x10 /* 
extended data length */ +#define PUCAN_MSG_SINGLE_SHOT 0x08 +#define PUCAN_MSG_LOOPED_BACK 0x04 +#define PUCAN_MSG_EXT_ID 0x02 +#define PUCAN_MSG_RTR 0x01 + +struct __packed pucan_rx_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; + __le32 tag_low; + __le32 tag_high; + u8 channel_dlc; + u8 client; + __le16 flags; + __le32 can_id; + u8 d[0]; +}; + +/* uCAN error types */ +#define PUCAN_ERMSG_BIT_ERROR 0 +#define PUCAN_ERMSG_FORM_ERROR 1 +#define PUCAN_ERMSG_STUFF_ERROR 2 +#define PUCAN_ERMSG_OTHER_ERROR 3 +#define PUCAN_ERMSG_ERR_CNT_DEC 4 + +struct __packed pucan_error_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; + u8 channel_type_d; + u8 code_g; + u8 tx_err_cnt; + u8 rx_err_cnt; +}; + +#define PUCAN_BUS_PASSIVE 0x20 +#define PUCAN_BUS_WARNING 0x40 +#define PUCAN_BUS_BUSOFF 0x80 + +struct __packed pucan_status_msg { + __le16 size; + __le16 type; + __le32 ts_low; + __le32 ts_high; + u8 channel_p_w_b; + u8 unused[3]; +}; + +/* uCAN transmitted message format */ +#define PUCAN_MSG_CHANNEL_DLC(c, d) (((c) & 0xf) | ((d) << 4)) + +struct __packed pucan_tx_msg { + __le16 size; + __le16 type; + __le32 tag_low; + __le32 tag_high; + u8 channel_dlc; + u8 client; + __le16 flags; + __le32 can_id; + u8 d[0]; +}; + +/* build the cmd opcode_channel field with respect to the correct endianness */ +static inline __le16 pucan_cmd_opcode_channel(int index, int opcode) +{ + return cpu_to_le16(((index) << 12) | ((opcode) & 0x3ff)); +} + +/* return the channel number part from any received message channel_dlc field */ +static inline int pucan_msg_get_channel(const struct pucan_rx_msg *msg) +{ + return msg->channel_dlc & 0xf; +} + +/* return the dlc value from any received message channel_dlc field */ +static inline int pucan_msg_get_dlc(const struct pucan_rx_msg *msg) +{ + return msg->channel_dlc >> 4; +} + +static inline int pucan_ermsg_get_channel(const struct pucan_error_msg *msg) +{ + return msg->channel_type_d & 0x0f; +} + +static 
inline int pucan_stmsg_get_channel(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & 0x0f; +} + +#endif -- cgit v1.2.3 From 8ac8321e4a7981312348413b9ec314fd93d71a0c Mon Sep 17 00:00:00 2001 From: Stephane Grosjean Date: Thu, 19 Jan 2017 16:31:07 +0100 Subject: can: peak: add support for PEAK PCAN-PCIe FD CAN-FD boards MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch adds the support of the PCAN-PCI Express FD boards made by PEAK-System, for computers using the PCI Express slot. The PCAN-PCI Express FD has one or two CAN FD channels, depending on the model. A galvanic isolation of the CAN ports protects the electronics of the card and the respective computer against disturbances of up to 500 Volts. The PCAN-PCI Express FD can be operated with ambient temperatures in a range of -40 to +85 °C. Such boards run an extended version of the CAN-FD IP running into USB CAN-FD interfaces from PEAK-System, so this patch adds several new commands and their corresponding data types to the PEAK CAN-FD common definitions header file too.
Signed-off-by: Stephane Grosjean Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev/peak_canfd.h | 65 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 65 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/dev/peak_canfd.h b/include/linux/can/dev/peak_canfd.h index 25e20ef2fef8..46dceef2cfa6 100644 --- a/include/linux/can/dev/peak_canfd.h +++ b/include/linux/can/dev/peak_canfd.h @@ -23,11 +23,14 @@ #define PUCAN_CMD_LISTEN_ONLY_MODE 0x003 #define PUCAN_CMD_TIMING_SLOW 0x004 #define PUCAN_CMD_TIMING_FAST 0x005 +#define PUCAN_CMD_SET_STD_FILTER 0x006 +#define PUCAN_CMD_RESERVED2 0x007 #define PUCAN_CMD_FILTER_STD 0x008 #define PUCAN_CMD_TX_ABORT 0x009 #define PUCAN_CMD_WR_ERR_CNT 0x00a #define PUCAN_CMD_SET_EN_OPTION 0x00b #define PUCAN_CMD_CLR_DIS_OPTION 0x00c +#define PUCAN_CMD_RX_BARRIER 0x010 #define PUCAN_CMD_END_OF_COLLECTION 0x3ff /* uCAN received messages list */ @@ -35,6 +38,10 @@ #define PUCAN_MSG_ERROR 0x0002 #define PUCAN_MSG_STATUS 0x0003 #define PUCAN_MSG_BUSLOAD 0x0004 + +#define PUCAN_MSG_CACHE_CRITICAL 0x0102 + +/* uCAN transmitted messages */ #define PUCAN_MSG_CAN_TX 0x1000 /* uCAN command common header */ @@ -43,6 +50,12 @@ struct __packed pucan_command { u16 args[3]; }; +/* return the opcode from the opcode_channel field of a command */ +static inline u16 pucan_cmd_get_opcode(struct pucan_command *c) +{ + return le16_to_cpu(c->opcode_channel) & 0x3ff; +} + #define PUCAN_TSLOW_BRP_BITS 10 #define PUCAN_TSLOW_TSGEG1_BITS 8 #define PUCAN_TSLOW_TSGEG2_BITS 7 @@ -108,6 +121,27 @@ struct __packed pucan_filter_std { __le32 mask; /* CAN-ID bitmask in idx range */ }; +#define PUCAN_FLTSTD_ROW_IDX_MAX ((1 << PUCAN_FLTSTD_ROW_IDX_BITS) - 1) + +/* uCAN SET_STD_FILTER command fields */ +struct __packed pucan_std_filter { + __le16 opcode_channel; + + u8 unused; + u8 idx; + __le32 mask; /* CAN-ID bitmask in idx range */ +}; + +/* uCAN TX_ABORT commands fields */ +#define PUCAN_TX_ABORT_FLUSH 0x0001 + +struct __packed 
pucan_tx_abort { + __le16 opcode_channel; + + __le16 flags; + u32 unused; +}; + /* uCAN WR_ERR_CNT command fields */ #define PUCAN_WRERRCNT_TE 0x4000 /* Tx error cntr write Enable */ #define PUCAN_WRERRCNT_RE 0x8000 /* Rx error cntr write Enable */ @@ -184,6 +218,12 @@ struct __packed pucan_error_msg { u8 rx_err_cnt; }; +static inline int pucan_error_get_channel(const struct pucan_error_msg *msg) +{ + return msg->channel_type_d & 0x0f; +} + +#define PUCAN_RX_BARRIER 0x10 #define PUCAN_BUS_PASSIVE 0x20 #define PUCAN_BUS_WARNING 0x40 #define PUCAN_BUS_BUSOFF 0x80 @@ -197,6 +237,31 @@ struct __packed pucan_status_msg { u8 unused[3]; }; +static inline int pucan_status_get_channel(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & 0x0f; +} + +static inline int pucan_status_is_rx_barrier(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_RX_BARRIER; +} + +static inline int pucan_status_is_passive(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_BUS_PASSIVE; +} + +static inline int pucan_status_is_warning(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_BUS_WARNING; +} + +static inline int pucan_status_is_busoff(const struct pucan_status_msg *msg) +{ + return msg->channel_p_w_b & PUCAN_BUS_BUSOFF; +} + /* uCAN transmitted message format */ #define PUCAN_MSG_CHANNEL_DLC(c, d) (((c) & 0xf) | ((d) << 4)) -- cgit v1.2.3 From cb5635a3677679666e4e81ecbb209d32f13dedcd Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Tue, 25 Apr 2017 08:19:41 +0200 Subject: can: complete initial namespace support The statistics and its proc output was not implemented as per-net in the initial network namespace support by Mario Kicherer (8e8cda6d737d). This patch adds the missing per-net statistics for the CAN subsystem. 
Signed-off-by: Oliver Hartkopp Signed-off-by: Marc Kleine-Budde --- include/linux/can/core.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/core.h b/include/linux/can/core.h index 319a0da827b8..c9a17bb1221c 100644 --- a/include/linux/can/core.h +++ b/include/linux/can/core.h @@ -5,7 +5,7 @@ * * Authors: Oliver Hartkopp * Urs Thuermann - * Copyright (c) 2002-2007 Volkswagen Group Electronic Research + * Copyright (c) 2002-2017 Volkswagen Group Electronic Research * All rights reserved. * */ @@ -17,7 +17,7 @@ #include #include -#define CAN_VERSION "20120528" +#define CAN_VERSION "20170425" /* increment this number each time you change some user-space interface */ #define CAN_ABI_VERSION "9" -- cgit v1.2.3 From f107d7a43923a83d837b3ea3c7b7de58cd014bbd Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 16 Mar 2017 09:02:42 +0100 Subject: mtd: nand: Remove unused chip->write_page() hook The last/only user of the chip->write_page() hook (the Atmel NAND controller driver) has been reworked and is no longer specifying a custom ->write_page() implementation. Drop this hook before someone else starts abusing it. Signed-off-by: Boris Brezillon Reviewed-by: Masahiro Yamada --- include/linux/mtd/nand.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c7de017c7f4c..40657939797c 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -828,7 +828,6 @@ struct nand_manufacturer_ops { * @errstat: [OPTIONAL] hardware specific function to perform * additional error status checks (determine if errors are * correctable).
- * @write_page: [REPLACEABLE] High-level page write function * @manufacturer: [INTERN] Contains manufacturer information */ @@ -854,9 +853,6 @@ struct nand_chip { int (*scan_bbt)(struct mtd_info *mtd); int (*errstat)(struct mtd_info *mtd, struct nand_chip *this, int state, int status, int page); - int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offset, int data_len, const uint8_t *buf, - int oob_required, int page, int cached, int raw); int (*onfi_set_features)(struct mtd_info *mtd, struct nand_chip *chip, int feature_addr, uint8_t *subfeature_para); int (*onfi_get_features)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From 07604686e808cd93d352172806a7828860f048f5 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 30 Mar 2017 15:45:47 +0900 Subject: mtd: nand: relax ecc.read_page() return value for uncorrectable ECC The comment for ecc.read_page() requires that it should return "0 if bitflips uncorrectable". Actually, drivers could return positive values when uncorrectable bitflips occur. For example, nand_read_page_swecc() is the case. If ecc.correct() returns -EBADMSG for the first ECC sector, and a positive value for the second one, nand_read_page_swecc() returns a positive max_bitflips and increments ecc_stats.failed for the same page. The requirement can be relaxed by tweaking nand_do_read_ops(). Move the max_bitflips calculation below the retry. Signed-off-by: Masahiro Yamada Suggested-by: Boris Brezillon Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 40657939797c..9e0c93c44bef 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -516,7 +516,7 @@ static inline void nand_hw_control_init(struct nand_hw_control *nfc) * out-of-band data). 
* @read_page: function to read a page according to the ECC generator * requirements; returns maximum number of bitflips corrected in - * any single ECC step, 0 if bitflips uncorrectable, -EIO hw error + * any single ECC step, -EIO hw error * @read_subpage: function to read parts of the page covered by ECC; * returns same as read_page() * @write_subpage: function to write parts of the page covered by ECC. -- cgit v1.2.3 From 477544c62a84d3bacd9f90ba75ffc16c04d78071 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 30 Mar 2017 17:15:05 +0900 Subject: mtd: nand: allow drivers to request minimum alignment for passed buffer In some cases, nand_do_{read,write}_ops is passed with unaligned ops->datbuf. Drivers using DMA will be unhappy about unaligned buffer. The new struct member, buf_align, represents the minimum alignment the driver requires for the buffer. If the buffer passed from the upper MTD layer does not have enough alignment, nand_do_*_ops will use bufpoi. Signed-off-by: Masahiro Yamada Signed-off-by: Boris Brezillon --- include/linux/mtd/nand.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 9e0c93c44bef..8f67b1581683 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -755,6 +755,7 @@ struct nand_manufacturer_ops { * setting the read-retry mode. Mostly needed for MLC NAND.
* @ecc: [BOARDSPECIFIC] ECC control structure * @buffers: buffer structure for read/write + * @buf_align: minimum buffer alignment required by a platform * @hwcontrol: platform-specific hardware control structure * @erase: [REPLACEABLE] erase function * @scan_bbt: [REPLACEABLE] function to scan bad block table @@ -905,6 +906,7 @@ struct nand_chip { struct nand_ecc_ctrl ecc; struct nand_buffers *buffers; + unsigned long buf_align; struct nand_hw_control hwcontrol; uint8_t *bbt; -- cgit v1.2.3 From 22161f3eb65dc29434325736c4d780908fe3bf6a Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 18 Apr 2017 11:43:49 +0100 Subject: regulator: arizona-micsupp: Move pdata into a separate structure In preparation for sharing this driver with Madera, move the pdata for the micsupp regulator out of struct arizona_pdata into a dedicated pdata struct for this driver. As a result the code in arizona_micsupp_of_get_pdata() can be made independent of struct arizona. This patch also updates the definition of struct arizona_pdata and the use of this pdata in mach-crag6410-module.c Signed-off-by: Richard Fitzgerald Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/arizona/pdata.h | 3 ++- include/linux/regulator/arizona-micsupp.h | 21 +++++++++++++++++++++ 2 files changed, 23 insertions(+), 1 deletion(-) create mode 100644 include/linux/regulator/arizona-micsupp.h (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 64faeeff698c..43e875f9850c 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -12,6 +12,7 @@ #define _ARIZONA_PDATA_H #include +#include #define ARIZONA_GPN_DIR_MASK 0x8000 /* GPN_DIR */ #define ARIZONA_GPN_DIR_SHIFT 15 /* GPN_DIR */ @@ -79,7 +80,7 @@ struct arizona_pdata { int ldoena; /** GPIO controlling LODENA, if any */ /** Regulator configuration for MICVDD */ - struct regulator_init_data *micvdd; + struct arizona_micsupp_pdata micvdd; /** 
Regulator configuration for LDO1 */ struct regulator_init_data *ldo1; diff --git a/include/linux/regulator/arizona-micsupp.h b/include/linux/regulator/arizona-micsupp.h new file mode 100644 index 000000000000..616842619c00 --- /dev/null +++ b/include/linux/regulator/arizona-micsupp.h @@ -0,0 +1,21 @@ +/* + * Platform data for Arizona micsupp regulator + * + * Copyright 2017 Cirrus Logic + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef ARIZONA_MICSUPP_H +#define ARIZONA_MICSUPP_H + +struct regulator_init_data; + +struct arizona_micsupp_pdata { + /** Regulator configuration for micsupp */ + const struct regulator_init_data *init_data; +}; + +#endif -- cgit v1.2.3 From aaa84e6a0399df374634c42590e644a698fcc3ff Mon Sep 17 00:00:00 2001 From: Richard Fitzgerald Date: Tue, 18 Apr 2017 11:43:52 +0100 Subject: regulator: arizona-ldo1: Move pdata into a separate structure In preparation for sharing this driver with Madera, move the pdata for the LDO1 regulator out of struct arizona_pdata into a dedicated pdata struct for this driver. As a result the code in arizona_ldo1_of_get_pdata() can be made independent of struct arizona. 
This patch also updates the definition of struct arizona_pdata and the use of this pdata in mach-crag6410-module.c Signed-off-by: Richard Fitzgerald Acked-by: Krzysztof Kozlowski Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/arizona/pdata.h | 4 ++-- include/linux/regulator/arizona-ldo1.h | 24 ++++++++++++++++++++++++ 2 files changed, 26 insertions(+), 2 deletions(-) create mode 100644 include/linux/regulator/arizona-ldo1.h (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 43e875f9850c..bfeecf179895 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -12,6 +12,7 @@ #define _ARIZONA_PDATA_H #include +#include #include #define ARIZONA_GPN_DIR_MASK 0x8000 /* GPN_DIR */ @@ -77,13 +78,12 @@ struct arizona_micd_range { struct arizona_pdata { int reset; /** GPIO controlling /RESET, if any */ - int ldoena; /** GPIO controlling LODENA, if any */ /** Regulator configuration for MICVDD */ struct arizona_micsupp_pdata micvdd; /** Regulator configuration for LDO1 */ - struct regulator_init_data *ldo1; + struct arizona_ldo1_pdata ldo1; /** If a direct 32kHz clock is provided on an MCLK specify it here */ int clk32k_src; diff --git a/include/linux/regulator/arizona-ldo1.h b/include/linux/regulator/arizona-ldo1.h new file mode 100644 index 000000000000..c685f1277c63 --- /dev/null +++ b/include/linux/regulator/arizona-ldo1.h @@ -0,0 +1,24 @@ +/* + * Platform data for Arizona LDO1 regulator + * + * Copyright 2017 Cirrus Logic + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef ARIZONA_LDO1_H +#define ARIZONA_LDO1_H + +struct regulator_init_data; + +struct arizona_ldo1_pdata { + /** GPIO controlling LDOENA, if any */ + int ldoena; + + /** Regulator configuration for LDO1 */ + const struct regulator_init_data *init_data; +}; + +#endif -- cgit v1.2.3 From 19489c7f0d9040ed2ffc23747e14af95dba479d2 Mon Sep 17 00:00:00 2001 From: "Chopra, Manish" Date: Mon, 24 Apr 2017 10:00:45 -0700 Subject: qed/qede: Enable tunnel offloads based on hw configuration This patch enables tunnel feature offloads based on hw configuration at initialization time instead of enabling them always. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 9f966be89510..5544d7b2f2bb 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -338,6 +338,11 @@ struct qed_dev_info { bool wol_support; enum qed_dev_type dev_type; + + /* Output parameters for qede */ + bool vxlan_enable; + bool gre_enable; + bool geneve_enable; }; enum qed_sb_type { -- cgit v1.2.3 From 97379f15c21e7ae27eb1ecf84adcace42c960c87 Mon Sep 17 00:00:00 2001 From: "Chopra, Manish" Date: Mon, 24 Apr 2017 10:00:48 -0700 Subject: qed/qede: Add UDP ports in bulletin board This patch adds support for UDP ports in bulletin board to notify UDP ports change to the VFs Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_eth_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 1eba803cb7f1..15fa7c6e4c6f 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -158,6 +158,7 @@ struct qed_tunn_params { struct qed_eth_cb_ops { struct qed_common_cb_ops common; void (*force_mac) (void *dev, u8 *mac, bool forced); + void (*ports_update)(void *dev, u16 vxlan_port, u16 geneve_port); }; #define QED_MAX_PHC_DRIFT_PPB 291666666 -- cgit v1.2.3 From b5cdae3291f7be7a34e75affe4c0ec1f7f328b64 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 18 Apr 2017 15:36:58 -0400 Subject: net: Generic XDP This provides a generic SKB based non-optimized XDP path which is used if either the driver lacks a specific XDP implementation, or the user requests it via a new IFLA_XDP_FLAGS value named XDP_FLAGS_SKB_MODE. It is arguable that perhaps I should have required something like this as part of the initial XDP feature merge. I believe this is critical for two reasons: 1) Accessibility. More people can play with XDP with fewer dependencies. Yes I know we have XDP support in virtio_net, but that just creates another dependency for learning how to use this facility. I wrote this to make life easier for the XDP newbies. 2) As a model for what the expected semantics are. If there is a pure generic core implementation, it serves as a semantic example for driver folks adding XDP support. One thing I have not tried to address here is the issue of XDP_PACKET_HEADROOM, thanks to Daniel for spotting that. It seems incredibly expensive to do a skb_cow(skb, XDP_PACKET_HEADROOM) or whatever even if the XDP program doesn't try to push headers at all. I think we really need the verifier to somehow propagate whether certain XDP helpers are used or not.
v5: - Handle both negative and positive offset after running prog - Fix mac length in XDP_TX case (Alexei) - Use rcu_dereference_protected() in free_netdev (kbuild test robot) v4: - Fix MAC header adjustment before calling prog (David Ahern) - Disable LRO when generic XDP is installed (Michael Chan) - Bypass qdisc et al. on XDP_TX and record the event (Alexei) - Do not perform generic XDP on reinjected packets (DaveM) v3: - Make sure XDP program sees packet at MAC header, push back MAC header if we do XDP_TX. (Alexei) - Elide GRO when generic XDP is in use. (Alexei) - Add XDP_FLAGS_SKB_MODE flag which the user can use to request generic XDP even if the driver has an XDP implementation. (Alexei) - Report whether SKB mode is in use in rtnl_xdp_fill() via XDP_FLAGS attribute. (Daniel) v2: - Add some "fall through" comments in switch statements based upon feedback from Andrew Lunn - Use RCU for generic xdp_prog, thanks to Johannes Berg. Tested-by: Andy Gospodarek Tested-by: Jesper Dangaard Brouer Tested-by: David Ahern Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5d5267febd56..46d220c2bf92 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1905,9 +1905,17 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; + struct bpf_prog __rcu *xdp_prog; }; #define to_net_dev(d) container_of(d, struct net_device, dev) +static inline bool netif_elide_gro(const struct net_device *dev) +{ + if (!(dev->features & NETIF_F_GRO) || dev->xdp_prog) + return true; + return false; +} + #define NETDEV_ALIGN 32 static inline -- cgit v1.2.3 From 817bf40265459578abc36c6bd53e27775b5c7ec4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 12 Apr 2017 13:37:44 -0700 Subject: dm: teach dm-targets to use a dax_device + dax_operations Arrange for dm to lookup the dax services available from member devices. Update the dax-capable targets, linear and stripe, to route dax operations to the underlying device. Changes the target-internal ->direct_access() method to more closely align with the dax_operations ->direct_access() calling convention. 
Cc: Toshi Kani Reviewed-by: Mike Snitzer Signed-off-by: Dan Williams --- include/linux/device-mapper.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index bcba4d89089c..df830d167892 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -128,14 +128,15 @@ typedef int (*dm_busy_fn) (struct dm_target *ti); * < 0 : error * >= 0 : the number of bytes accessible at the address */ -typedef long (*dm_direct_access_fn) (struct dm_target *ti, sector_t sector, - void **kaddr, pfn_t *pfn, long size); +typedef long (*dm_dax_direct_access_fn) (struct dm_target *ti, pgoff_t pgoff, + long nr_pages, void **kaddr, pfn_t *pfn); #define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); struct dm_dev { struct block_device *bdev; + struct dax_device *dax_dev; fmode_t mode; char name[16]; }; @@ -177,7 +178,7 @@ struct target_type { dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; - dm_direct_access_fn direct_access; + dm_dax_direct_access_fn direct_access; /* For internal device-mapper use. */ struct list_head list; -- cgit v1.2.3 From fa5d932c323e8e0d9b24b3517997d15b36d1607d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 27 Jan 2017 12:04:59 -0800 Subject: ext2, ext4, xfs: retrieve dax_device for iomap operations In preparation for converting fs/dax.c to use dax_direct_access() instead of bdev_direct_access(), add the plumbing to retrieve the dax_device associated with a given block_device. 
Signed-off-by: Dan Williams --- include/linux/iomap.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 7291810067eb..f753e788da31 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -41,6 +41,7 @@ struct iomap { u16 type; /* type of mapping */ u16 flags; /* flags for mapping */ struct block_device *bdev; /* block device for I/O */ + struct dax_device *dax_dev; /* dax_dev for dax operations */ }; /* -- cgit v1.2.3 From a41fe02b6bba853a29c864d00fd161bbe6cfc715 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 27 Jan 2017 14:13:15 -0800 Subject: Revert "block: use DAX for partition table reads" commit d1a5f2b4d8a1 ("block: use DAX for partition table reads") was part of a stalled effort to allow dax mappings of block devices. Since then the device-dax mechanism has filled the role of dax-mapping static device ranges. Now that we are moving ->direct_access() from a block_device operation to a dax_inode operation we would need block devices to map and carry their own dax_inode reference. Unless / until we decide to revive dax mapping of raw block devices through the dax_inode scheme, there is no need to carry read_dax_sector(). Its removal in turn allows for the removal of bdev_direct_access() and should have been included in commit 223757016837 ("block_dev: remove DAX leftovers"). 
Cc: Jeff Moyer Signed-off-by: Dan Williams --- include/linux/dax.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 7e62e280c11f..0d0d890f9186 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -70,15 +70,9 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); #ifdef CONFIG_FS_DAX -struct page *read_dax_sector(struct block_device *bdev, sector_t n); int __dax_zero_page_range(struct block_device *bdev, sector_t sector, unsigned int offset, unsigned int length); #else -static inline struct page *read_dax_sector(struct block_device *bdev, - sector_t n) -{ - return ERR_PTR(-ENXIO); -} static inline int __dax_zero_page_range(struct block_device *bdev, sector_t sector, unsigned int offset, unsigned int length) { -- cgit v1.2.3 From cccbce67158290537cc671cbd4c1564876485a65 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 27 Jan 2017 13:31:42 -0800 Subject: filesystem-dax: convert to dax_direct_access() Now that a dax_device is plumbed through all dax-capable drivers we can switch from block_device_operations to dax_operations for invoking ->direct_access. This also lets us kill off some usages of struct blk_dax_ctl on the way to its eventual removal. 
Suggested-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/dax.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 0d0d890f9186..d3158e74a59e 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -70,11 +70,13 @@ void dax_wake_mapping_entry_waiter(struct address_space *mapping, pgoff_t index, void *entry, bool wake_all); #ifdef CONFIG_FS_DAX -int __dax_zero_page_range(struct block_device *bdev, sector_t sector, +int __dax_zero_page_range(struct block_device *bdev, + struct dax_device *dax_dev, sector_t sector, unsigned int offset, unsigned int length); #else static inline int __dax_zero_page_range(struct block_device *bdev, - sector_t sector, unsigned int offset, unsigned int length) + struct dax_device *dax_dev, sector_t sector, + unsigned int offset, unsigned int length) { return -ENXIO; } -- cgit v1.2.3 From d4b29fd78ea6fc2be219be3af1a992149b4ff0f6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 27 Jan 2017 17:22:03 -0800 Subject: block: remove block_device_operations ->direct_access() Now that all the producers and consumers of dax interfaces have been converted to using dax_operations on a dax_device, remove the block device direct_access enabling. 
Signed-off-by: Dan Williams --- include/linux/blkdev.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 612c497d1461..848f87eb1905 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1916,28 +1916,12 @@ static inline bool integrity_req_gap_front_merge(struct request *req, #endif /* CONFIG_BLK_DEV_INTEGRITY */ -/** - * struct blk_dax_ctl - control and output parameters for ->direct_access - * @sector: (input) offset relative to a block_device - * @addr: (output) kernel virtual address for @sector populated by driver - * @pfn: (output) page frame number for @addr populated by driver - * @size: (input) number of bytes requested - */ -struct blk_dax_ctl { - sector_t sector; - void *addr; - long size; - pfn_t pfn; -}; - struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); int (*rw_page)(struct block_device *, sector_t, struct page *, bool); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); - long (*direct_access)(struct block_device *, sector_t, void **, pfn_t *, - long); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ @@ -1956,7 +1940,6 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); extern int bdev_dax_supported(struct super_block *, int); int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #else /* CONFIG_BLOCK */ -- cgit v1.2.3 From 6abccd1bfee49e491095772fd5aa9e96d915ae52 Mon 
Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 13 Jan 2017 14:14:23 -0800 Subject: x86, dax, pmem: remove indirection around memcpy_from_pmem() memcpy_from_pmem() maps directly to memcpy_mcsafe(). The wrapper serves no real benefit aside from affording a more generic function name than the x86-specific 'mcsafe'. However this would not be the first time that x86 terminology leaked into the global namespace. For lack of better name, just use memcpy_mcsafe() directly. This conversion also catches a place where we should have been using plain memcpy, acpi_nfit_blk_single_io(). Cc: Cc: Jan Kara Cc: Jeff Moyer Cc: Ingo Molnar Cc: Christoph Hellwig Cc: "H. Peter Anvin" Cc: Thomas Gleixner Cc: Matthew Wilcox Cc: Ross Zwisler Acked-by: Tony Luck Signed-off-by: Dan Williams --- include/linux/pmem.h | 23 ----------------------- include/linux/string.h | 8 ++++++++ 2 files changed, 8 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index e856c2cb0fe8..71ecf3d46aac 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -31,12 +31,6 @@ static inline void arch_memcpy_to_pmem(void *dst, const void *src, size_t n) BUG(); } -static inline int arch_memcpy_from_pmem(void *dst, const void *src, size_t n) -{ - BUG(); - return -EFAULT; -} - static inline size_t arch_copy_from_iter_pmem(void *addr, size_t bytes, struct iov_iter *i) { @@ -65,23 +59,6 @@ static inline bool arch_has_pmem_api(void) return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); } -/* - * memcpy_from_pmem - read from persistent memory with error handling - * @dst: destination buffer - * @src: source buffer - * @size: transfer length - * - * Returns 0 on success negative error code on failure. 
- */ -static inline int memcpy_from_pmem(void *dst, void const *src, size_t size) -{ - if (arch_has_pmem_api()) - return arch_memcpy_from_pmem(dst, src, size); - else - memcpy(dst, src, size); - return 0; -} - /** * memcpy_to_pmem - copy data to persistent memory * @dst: destination buffer for the copy diff --git a/include/linux/string.h b/include/linux/string.h index 26b6f6a66f83..9d6f189157e2 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -114,6 +114,14 @@ extern int memcmp(const void *,const void *,__kernel_size_t); #ifndef __HAVE_ARCH_MEMCHR extern void * memchr(const void *,int,__kernel_size_t); #endif +#ifndef __HAVE_ARCH_MEMCPY_MCSAFE +static inline __must_check int memcpy_mcsafe(void *dst, const void *src, + size_t cnt) +{ + memcpy(dst, src, cnt); + return 0; +} +#endif void *memchr_inv(const void *s, int c, size_t n); char *strreplace(char *s, char old, char new); -- cgit v1.2.3 From 7acedaf5c4355f812cfef883ac28bf15f7d9205e Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Apr 2017 11:36:52 -0700 Subject: net: move xdp_prog field in RX cache lines (struct net_device, xdp_prog) field should be moved in RX cache lines, reducing latencies when a single packet is received on idle host, since netif_elide_gro() needs it. Signed-off-by: Eric Dumazet Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 46d220c2bf92..8c5c8cdc7b97 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1790,6 +1790,7 @@ struct net_device { unsigned int real_num_rx_queues; #endif + struct bpf_prog __rcu *xdp_prog; unsigned long gro_flush_timeout; rx_handler_func_t __rcu *rx_handler; void __rcu *rx_handler_data; @@ -1905,7 +1906,6 @@ struct net_device { struct lock_class_key *qdisc_tx_busylock; struct lock_class_key *qdisc_running_key; bool proto_down; - struct bpf_prog __rcu *xdp_prog; }; #define to_net_dev(d) container_of(d, struct net_device, dev) -- cgit v1.2.3 From 51f567777799c9d85a778302b9eb61cf15214a98 Mon Sep 17 00:00:00 2001 From: "J. Bruce Fields" Date: Thu, 6 Apr 2017 22:36:31 -0400 Subject: nfsd: check for oversized NFSv2/v3 arguments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit A client can append random data to the end of an NFSv2 or NFSv3 RPC call without our complaining; we'll just stop parsing at the end of the expected data and ignore the rest. Encoded arguments and replies are stored together in an array of pages, and if a call is too large it could leave inadequate space for the reply. This is normally OK because NFS RPC's typically have either short arguments and long replies (like READ) or long arguments and short replies (like WRITE). But a client that sends an incorrectly long reply can violate those assumptions. This was observed to cause crashes. So, insist that the argument not be any longer than we expect. Also, several operations increment rq_next_page in the decode routine before checking the argument size, which can leave rq_next_page pointing well past the end of the page array, causing trouble later in svc_free_pages. 
As followup we may also want to rewrite the encoding routines to check more carefully that they aren't running off the end of the page array. Reported-by: Tuomas Haanpää Reported-by: Ari Kauppi Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index e770abeed32d..6ef19cf658b4 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -336,8 +336,7 @@ xdr_argsize_check(struct svc_rqst *rqstp, __be32 *p) { char *cp = (char *)p; struct kvec *vec = &rqstp->rq_arg.head[0]; - return cp >= (char*)vec->iov_base - && cp <= (char*)vec->iov_base + vec->iov_len; + return cp == (char *)vec->iov_base + vec->iov_len; } static inline int -- cgit v1.2.3 From 17f5f7f506aaca985b95df7ef7fc2ff49c36a8e9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:05:36 -0400 Subject: svcrdma: Move send_wr to svc_rdma_op_ctxt Clean up: Move the ib_send_wr off the stack, and move common code to post a Send Work Request into a helper. This is a refactoring change only. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index b105f73e3ca2..287db5c179d8 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -85,6 +85,7 @@ struct svc_rdma_op_ctxt { enum dma_data_direction direction; int count; unsigned int mapped_sges; + struct ib_send_wr send_wr; struct ib_sge sge[RPCSVC_MAXPAGES]; struct page *pages[RPCSVC_MAXPAGES]; }; @@ -227,6 +228,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_sendto.c */ extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, struct svc_rdma_req_map *, bool); +extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + int num_sge, u32 inv_rkey); extern int svc_rdma_sendto(struct svc_rqst *); extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, int); -- cgit v1.2.3 From 6e6092ca305ad785c605d7e313727aad96c228a5 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:05:44 -0400 Subject: svcrdma: Add svc_rdma_map_reply_hdr() Introduce a helper to DMA-map a reply's transport header before sending it. This will in part replace the map vector cache. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 287db5c179d8..002a46d1faa1 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -228,6 +228,9 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, /* svc_rdma_sendto.c */ extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, struct svc_rdma_req_map *, bool); +extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, + struct svc_rdma_op_ctxt *ctxt, + __be32 *rdma_resp, unsigned int len); extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, struct svc_rdma_op_ctxt *ctxt, int num_sge, u32 inv_rkey); -- cgit v1.2.3 From b623589dbacbc786c2fffc85113a1dc1a331e2ca Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:05:52 -0400 Subject: svcrdma: Eliminate RPCRDMA_SQ_DEPTH_MULT The Send Queue depth is temporarily reduced to 1 SQE per credit. The new rdma_rw API does an internal computation, during QP creation, to increase the depth of the Send Queue to handle RDMA Read and Write operations. This change has to come before the NFSD code paths are updated to use the rdma_rw API. Without this patch, rdma_rw_init_qp() increases the size of the SQ too much, resulting in memory allocation failures during QP creation. Signed-off-by: Chuck Lever Reviewed-by: Christoph Hellwig Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 002a46d1faa1..11d5aa123f17 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -182,7 +182,6 @@ struct svcxprt_rdma { /* The default ORD value is based on two outstanding full-size writes with a * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. 
*/ #define RPCRDMA_ORD (64/4) -#define RPCRDMA_SQ_DEPTH_MULT 8 #define RPCRDMA_MAX_REQUESTS 32 #define RPCRDMA_MAX_REQ_SIZE 4096 -- cgit v1.2.3 From f13193f50b64e2e0c87706b838d6b9895626a892 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:06:16 -0400 Subject: svcrdma: Introduce local rdma_rw API helpers The plan is to replace the local bespoke code that constructs and posts RDMA Read and Write Work Requests with calls to the rdma_rw API. This shares code with other RDMA-enabled ULPs that manages the gory details of buffer registration and posting Work Requests. Some design notes: o The structure of RPC-over-RDMA transport headers is flexible, allowing multiple segments per Reply with arbitrary alignment, each with a unique R_key. Write and Send WRs continue to be built and posted in separate code paths. However, one whole chunk (with one or more RDMA segments apiece) gets exactly one ib_post_send and one work completion. o svc_xprt reference counting is modified, since a chain of rdma_rw_ctx structs generates one completion, no matter how many Write WRs are posted. o The current code builds the transport header as it is constructing Write WRs. I've replaced that with marshaling of transport header data items in a separate step. This is because the exact structure of client-provided segments may not align with the components of the server's reply xdr_buf, or the pages in the page list. Thus parts of each client-provided segment may be written at different points in the send path. Signed-off-by: Chuck Lever Signed-off-by: J.
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 11d5aa123f17..ca08671fb7e2 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -145,12 +145,15 @@ struct svcxprt_rdma { u32 sc_max_requests; /* Max requests */ u32 sc_max_bc_requests;/* Backward credits */ int sc_max_req_size; /* Size of each RQ WR buf */ + u8 sc_port_num; struct ib_pd *sc_pd; spinlock_t sc_ctxt_lock; struct list_head sc_ctxts; int sc_ctxt_used; + spinlock_t sc_rw_ctxt_lock; + struct list_head sc_rw_ctxts; spinlock_t sc_map_lock; struct list_head sc_maps; @@ -224,6 +227,14 @@ extern int rdma_read_chunk_frmr(struct svcxprt_rdma *, struct svc_rqst *, struct svc_rdma_op_ctxt *, int *, u32 *, u32, u32, u64, bool); +/* svc_rdma_rw.c */ +extern void svc_rdma_destroy_rw_ctxts(struct svcxprt_rdma *rdma); +extern int svc_rdma_send_write_chunk(struct svcxprt_rdma *rdma, + __be32 *wr_ch, struct xdr_buf *xdr); +extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, + __be32 *rp_ch, bool writelist, + struct xdr_buf *xdr); + /* svc_rdma_sendto.c */ extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, struct svc_rdma_req_map *, bool); -- cgit v1.2.3 From 9a6a180b7867ceceeeab88a6f011bac23174b939 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:06:25 -0400 Subject: svcrdma: Use rdma_rw API in RPC reply path The current svcrdma sendto code path posts one RDMA Write WR at a time. Each of these Writes typically carries a small number of pages (for instance, up to 30 pages for mlx4 devices). That means a 1MB NFS READ reply requires 9 ib_post_send() calls for the Write WRs, and one for the Send WR carrying the actual RPC Reply message. Instead, use the new rdma_rw API. The details of Write WR chain construction and memory registration are taken care of in the RDMA core. 
svcrdma can focus on the details of the RPC-over-RDMA protocol. This gives three main benefits: 1. All Write WRs for one RDMA segment are posted in a single chain. As few as one ib_post_send() for each Write chunk. 2. The Write path can now use FRWR to register the Write buffers. If the device's maximum page list depth is large, this means a single Write WR is needed for each RPC's Write chunk data. 3. The new code introduces support for RPCs that carry both a Write list and a Reply chunk. This combination can be used for an NFSv4 READ where the data payload is large, and thus is removed from the Payload Stream, but the Payload Stream is still larger than the inline threshold. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index ca08671fb7e2..599ee03ee3fb 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -212,7 +212,6 @@ extern int svc_rdma_xdr_decode_req(struct xdr_buf *); extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, struct rpcrdma_msg *, enum rpcrdma_errcode, __be32 *); -extern void svc_rdma_xdr_encode_write_list(struct rpcrdma_msg *, int); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, __be32, __be64, u32); -- cgit v1.2.3 From 6b19cc5ca2f78ebc88f5d39ba6a94197bb392fcc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:06:33 -0400 Subject: svcrdma: Clean up RDMA_ERROR path Now that svc_rdma_sendto has been renovated, svc_rdma_send_error can be refactored to reduce code duplication and remove C structure- based XDR encoding. It is also relocated to the source file that contains its only caller. This is a refactoring change only. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/rpc_rdma.h | 3 +++ include/linux/sunrpc/svc_rdma.h | 5 ----- 2 files changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 245fc59b7324..b7e85b341a54 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -143,6 +143,9 @@ enum rpcrdma_proc { #define rdma_done cpu_to_be32(RDMA_DONE) #define rdma_error cpu_to_be32(RDMA_ERROR) +#define err_vers cpu_to_be32(ERR_VERS) +#define err_chunk cpu_to_be32(ERR_CHUNK) + /* * Private extension to RPC-over-RDMA Version One. * Message passed during RDMA-CM connection set-up. diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 599ee03ee3fb..a770d200f607 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -209,9 +209,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct xdr_buf *); -extern int svc_rdma_xdr_encode_error(struct svcxprt_rdma *, - struct rpcrdma_msg *, - enum rpcrdma_errcode, __be32 *); extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, __be32, __be64, u32); @@ -244,8 +241,6 @@ extern int svc_rdma_post_send_wr(struct svcxprt_rdma *rdma, struct svc_rdma_op_ctxt *ctxt, int num_sge, u32 inv_rkey); extern int svc_rdma_sendto(struct svc_rqst *); -extern void svc_rdma_send_error(struct svcxprt_rdma *, struct rpcrdma_msg *, - int); /* svc_rdma_transport.c */ extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *); -- cgit v1.2.3 From f5821c76b2c9c2fb98b276c0bf6a101bfe9050a3 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:06:49 -0400 Subject: svcrdma: Clean up RPC-over-RDMA backchannel reply processing Replace C structure-based XDR decoding with pointer arithmetic. 
Pointer arithmetic is considered more portable. Signed-off-by: Chuck Lever Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index a770d200f607..44d642bbfce6 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -204,7 +204,7 @@ static inline void svc_rdma_count_mappings(struct svcxprt_rdma *rdma, /* svc_rdma_backchannel.c */ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, - struct rpcrdma_msg *rmsgp, + __be32 *rdma_resp, struct xdr_buf *rcvbuf); /* svc_rdma_marshal.c */ -- cgit v1.2.3 From ded8d19641a605232ab48f5d27f542648beba3cc Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:06:57 -0400 Subject: svcrdma: Reduce size of sge array in struct svc_rdma_op_ctxt The sge array in struct svc_rdma_op_ctxt is no longer used for sending RDMA Write WRs. It need only accommodate the construction of Send and Receive WRs. The maximum inline size is the largest payload it needs to handle now. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 44d642bbfce6..e84b77556784 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -48,6 +48,12 @@ #include #define SVCRDMA_DEBUG +/* Default and maximum inline threshold sizes */ +enum { + RPCRDMA_DEF_INLINE_THRESH = 4096, + RPCRDMA_MAX_INLINE_THRESH = 65536 +}; + /* RPC/RDMA parameters and stats */ extern unsigned int svcrdma_ord; extern unsigned int svcrdma_max_requests; @@ -86,7 +92,7 @@ struct svc_rdma_op_ctxt { int count; unsigned int mapped_sges; struct ib_send_wr send_wr; - struct ib_sge sge[RPCSVC_MAXPAGES]; + struct ib_sge sge[1 + RPCRDMA_MAX_INLINE_THRESH / PAGE_SIZE]; struct page *pages[RPCSVC_MAXPAGES]; }; @@ -186,7 +192,6 @@ struct svcxprt_rdma { * page size of 4k, or 32k * 2 ops / 4k = 16 outstanding RDMA_READ. */ #define RPCRDMA_ORD (64/4) #define RPCRDMA_MAX_REQUESTS 32 -#define RPCRDMA_MAX_REQ_SIZE 4096 /* Typical ULP usage of BC requests is NFSv4.1 backchannel. Our * current NFSv4.1 implementation supports one backchannel slot. -- cgit v1.2.3 From 68cc4636bbbca89b9fedcf46d8b6bee444fc5e4e Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:07:05 -0400 Subject: svcrdma: Remove unused RDMA Write completion handler Clean up. All RDMA Write completions are now handled by svc_rdma_wc_write_ctx. Signed-off-by: Chuck Lever Signed-off-by: J. 
Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index e84b77556784..f58c5349beb7 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -249,7 +249,6 @@ extern int svc_rdma_sendto(struct svc_rqst *); /* svc_rdma_transport.c */ extern void svc_rdma_wc_send(struct ib_cq *, struct ib_wc *); -extern void svc_rdma_wc_write(struct ib_cq *, struct ib_wc *); extern void svc_rdma_wc_reg(struct ib_cq *, struct ib_wc *); extern void svc_rdma_wc_read(struct ib_cq *, struct ib_wc *); extern void svc_rdma_wc_inv(struct ib_cq *, struct ib_wc *); -- cgit v1.2.3 From 2cf32924c68a22783e6f630e1b5345a80aa1a376 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:07:13 -0400 Subject: svcrdma: Remove the req_map cache req_maps are no longer used by the send path and can thus be removed. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 34 +--------------------------------- 1 file changed, 1 insertion(+), 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index f58c5349beb7..479bb7f65233 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -96,23 +96,6 @@ struct svc_rdma_op_ctxt { struct page *pages[RPCSVC_MAXPAGES]; }; -/* - * NFS_ requests are mapped on the client side by the chunk lists in - * the RPCRDMA header. During the fetching of the RPC from the client - * and the writing of the reply to the client, the memory in the - * client and the memory in the server must be mapped as contiguous - * vaddr/len for access by the hardware. These data strucures keep - * these mappings. - * - * For an RDMA_WRITE, the 'sge' maps the RPC REPLY. 
For RDMA_READ, the - * 'sge' in the svc_rdma_req_map maps the server side RPC reply and the - * 'ch' field maps the read-list of the RPCRDMA header to the 'sge' - * mapping of the reply. - */ -struct svc_rdma_chunk_sge { - int start; /* sge no for this chunk */ - int count; /* sge count for this chunk */ -}; struct svc_rdma_fastreg_mr { struct ib_mr *mr; struct scatterlist *sg; @@ -121,15 +104,7 @@ struct svc_rdma_fastreg_mr { enum dma_data_direction direction; struct list_head frmr_list; }; -struct svc_rdma_req_map { - struct list_head free; - unsigned long count; - union { - struct kvec sge[RPCSVC_MAXPAGES]; - struct svc_rdma_chunk_sge ch[RPCSVC_MAXPAGES]; - unsigned long lkey[RPCSVC_MAXPAGES]; - }; -}; + #define RDMACTXT_F_LAST_CTXT 2 #define SVCRDMA_DEVCAP_FAST_REG 1 /* fast mr registration */ @@ -160,8 +135,6 @@ struct svcxprt_rdma { int sc_ctxt_used; spinlock_t sc_rw_ctxt_lock; struct list_head sc_rw_ctxts; - spinlock_t sc_map_lock; - struct list_head sc_maps; struct list_head sc_rq_dto_q; spinlock_t sc_rq_dto_lock; @@ -237,8 +210,6 @@ extern int svc_rdma_send_reply_chunk(struct svcxprt_rdma *rdma, struct xdr_buf *xdr); /* svc_rdma_sendto.c */ -extern int svc_rdma_map_xdr(struct svcxprt_rdma *, struct xdr_buf *, - struct svc_rdma_req_map *, bool); extern int svc_rdma_map_reply_hdr(struct svcxprt_rdma *rdma, struct svc_rdma_op_ctxt *ctxt, __be32 *rdma_resp, unsigned int len); @@ -259,9 +230,6 @@ extern int svc_rdma_create_listen(struct svc_serv *, int, struct sockaddr *); extern struct svc_rdma_op_ctxt *svc_rdma_get_context(struct svcxprt_rdma *); extern void svc_rdma_put_context(struct svc_rdma_op_ctxt *, int); extern void svc_rdma_unmap_dma(struct svc_rdma_op_ctxt *ctxt); -extern struct svc_rdma_req_map *svc_rdma_get_req_map(struct svcxprt_rdma *); -extern void svc_rdma_put_req_map(struct svcxprt_rdma *, - struct svc_rdma_req_map *); extern struct svc_rdma_fastreg_mr *svc_rdma_get_frmr(struct svcxprt_rdma *); extern void svc_rdma_put_frmr(struct svcxprt_rdma 
*, struct svc_rdma_fastreg_mr *); -- cgit v1.2.3 From dadf3e435debb85dfcf28c157012047153a21a97 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sun, 9 Apr 2017 13:07:21 -0400 Subject: svcrdma: Clean out old XDR encoders Clean up: These have been replaced and are no longer used. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc_rdma.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 479bb7f65233..f3787d800ba4 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -187,10 +187,6 @@ extern int svc_rdma_handle_bc_reply(struct rpc_xprt *xprt, /* svc_rdma_marshal.c */ extern int svc_rdma_xdr_decode_req(struct xdr_buf *); -extern void svc_rdma_xdr_encode_reply_array(struct rpcrdma_write_array *, int); -extern void svc_rdma_xdr_encode_array_chunk(struct rpcrdma_write_array *, int, - __be32, __be64, u32); -extern unsigned int svc_rdma_xdr_get_reply_hdr_len(__be32 *rdma_resp); /* svc_rdma_recvfrom.c */ extern int svc_rdma_recvfrom(struct svc_rqst *); -- cgit v1.2.3 From aee12a0a3727e16fb837367c4755cb6daaf45109 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Thu, 20 Apr 2017 00:45:48 +0200 Subject: ebtables: remove nf_hook_register usage Similar to ip_register_table, pass nf_hook_ops to ebt_register_table(). This allows to handle hook registration also via pernet_ops and allows us to avoid use of legacy register_hook api. 
Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 984b2112c77b..a30efb437e6d 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -109,8 +109,10 @@ struct ebt_table { #define EBT_ALIGN(s) (((s) + (__alignof__(struct _xt_align)-1)) & \ ~(__alignof__(struct _xt_align)-1)) extern struct ebt_table *ebt_register_table(struct net *net, - const struct ebt_table *table); -extern void ebt_unregister_table(struct net *net, struct ebt_table *table); + const struct ebt_table *table, + const struct nf_hook_ops *); +extern void ebt_unregister_table(struct net *net, struct ebt_table *table, + const struct nf_hook_ops *); extern unsigned int ebt_do_table(struct sk_buff *skb, const struct nf_hook_state *state, struct ebt_table *table); -- cgit v1.2.3 From 701cac61d0250912b89cbc28589969530179099a Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Apr 2017 19:15:53 -0400 Subject: CONFIG_ARCH_HAS_RAW_COPY_USER is unconditional now all architectures converted Signed-off-by: Al Viro --- include/linux/uaccess.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index 7fc2104b88bc..e0cbfb09e60f 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -12,12 +12,10 @@ #include -#ifdef CONFIG_ARCH_HAS_RAW_COPY_USER /* * Architectures should provide two primitives (raw_copy_{to,from}_user()) - * select ARCH_HAS_RAW_COPY_FROM_USER and get rid of their private instances - * of copy_{to,from}_user() and __copy_{to,from}_user{,_inatomic}(). Once - * all of them switch, this part of linux/uaccess.h will become unconditional. 
+ * and get rid of their private instances of copy_{to,from}_user() and + * __copy_{to,from}_user{,_inatomic}(). * * raw_copy_{to,from}_user(to, from, size) should copy up to size bytes and * return the amount left to copy. They should assume that access_ok() has @@ -196,7 +194,6 @@ copy_in_user(void __user *to, const void *from, unsigned long n) return n; } #endif -#endif static __always_inline void pagefault_disabled_inc(void) { -- cgit v1.2.3 From c373fff7bd252ec36e8a895c58a584088f1d38bc Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Apr 2017 12:26:22 -0400 Subject: NFSv4: Don't special case "launder" If the client receives a fatal server error from nfs_pageio_add_request(), then we should always truncate the page on which the error occurred. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 9aa044e76820..bb0eb2c9acca 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -500,24 +500,12 @@ extern int nfs_updatepage(struct file *, struct page *, unsigned int, unsigned */ extern int nfs_sync_inode(struct inode *inode); extern int nfs_wb_all(struct inode *inode); -extern int nfs_wb_single_page(struct inode *inode, struct page *page, bool launder); +extern int nfs_wb_page(struct inode *inode, struct page *page); extern int nfs_wb_page_cancel(struct inode *inode, struct page* page); extern int nfs_commit_inode(struct inode *, int); extern struct nfs_commit_data *nfs_commitdata_alloc(bool never_fail); extern void nfs_commit_free(struct nfs_commit_data *data); -static inline int -nfs_wb_launder_page(struct inode *inode, struct page *page) -{ - return nfs_wb_single_page(inode, page, true); -} - -static inline int -nfs_wb_page(struct inode *inode, struct page *page) -{ - return nfs_wb_single_page(inode, page, false); -} - static inline int nfs_have_writebacks(struct inode 
*inode) { -- cgit v1.2.3 From c7e88067c1ae89e7bcbed070fb2c4e30bc39b51f Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 18 Apr 2017 16:01:46 -0700 Subject: srcu: Exact tracking of srcu_data structures containing callbacks The current Tree SRCU implementation schedules a workqueue for every srcu_data covered by a given leaf srcu_node structure having callbacks, even if only one of those srcu_data structures actually contains callbacks. This is clearly inefficient for workloads that don't feature callbacks everywhere all the time. This commit therefore adds an array of masks that are used by the leaf srcu_node structures to track exactly which srcu_data structures contain callbacks. Signed-off-by: Paul E. McKenney Tested-by: Mike Galbraith --- include/linux/srcutree.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 0400e211aa44..94515ff226fb 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -47,6 +47,8 @@ struct srcu_data { struct delayed_work work; /* Context for CB invoking. */ struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */ struct srcu_node *mynode; /* Leaf srcu_node. */ + unsigned long grpmask; /* Mask for leaf srcu_node */ + /* ->srcu_data_have_cbs[]. */ int cpu; struct srcu_struct *sp; }; @@ -59,6 +61,8 @@ struct srcu_node { unsigned long srcu_have_cbs[4]; /* GP seq for children */ /* having CBs, but only */ /* is > ->srcu_gq_seq. */ + unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs */ + /* have CBs for given GP? */ struct srcu_node *srcu_parent; /* Next up in tree. */ int grplo; /* Least CPU for node. */ int grphi; /* Biggest CPU for node. */ -- cgit v1.2.3 From 7f6733c3c648ddd6cf459c1b80ad388a95452955 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Tue, 18 Apr 2017 17:17:35 -0700 Subject: srcu: Make rcutorture writer stalls print SRCU GP state In the past, SRCU was simple enough that there was little point in making the rcutorture writer stall messages print the SRCU grace-period number state. With the advent of Tree SRCU, this has changed. This commit therefore makes Classic, Tiny, and Tree SRCU report this state to rcutorture as needed. Signed-off-by: Paul E. McKenney Tested-by: Mike Galbraith --- include/linux/srcuclassic.h | 14 ++++++++++++++ include/linux/srcutiny.h | 12 ++++++++++++ include/linux/srcutree.h | 4 ++++ 3 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/srcuclassic.h b/include/linux/srcuclassic.h index 41cf99930f34..5753f7322262 100644 --- a/include/linux/srcuclassic.h +++ b/include/linux/srcuclassic.h @@ -98,4 +98,18 @@ void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); unsigned long srcu_batches_completed(struct srcu_struct *sp); +static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = sp->completed; + *gpnum = *completed; + if (sp->batch_queue.head || sp->batch_check0.head || sp->batch_check0.head) + (*gpnum)++; +} + #endif diff --git a/include/linux/srcutiny.h b/include/linux/srcutiny.h index 4f284e4f4d8c..42311ee0334f 100644 --- a/include/linux/srcutiny.h +++ b/include/linux/srcutiny.h @@ -78,4 +78,16 @@ static inline unsigned long srcu_batches_completed(struct srcu_struct *sp) return 0; } +static inline void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, + unsigned long *completed) +{ + if (test_type != SRCU_FLAVOR) + return; + *flags = 0; + *completed = sp->srcu_gp_seq; + *gpnum = *completed; +} + #endif diff --git 
a/include/linux/srcutree.h b/include/linux/srcutree.h index 94515ff226fb..3865717df124 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -140,4 +140,8 @@ void synchronize_srcu_expedited(struct srcu_struct *sp); void srcu_barrier(struct srcu_struct *sp); unsigned long srcu_batches_completed(struct srcu_struct *sp); +void srcutorture_get_gp_data(enum rcutorture_type test_type, + struct srcu_struct *sp, int *flags, + unsigned long *gpnum, unsigned long *completed); + #endif -- cgit v1.2.3 From f555f34fdc586a56204cd16d9a7c104ec6cb6650 Mon Sep 17 00:00:00 2001 From: Alexander Kochetkov Date: Thu, 20 Apr 2017 14:00:04 +0300 Subject: net: phy: fix auto-negotiation stall due to unavailable interrupt The Ethernet link on an interrupt driven PHY was not coming up if the Ethernet cable was plugged before the Ethernet interface was brought up. The patch trigger PHY state machine to update link state if PHY was requested to do auto-negotiation and auto-negotiation complete flag already set. During power-up cycle the PHY do auto-negotiation, generate interrupt and set auto-negotiation complete flag. Interrupt is handled by PHY state machine but doesn't update link state because PHY is in PHY_READY state. After some time MAC bring up, start and request PHY to do auto-negotiation. If there are no new settings to advertise genphy_config_aneg() doesn't start PHY auto-negotiation. PHY continue to stay in auto-negotiation complete state and doesn't fire interrupt. At the same time PHY state machine expect that PHY started auto-negotiation and is waiting for interrupt from PHY and it won't get it. Fixes: 321beec5047a ("net: phy: Use interrupts when available in NOLINK state") Signed-off-by: Alexander Kochetkov Cc: stable # v4.9+ Tested-by: Roger Quadros Tested-by: Alexandre Belloni Signed-off-by: David S. 
Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 43a774873aa9..fb3857337151 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -852,6 +852,7 @@ void phy_change_work(struct work_struct *work); void phy_mac_interrupt(struct phy_device *phydev, int new_link); void phy_start_machine(struct phy_device *phydev); void phy_stop_machine(struct phy_device *phydev); +void phy_trigger_machine(struct phy_device *phydev, bool sync); int phy_ethtool_sset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_gset(struct phy_device *phydev, struct ethtool_cmd *cmd); int phy_ethtool_ksettings_get(struct phy_device *phydev, -- cgit v1.2.3 From 038a3e858de4e3ddf42c330a22b7efcddbc0a81a Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 25 Apr 2017 11:41:34 +0200 Subject: rhashtable: remove insecure_max_entries param no users in the tree, insecure_max_entries is always set to ht->p.max_size * 2 in rhashtable_init(). Replace the only spot that uses it with a ht->p.max_size check. Signed-off-by: Florian Westphal Signed-off-by: David S.
Miller --- include/linux/rhashtable.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae87dcdf52d2..ae93b65d13d7 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -125,7 +125,6 @@ struct rhashtable; * @key_len: Length of key * @key_offset: Offset of key in struct to be hashed * @head_offset: Offset of rhash_head in struct to be hashed - * @insecure_max_entries: Maximum number of entries (may be exceeded) * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking * @nulls_base: Base value to generate nulls marker @@ -140,7 +139,6 @@ struct rhashtable_params { size_t key_len; size_t key_offset; size_t head_offset; - unsigned int insecure_max_entries; unsigned int max_size; unsigned int min_size; u32 nulls_base; @@ -329,8 +327,8 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { - return ht->p.insecure_max_entries && - atomic_read(&ht->nelems) >= ht->p.insecure_max_entries; + return ht->p.max_size && + (atomic_read(&ht->nelems) / 2u) >= ht->p.max_size; } /* The bucket lock is selected based on the hash and protects mutations -- cgit v1.2.3 From 69e996c58a35db9ca79b3f021a15bcd22202e1c0 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Apr 2017 10:15:32 -0700 Subject: tcp: add tp->tcp_mstamp field We want to use precise timestamps in TCP stack, but we do not want to call possibly expensive kernel time services too often. tp->tcp_mstamp is guaranteed to be updated once per incoming packet. We will use it in the following patches, removing specific skb_mstamp_get() calls, and removing ack_time from struct tcp_sacktag_state. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index cbe5b602a2d3..99a22f44c32e 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -240,6 +240,7 @@ struct tcp_sock { u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ + struct skb_mstamp tcp_mstamp; /* most recent packet received/sent */ u32 srtt_us; /* smoothed round trip time << 3 in usecs */ u32 mdev_us; /* medium deviation */ u32 mdev_max_us; /* maximal mdev for the last rtt period */ -- cgit v1.2.3 From 645f4c6f2ebd040688cc2a5f626ffc909e66ccf2 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 25 Apr 2017 10:15:41 -0700 Subject: tcp: switch rcv_rtt_est and rcvq_space to high resolution timestamps Some devices or distributions use HZ=100 or HZ=250 TCP receive buffer autotuning has poor behavior caused by this choice. Since autotuning happens after 4 ms or 10 ms, short distance flows get their receive buffer tuned to a very high value, but after an initial period where it was frozen to (too small) initial value. With tp->tcp_mstamp introduction, we can switch to high resolution timestamps almost for free (at the expense of 8 additional bytes per TCP structure) Note that some TCP stacks use usec TCP timestamps where this patch makes even more sense : Many TCP flows have < 500 usec RTT. Hopefully this finer TS option can be standardized soon. Tested: HZ=100 kernel ./netperf -H lpaa24 -t TCP_RR -l 1000 -- -r 10000,10000 & Peer without patch : lpaa24:~# ss -tmi dst lpaa23 ... skmem:(r0,rb8388608,...) rcv_rtt:10 rcv_space:3210000 minrtt:0.017 Peer with the patch : lpaa23:~# ss -tmi dst lpaa24 ... skmem:(r0,rb428800,...) rcv_rtt:0.069 rcv_space:30000 minrtt:0.017 We can see saner RCVBUF, and more precise rcv_rtt information. Signed-off-by: Eric Dumazet Acked-by: Soheil Hassas Yeganeh Acked-by: Neal Cardwell Signed-off-by: David S. 
Miller --- include/linux/tcp.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 99a22f44c32e..b6d5adcee8fc 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -333,16 +333,16 @@ struct tcp_sock { /* Receiver side RTT estimation */ struct { - u32 rtt; - u32 seq; - u32 time; + u32 rtt_us; + u32 seq; + struct skb_mstamp time; } rcv_rtt_est; /* Receiver queue space */ struct { - int space; - u32 seq; - u32 time; + int space; + u32 seq; + struct skb_mstamp time; } rcvq_space; /* TCP-specific MTU probe information. */ -- cgit v1.2.3 From 2836ee4b1acbe7b396219d0677426885f14cd792 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 26 Apr 2017 13:47:56 -0700 Subject: blk-mq: Add blk_mq_ops.show_rq() This new callback function will be used in the next patch to show more information about SCSI requests. Signed-off-by: Bart Van Assche Reviewed-by: Omar Sandoval Cc: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0c4dadb85f62..32bd8eb5ba67 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -142,6 +142,14 @@ struct blk_mq_ops { reinit_request_fn *reinit_request; map_queues_fn *map_queues; + +#ifdef CONFIG_BLK_DEBUG_FS + /* + * Used by the debugfs implementation to show driver-specific + * information about a request. + */ + void (*show_rq)(struct seq_file *m, struct request *rq); +#endif }; enum { -- cgit v1.2.3 From 1cbf41dbacb6c8decdf8d838bbf5ca5b448a269f Mon Sep 17 00:00:00 2001 From: Luca Coelho Date: Wed, 26 Apr 2017 10:58:46 +0300 Subject: ieee80211: add SUITE_B AKM selectors Add the definitions for SUITE_B and SUITE_B_192 AKM selectors as defined in IEEE802.11REVmc_D5.0, table 9-132. 
Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 294fa6273a62..23e095fa6701 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2356,18 +2356,20 @@ enum ieee80211_sa_query_action { #define WLAN_CIPHER_SUITE_SMS4 SUITE(0x001472, 1) /* AKM suite selectors */ -#define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) -#define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) -#define WLAN_AKM_SUITE_FT_PSK SUITE(0x000FAC, 4) -#define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) -#define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) -#define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) -#define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) -#define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) -#define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) -#define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) -#define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) -#define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) +#define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) +#define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) +#define WLAN_AKM_SUITE_FT_PSK SUITE(0x000FAC, 4) +#define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) +#define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) +#define WLAN_AKM_SUITE_TDLS SUITE(0x000FAC, 7) +#define WLAN_AKM_SUITE_SAE SUITE(0x000FAC, 8) +#define WLAN_AKM_SUITE_FT_OVER_SAE SUITE(0x000FAC, 9) +#define WLAN_AKM_SUITE_8021X_SUITE_B SUITE(0x000FAC, 11) +#define WLAN_AKM_SUITE_8021X_SUITE_B_192 SUITE(0x000FAC, 12) +#define WLAN_AKM_SUITE_FILS_SHA256 SUITE(0x000FAC, 14) +#define WLAN_AKM_SUITE_FILS_SHA384 SUITE(0x000FAC, 15) +#define WLAN_AKM_SUITE_FT_FILS_SHA256 SUITE(0x000FAC, 16) +#define WLAN_AKM_SUITE_FT_FILS_SHA384 SUITE(0x000FAC, 17) #define WLAN_MAX_KEY_LEN 32 -- cgit v1.2.3 From 2ead3235fd7128347a60a3942b3e2048834d62aa Mon Sep 17 
00:00:00 2001 From: Luca Coelho Date: Wed, 26 Apr 2017 10:58:48 +0300 Subject: ieee80211: add FT-802.1X AKM suite selector Add the definition for FT-802.1X AKM selector as defined in IEEE Std 802.11-2016, table 9-133. Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 23e095fa6701..52abfbcd5975 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2358,6 +2358,7 @@ enum ieee80211_sa_query_action { /* AKM suite selectors */ #define WLAN_AKM_SUITE_8021X SUITE(0x000FAC, 1) #define WLAN_AKM_SUITE_PSK SUITE(0x000FAC, 2) +#define WLAN_AKM_SUITE_FT_8021X SUITE(0x000FAC, 3) #define WLAN_AKM_SUITE_FT_PSK SUITE(0x000FAC, 4) #define WLAN_AKM_SUITE_8021X_SHA256 SUITE(0x000FAC, 5) #define WLAN_AKM_SUITE_PSK_SHA256 SUITE(0x000FAC, 6) -- cgit v1.2.3 From f6601e176c8b01bc545959c091778343a8c66951 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 26 Apr 2017 10:58:52 +0300 Subject: ieee80211: fix kernel-doc parsing errors Some of the enum definitions are unnamed but there's still an attempt at documenting them - that doesn't work. Name them to make that work. 
Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 52abfbcd5975..639e77abf064 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2177,37 +2177,37 @@ enum ieee80211_tdls_actioncode { #define WLAN_BSS_COEX_INFORMATION_REQUEST BIT(0) /** - * enum - mesh synchronization method identifier + * enum ieee80211_mesh_sync_method - mesh synchronization method identifier * * @IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET: the default synchronization method * @IEEE80211_SYNC_METHOD_VENDOR: a vendor specific synchronization method * that will be specified in a vendor specific information element */ -enum { +enum ieee80211_mesh_sync_method { IEEE80211_SYNC_METHOD_NEIGHBOR_OFFSET = 1, IEEE80211_SYNC_METHOD_VENDOR = 255, }; /** - * enum - mesh path selection protocol identifier + * enum ieee80211_mesh_path_protocol - mesh path selection protocol identifier * * @IEEE80211_PATH_PROTOCOL_HWMP: the default path selection protocol * @IEEE80211_PATH_PROTOCOL_VENDOR: a vendor specific protocol that will * be specified in a vendor specific information element */ -enum { +enum ieee80211_mesh_path_protocol { IEEE80211_PATH_PROTOCOL_HWMP = 1, IEEE80211_PATH_PROTOCOL_VENDOR = 255, }; /** - * enum - mesh path selection metric identifier + * enum ieee80211_mesh_path_metric - mesh path selection metric identifier * * @IEEE80211_PATH_METRIC_AIRTIME: the default path selection metric * @IEEE80211_PATH_METRIC_VENDOR: a vendor specific metric that will be * specified in a vendor specific information element */ -enum { +enum ieee80211_mesh_path_metric { IEEE80211_PATH_METRIC_AIRTIME = 1, IEEE80211_PATH_METRIC_VENDOR = 255, }; -- cgit v1.2.3 From bfd20f1cc85010d2f2d77e544da05cd8c149ba9b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 26 Apr 2017 09:18:35 -0700 Subject: x86, iommu/vt-d: Add an option to 
disable Intel IOMMU force on IOMMU harms performance significantly when we run very fast networking workloads. It's 40GB networking doing XDP test. Software overhead is almost unaware, but it's the IOTLB miss (based on our analysis) which kills the performance. We observed the same performance issue even with software passthrough (identity mapping), only the hardware passthrough survives. The pps with iommu (with software passthrough) is only about ~30% of that without it. This is a limitation in hardware based on our observation, so we'd like to disable the IOMMU force on, but we do want to use TBOOT and we can sacrifice the DMA security bought by IOMMU. I must admit I know nothing about TBOOT, but TBOOT guys (cc-ed) think not enabling IOMMU is totally ok. So introduce a new boot option to disable the force on. It's kind of silly we need to run into intel_iommu_init even without force on, but we need to disable TBOOT PMR registers. For system without the boot option, nothing is changed. Signed-off-by: Shaohua Li Signed-off-by: Joerg Roedel --- include/linux/dma_remapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 187c10299722..90884072fa73 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -39,6 +39,7 @@ extern int iommu_calculate_agaw(struct intel_iommu *iommu); extern int iommu_calculate_max_sagaw(struct intel_iommu *iommu); extern int dmar_disabled; extern int intel_iommu_enabled; +extern int intel_iommu_tboot_noforce; #else static inline int iommu_calculate_agaw(struct intel_iommu *iommu) { -- cgit v1.2.3 From 1e9a038b7fe9a8c10ef1238f4e695d5fbe0dd594 Mon Sep 17 00:00:00 2001 From: "Paul E. 
McKenney" Date: Mon, 24 Apr 2017 16:02:09 -0700 Subject: srcu: Expedited grace periods with reduced memory contention Commit f60d231a87c5 ("srcu: Crude control of expedited grace periods") introduced a per-srcu_struct atomic counter to track outstanding requests for grace periods. This works, but represents a memory-contention bottleneck. This commit therefore uses the srcu_node combining tree to remove this bottleneck. This commit adds new ->srcu_gp_seq_needed_exp fields to the srcu_data, srcu_node, and srcu_struct structures, which track the farthest-in-the-future grace period that must be expedited, which in turn requires that all nearer-term grace periods also be expedited. Requests for expediting start with the srcu_data structure, run up through the srcu_node tree, and end at the srcu_struct structure. Note that it may be necessary to expedite a grace period that just now started, and this is handled by a new srcu_funnel_exp_start() function, which is invoked when the grace period itself is already in its way, but when that grace period was not marked as expedited. A new srcu_get_delay() function returns zero if there is at least one expedited SRCU grace period in flight, or SRCU_INTERVAL otherwise. This function is used to calculate delays: Normal grace periods are allowed to extend in order to cover more requests with a given grace-period computation, which decreases per-request overhead. Signed-off-by: Paul E. McKenney Tested-by: Mike Galbraith --- include/linux/srcutree.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 3865717df124..86df48d3e97b 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -43,6 +43,7 @@ struct srcu_data { spinlock_t lock ____cacheline_internodealigned_in_smp; struct rcu_segcblist srcu_cblist; /* List of callbacks.*/ unsigned long srcu_gp_seq_needed; /* Furthest future GP needed. 
*/ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ bool srcu_cblist_invoking; /* Invoking these CBs? */ struct delayed_work work; /* Context for CB invoking. */ struct rcu_head srcu_barrier_head; /* For srcu_barrier() use. */ @@ -63,6 +64,7 @@ struct srcu_node { /* is > ->srcu_gq_seq. */ unsigned long srcu_data_have_cbs[4]; /* Which srcu_data structs */ /* have CBs for given GP? */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ struct srcu_node *srcu_parent; /* Next up in tree. */ int grplo; /* Least CPU for node. */ int grphi; /* Biggest CPU for node. */ @@ -81,7 +83,7 @@ struct srcu_struct { unsigned int srcu_idx; /* Current rdr array element. */ unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ - atomic_t srcu_exp_cnt; /* # ongoing expedited GPs. */ + unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ -- cgit v1.2.3 From 22607d66bbc3e81140d3bcf08894f4378eb36428 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 25 Apr 2017 14:03:11 -0700 Subject: srcu: Specify auto-expedite holdoff time On small systems, in the absence of readers, expedited SRCU grace periods can complete in less than a microsecond. This means that an eight-CPU system can have all CPUs doing synchronize_srcu() in a tight loop and almost always expedite. This might actually be desirable in some situations, but in general it is a good way to needlessly burn CPU cycles. And in those situations where it is desirable, your friend is the function synchronize_srcu_expedited(). For other situations, this commit adds a kernel parameter that specifies a holdoff between completing the last SRCU grace period and auto-expediting the next. 
If the next grace period starts before the holdoff expires, auto-expediting is disabled. The holdoff is 50 microseconds by default, and can be tuned to the desired number of nanoseconds. A value of zero disables auto-expediting. Signed-off-by: Paul E. McKenney Tested-by: Mike Galbraith --- include/linux/srcutree.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/srcutree.h b/include/linux/srcutree.h index 86df48d3e97b..32e86d85fd11 100644 --- a/include/linux/srcutree.h +++ b/include/linux/srcutree.h @@ -84,6 +84,7 @@ struct srcu_struct { unsigned long srcu_gp_seq; /* Grace-period seq #. */ unsigned long srcu_gp_seq_needed; /* Latest gp_seq needed. */ unsigned long srcu_gp_seq_needed_exp; /* Furthest future exp GP. */ + unsigned long srcu_last_gp_end; /* Last GP end timestamp (ns) */ struct srcu_data __percpu *sda; /* Per-CPU srcu_data array. */ unsigned long srcu_barrier_seq; /* srcu_barrier seq #. */ struct mutex srcu_barrier_mutex; /* Serialize barrier ops. */ -- cgit v1.2.3 From d2d58e0e0d6c750941147e505f4263239427e359 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Sat, 15 Apr 2017 08:54:56 +0200 Subject: fs/affs: import amigaffs.h Having that file in the global include/linux is not needed. 
Signed-off-by: Fabian Frederick Signed-off-by: Al Viro --- include/linux/amigaffs.h | 144 ----------------------------------------------- 1 file changed, 144 deletions(-) delete mode 100644 include/linux/amigaffs.h (limited to 'include/linux') diff --git a/include/linux/amigaffs.h b/include/linux/amigaffs.h deleted file mode 100644 index 43b41c06aa37..000000000000 --- a/include/linux/amigaffs.h +++ /dev/null @@ -1,144 +0,0 @@ -#ifndef AMIGAFFS_H -#define AMIGAFFS_H - -#include -#include - -#define FS_OFS 0x444F5300 -#define FS_FFS 0x444F5301 -#define FS_INTLOFS 0x444F5302 -#define FS_INTLFFS 0x444F5303 -#define FS_DCOFS 0x444F5304 -#define FS_DCFFS 0x444F5305 -#define MUFS_FS 0x6d754653 /* 'muFS' */ -#define MUFS_OFS 0x6d754600 /* 'muF\0' */ -#define MUFS_FFS 0x6d754601 /* 'muF\1' */ -#define MUFS_INTLOFS 0x6d754602 /* 'muF\2' */ -#define MUFS_INTLFFS 0x6d754603 /* 'muF\3' */ -#define MUFS_DCOFS 0x6d754604 /* 'muF\4' */ -#define MUFS_DCFFS 0x6d754605 /* 'muF\5' */ - -#define T_SHORT 2 -#define T_LIST 16 -#define T_DATA 8 - -#define ST_LINKFILE -4 -#define ST_FILE -3 -#define ST_ROOT 1 -#define ST_USERDIR 2 -#define ST_SOFTLINK 3 -#define ST_LINKDIR 4 - -#define AFFS_ROOT_BMAPS 25 - -struct affs_date { - __be32 days; - __be32 mins; - __be32 ticks; -}; - -struct affs_short_date { - __be16 days; - __be16 mins; - __be16 ticks; -}; - -struct affs_root_head { - __be32 ptype; - __be32 spare1; - __be32 spare2; - __be32 hash_size; - __be32 spare3; - __be32 checksum; - __be32 hashtable[1]; -}; - -struct affs_root_tail { - __be32 bm_flag; - __be32 bm_blk[AFFS_ROOT_BMAPS]; - __be32 bm_ext; - struct affs_date root_change; - u8 disk_name[32]; - __be32 spare1; - __be32 spare2; - struct affs_date disk_change; - struct affs_date disk_create; - __be32 spare3; - __be32 spare4; - __be32 dcache; - __be32 stype; -}; - -struct affs_head { - __be32 ptype; - __be32 key; - __be32 block_count; - __be32 spare1; - __be32 first_data; - __be32 checksum; - __be32 table[1]; -}; - -struct affs_tail 
{ - __be32 spare1; - __be16 uid; - __be16 gid; - __be32 protect; - __be32 size; - u8 comment[92]; - struct affs_date change; - u8 name[32]; - __be32 spare2; - __be32 original; - __be32 link_chain; - __be32 spare[5]; - __be32 hash_chain; - __be32 parent; - __be32 extension; - __be32 stype; -}; - -struct slink_front -{ - __be32 ptype; - __be32 key; - __be32 spare1[3]; - __be32 checksum; - u8 symname[1]; /* depends on block size */ -}; - -struct affs_data_head -{ - __be32 ptype; - __be32 key; - __be32 sequence; - __be32 size; - __be32 next; - __be32 checksum; - u8 data[1]; /* depends on block size */ -}; - -/* Permission bits */ - -#define FIBF_OTR_READ 0x8000 -#define FIBF_OTR_WRITE 0x4000 -#define FIBF_OTR_EXECUTE 0x2000 -#define FIBF_OTR_DELETE 0x1000 -#define FIBF_GRP_READ 0x0800 -#define FIBF_GRP_WRITE 0x0400 -#define FIBF_GRP_EXECUTE 0x0200 -#define FIBF_GRP_DELETE 0x0100 - -#define FIBF_HIDDEN 0x0080 -#define FIBF_SCRIPT 0x0040 -#define FIBF_PURE 0x0020 /* no use under linux */ -#define FIBF_ARCHIVED 0x0010 /* never set, always cleared on write */ -#define FIBF_NOREAD 0x0008 /* 0 means allowed */ -#define FIBF_NOWRITE 0x0004 /* 0 means allowed */ -#define FIBF_NOEXECUTE 0x0002 /* 0 means allowed, ignored under linux */ -#define FIBF_NODELETE 0x0001 /* 0 means allowed */ - -#define FIBF_OWNER 0x000F /* Bits pertaining to owner */ -#define FIBF_MASK 0xEE0E /* Bits modified by Linux */ - -#endif -- cgit v1.2.3 From a0c111b49bbe11c3970bc668600e3b61fbbb7fca Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Sun, 9 Apr 2017 09:32:14 +0800 Subject: fs: drop duplicate header percpu-rwsem.h Drop duplicate header percpu-rwsem.h from linux/fs.h. 
Signed-off-by: Geliang Tang Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 7251f7bb45e8..dee12c171e07 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -29,7 +29,6 @@ #include #include #include -#include #include #include -- cgit v1.2.3 From cda37124f4e95ad5ccb11394a5802b0972668b32 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Mar 2017 21:15:37 -0700 Subject: fs: constify tree_descr arrays passed to simple_fill_super() simple_fill_super() is passed an array of tree_descr structures which describe the files to create in the filesystem's root directory. Since these arrays are never modified intentionally, they should be 'const' so that they are placed in .rodata and benefit from memory protection. This patch updates the function signature and all users, and also constifies tree_descr.name. Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index dee12c171e07..fc1b4faa6272 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2995,9 +2995,10 @@ extern const struct file_operations simple_dir_operations; extern const struct inode_operations simple_dir_inode_operations; extern void make_empty_dir_inode(struct inode *inode); extern bool is_empty_dir_inode(struct inode *inode); -struct tree_descr { char *name; const struct file_operations *ops; int mode; }; +struct tree_descr { const char *name; const struct file_operations *ops; int mode; }; struct dentry *d_alloc_name(struct dentry *, const char *); -extern int simple_fill_super(struct super_block *, unsigned long, struct tree_descr *); +extern int simple_fill_super(struct super_block *, unsigned long, + const struct tree_descr *); extern int simple_pin_fs(struct file_system_type *, struct vfsmount **mount, int 
*count); extern void simple_release_fs(struct vfsmount **mount, int *count); -- cgit v1.2.3 From 020c2833dbc76b4069c9a9886b71511052d160df Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 25 Mar 2017 21:02:18 -0700 Subject: fs: remove _submit_bh() _submit_bh() allowed submitting a buffer_head for I/O using custom bio_flags. It used to be used by jbd to set BIO_SNAP_STABLE, introduced by commit 713685111774 ("mm: make snapshotting pages for stable writes a per-bio operation"). However, the code and flag has since been removed and no _submit_bh() users remain. These days, bio_flags are mostly used internally by the block layer to track the state of bio's. As such, it doesn't really make sense for filesystems to use them instead of op_flags when wanting special behavior for block requests. Therefore, remove _submit_bh() and trim the bio_flags argument from submit_bh_wbc(). Cc: Darrick J. Wong Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/buffer_head.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 79591c3660cc..bd029e52ef5e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -196,8 +196,6 @@ void ll_rw_block(int, int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); void write_dirty_buffer(struct buffer_head *bh, int op_flags); -int _submit_bh(int op, int op_flags, struct buffer_head *bh, - unsigned long bio_flags); int submit_bh(int, int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); -- cgit v1.2.3 From e04653a9dcf4d98defe2149c885382e5cc72082f Mon Sep 17 00:00:00 2001 From: Archana Patni Date: Wed, 1 Feb 2017 17:22:03 +0100 Subject: mfd: cros_ec: Add ACPI GPE handler for LID0 devices This patch installs an ACPI GPE handler for LID0 ACPI device to indicate ACPI 
core that this GPE should stay enabled for lid to work in suspend to idle path. Signed-off-by: Archana Patni Signed-off-by: Thierry Escande Signed-off-by: Lee Jones --- include/linux/mfd/cros_ec.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index 7a01c94496f1..b3d04de684d4 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -304,4 +304,22 @@ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; extern struct attribute_group cros_ec_vbc_attr_group; +/* ACPI GPE handler */ +#ifdef CONFIG_ACPI + +int cros_ec_acpi_install_gpe_handler(struct device *dev); +void cros_ec_acpi_remove_gpe_handler(void); +void cros_ec_acpi_clear_gpe(void); + +#else /* CONFIG_ACPI */ + +static inline int cros_ec_acpi_install_gpe_handler(struct device *dev) +{ + return -ENODEV; +} +static inline void cros_ec_acpi_remove_gpe_handler(void) {} +static inline void cros_ec_acpi_clear_gpe(void) {} + +#endif /* CONFIG_ACPI */ + #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From d5aa11bfe9cebb4a3912b11748fd84aa15454229 Mon Sep 17 00:00:00 2001 From: Milo Kim Date: Tue, 28 Feb 2017 15:45:15 +0900 Subject: mfd: Add TI LMU driver TI LMU (Lighting Management Unit) driver supports lighting devices below. LM3532, LM3631, LM3632, LM3633, LM3695 and LM3697. LMU devices have common features. - I2C interface for accessing device registers - Hardware enable pin control - Backlight brightness control - Notifier for hardware fault monitoring - Regulators for LCD display bias It contains fault monitor, backlight, LED and regulator driver. LMU fault monitor ----------------- LM3633 and LM3697 provide hardware monitoring feature. It enables open or short circuit detection. After monitoring is done, each device should be re-initialized. Notifier is used for this case. 
Separate patch for 'ti-lmu-fault-monitor' will be sent later. Backlight --------- It's handled by TI LMU backlight consolidated driver and chip dependent data. Separate patchset will be sent later. LED indicator ------------- LM3633 has 6 indicator LEDs. Programmable dimming pattern is also supported. Separate patch for 'leds-lm3633' will be sent later. Regulator --------- LM3631 has 5 regulators for the display bias. LM3632 supports 3 regulators. One consolidated driver enables it. The lm363x regulator driver is already upstreamed. Signed-off-by: Milo Kim Tested-by: Tony Lindgren Signed-off-by: Lee Jones --- include/linux/mfd/ti-lmu-register.h | 280 ++++++++++++++++++++++++++++++++++++ include/linux/mfd/ti-lmu.h | 87 +++++++++++ 2 files changed, 367 insertions(+) create mode 100644 include/linux/mfd/ti-lmu-register.h create mode 100644 include/linux/mfd/ti-lmu.h (limited to 'include/linux') diff --git a/include/linux/mfd/ti-lmu-register.h b/include/linux/mfd/ti-lmu-register.h new file mode 100644 index 000000000000..2125c7c02818 --- /dev/null +++ b/include/linux/mfd/ti-lmu-register.h @@ -0,0 +1,280 @@ +/* + * TI LMU (Lighting Management Unit) Device Register Map + * + * Copyright 2017 Texas Instruments + * + * Author: Milo Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __MFD_TI_LMU_REGISTER_H__ +#define __MFD_TI_LMU_REGISTER_H__ + +#include + +/* LM3532 */ +#define LM3532_REG_OUTPUT_CFG 0x10 +#define LM3532_ILED1_CFG_MASK 0x03 +#define LM3532_ILED2_CFG_MASK 0x0C +#define LM3532_ILED3_CFG_MASK 0x30 +#define LM3532_ILED1_CFG_SHIFT 0 +#define LM3532_ILED2_CFG_SHIFT 2 +#define LM3532_ILED3_CFG_SHIFT 4 + +#define LM3532_REG_RAMPUP 0x12 +#define LM3532_REG_RAMPDN LM3532_REG_RAMPUP +#define LM3532_RAMPUP_MASK 0x07 +#define LM3532_RAMPUP_SHIFT 0 +#define LM3532_RAMPDN_MASK 0x38 +#define LM3532_RAMPDN_SHIFT 3 + +#define LM3532_REG_ENABLE 0x1D + +#define LM3532_REG_PWM_A_CFG 0x13 +#define LM3532_PWM_A_MASK 0x05 /* zone 0 */ +#define LM3532_PWM_ZONE_0 BIT(2) + +#define LM3532_REG_PWM_B_CFG 0x14 +#define LM3532_PWM_B_MASK 0x09 /* zone 1 */ +#define LM3532_PWM_ZONE_1 BIT(3) + +#define LM3532_REG_PWM_C_CFG 0x15 +#define LM3532_PWM_C_MASK 0x11 /* zone 2 */ +#define LM3532_PWM_ZONE_2 BIT(4) + +#define LM3532_REG_ZONE_CFG_A 0x16 +#define LM3532_REG_ZONE_CFG_B 0x18 +#define LM3532_REG_ZONE_CFG_C 0x1A +#define LM3532_ZONE_MASK (BIT(2) | BIT(3) | BIT(4)) +#define LM3532_ZONE_0 0 +#define LM3532_ZONE_1 BIT(2) +#define LM3532_ZONE_2 BIT(3) + +#define LM3532_REG_BRT_A 0x70 /* zone 0 */ +#define LM3532_REG_BRT_B 0x76 /* zone 1 */ +#define LM3532_REG_BRT_C 0x7C /* zone 2 */ + +#define LM3532_MAX_REG 0x7E + +/* LM3631 */ +#define LM3631_REG_DEVCTRL 0x00 +#define LM3631_LCD_EN_MASK BIT(1) +#define LM3631_BL_EN_MASK BIT(0) + +#define LM3631_REG_BRT_LSB 0x01 +#define LM3631_REG_BRT_MSB 0x02 + +#define LM3631_REG_BL_CFG 0x06 +#define LM3631_BL_CHANNEL_MASK BIT(3) +#define LM3631_BL_DUAL_CHANNEL 0 +#define LM3631_BL_SINGLE_CHANNEL BIT(3) +#define LM3631_MAP_MASK BIT(5) +#define LM3631_EXPONENTIAL_MAP 0 + +#define LM3631_REG_BRT_MODE 0x08 +#define LM3631_MODE_MASK (BIT(1) | BIT(2) | BIT(3)) +#define LM3631_DEFAULT_MODE (BIT(1) | BIT(3)) + +#define LM3631_REG_SLOPE 0x09 +#define LM3631_SLOPE_MASK 0xF0 +#define LM3631_SLOPE_SHIFT 4 + +#define 
LM3631_REG_LDO_CTRL1 0x0A +#define LM3631_EN_OREF_MASK BIT(0) +#define LM3631_EN_VNEG_MASK BIT(1) +#define LM3631_EN_VPOS_MASK BIT(2) + +#define LM3631_REG_LDO_CTRL2 0x0B +#define LM3631_EN_CONT_MASK BIT(0) + +#define LM3631_REG_VOUT_CONT 0x0C +#define LM3631_VOUT_CONT_MASK (BIT(6) | BIT(7)) + +#define LM3631_REG_VOUT_BOOST 0x0C +#define LM3631_REG_VOUT_POS 0x0D +#define LM3631_REG_VOUT_NEG 0x0E +#define LM3631_REG_VOUT_OREF 0x0F +#define LM3631_VOUT_MASK 0x3F + +#define LM3631_REG_ENTIME_VCONT 0x0B +#define LM3631_ENTIME_CONT_MASK 0x70 + +#define LM3631_REG_ENTIME_VOREF 0x0F +#define LM3631_REG_ENTIME_VPOS 0x10 +#define LM3631_REG_ENTIME_VNEG 0x11 +#define LM3631_ENTIME_MASK 0xF0 +#define LM3631_ENTIME_SHIFT 4 + +#define LM3631_MAX_REG 0x16 + +/* LM3632 */ +#define LM3632_REG_CONFIG1 0x02 +#define LM3632_OVP_MASK (BIT(5) | BIT(6) | BIT(7)) +#define LM3632_OVP_25V BIT(6) + +#define LM3632_REG_CONFIG2 0x03 +#define LM3632_SWFREQ_MASK BIT(7) +#define LM3632_SWFREQ_1MHZ BIT(7) + +#define LM3632_REG_BRT_LSB 0x04 +#define LM3632_REG_BRT_MSB 0x05 + +#define LM3632_REG_IO_CTRL 0x09 +#define LM3632_PWM_MASK BIT(6) +#define LM3632_I2C_MODE 0 +#define LM3632_PWM_MODE BIT(6) + +#define LM3632_REG_ENABLE 0x0A +#define LM3632_BL_EN_MASK BIT(0) +#define LM3632_BL_CHANNEL_MASK (BIT(3) | BIT(4)) +#define LM3632_BL_SINGLE_CHANNEL BIT(4) +#define LM3632_BL_DUAL_CHANNEL BIT(3) + +#define LM3632_REG_BIAS_CONFIG 0x0C +#define LM3632_EXT_EN_MASK BIT(0) +#define LM3632_EN_VNEG_MASK BIT(1) +#define LM3632_EN_VPOS_MASK BIT(2) + +#define LM3632_REG_VOUT_BOOST 0x0D +#define LM3632_REG_VOUT_POS 0x0E +#define LM3632_REG_VOUT_NEG 0x0F +#define LM3632_VOUT_MASK 0x3F + +#define LM3632_MAX_REG 0x10 + +/* LM3633 */ +#define LM3633_REG_HVLED_OUTPUT_CFG 0x10 +#define LM3633_HVLED1_CFG_MASK BIT(0) +#define LM3633_HVLED2_CFG_MASK BIT(1) +#define LM3633_HVLED3_CFG_MASK BIT(2) +#define LM3633_HVLED1_CFG_SHIFT 0 +#define LM3633_HVLED2_CFG_SHIFT 1 +#define LM3633_HVLED3_CFG_SHIFT 2 + +#define 
LM3633_REG_BANK_SEL 0x11 + +#define LM3633_REG_BL0_RAMP 0x12 +#define LM3633_REG_BL1_RAMP 0x13 +#define LM3633_BL_RAMPUP_MASK 0xF0 +#define LM3633_BL_RAMPUP_SHIFT 4 +#define LM3633_BL_RAMPDN_MASK 0x0F +#define LM3633_BL_RAMPDN_SHIFT 0 + +#define LM3633_REG_BL_RAMP_CONF 0x1B +#define LM3633_BL_RAMP_MASK 0x0F +#define LM3633_BL_RAMP_EACH 0x05 + +#define LM3633_REG_PTN0_RAMP 0x1C +#define LM3633_REG_PTN1_RAMP 0x1D +#define LM3633_PTN_RAMPUP_MASK 0x70 +#define LM3633_PTN_RAMPUP_SHIFT 4 +#define LM3633_PTN_RAMPDN_MASK 0x07 +#define LM3633_PTN_RAMPDN_SHIFT 0 + +#define LM3633_REG_LED_MAPPING_MODE 0x1F +#define LM3633_LED_EXPONENTIAL BIT(1) + +#define LM3633_REG_IMAX_HVLED_A 0x20 +#define LM3633_REG_IMAX_HVLED_B 0x21 +#define LM3633_REG_IMAX_LVLED_BASE 0x22 + +#define LM3633_REG_BL_FEEDBACK_ENABLE 0x28 + +#define LM3633_REG_ENABLE 0x2B +#define LM3633_LED_BANK_OFFSET 2 + +#define LM3633_REG_PATTERN 0x2C + +#define LM3633_REG_BOOST_CFG 0x2D +#define LM3633_OVP_MASK (BIT(1) | BIT(2)) +#define LM3633_OVP_40V 0x6 + +#define LM3633_REG_PWM_CFG 0x2F +#define LM3633_PWM_A_MASK BIT(0) +#define LM3633_PWM_B_MASK BIT(1) + +#define LM3633_REG_BRT_HVLED_A_LSB 0x40 +#define LM3633_REG_BRT_HVLED_A_MSB 0x41 +#define LM3633_REG_BRT_HVLED_B_LSB 0x42 +#define LM3633_REG_BRT_HVLED_B_MSB 0x43 + +#define LM3633_REG_BRT_LVLED_BASE 0x44 + +#define LM3633_REG_PTN_DELAY 0x50 + +#define LM3633_REG_PTN_LOWTIME 0x51 + +#define LM3633_REG_PTN_HIGHTIME 0x52 + +#define LM3633_REG_PTN_LOWBRT 0x53 + +#define LM3633_REG_PTN_HIGHBRT LM3633_REG_BRT_LVLED_BASE + +#define LM3633_REG_BL_OPEN_FAULT_STATUS 0xB0 + +#define LM3633_REG_BL_SHORT_FAULT_STATUS 0xB2 + +#define LM3633_REG_MONITOR_ENABLE 0xB4 + +#define LM3633_MAX_REG 0xB4 + +/* LM3695 */ +#define LM3695_REG_GP 0x10 +#define LM3695_BL_CHANNEL_MASK BIT(3) +#define LM3695_BL_DUAL_CHANNEL 0 +#define LM3695_BL_SINGLE_CHANNEL BIT(3) +#define LM3695_BRT_RW_MASK BIT(2) +#define LM3695_BL_EN_MASK BIT(0) + +#define LM3695_REG_BRT_LSB 0x13 +#define 
LM3695_REG_BRT_MSB 0x14 + +#define LM3695_MAX_REG 0x14 + +/* LM3697 */ +#define LM3697_REG_HVLED_OUTPUT_CFG 0x10 +#define LM3697_HVLED1_CFG_MASK BIT(0) +#define LM3697_HVLED2_CFG_MASK BIT(1) +#define LM3697_HVLED3_CFG_MASK BIT(2) +#define LM3697_HVLED1_CFG_SHIFT 0 +#define LM3697_HVLED2_CFG_SHIFT 1 +#define LM3697_HVLED3_CFG_SHIFT 2 + +#define LM3697_REG_BL0_RAMP 0x11 +#define LM3697_REG_BL1_RAMP 0x12 +#define LM3697_RAMPUP_MASK 0xF0 +#define LM3697_RAMPUP_SHIFT 4 +#define LM3697_RAMPDN_MASK 0x0F +#define LM3697_RAMPDN_SHIFT 0 + +#define LM3697_REG_RAMP_CONF 0x14 +#define LM3697_RAMP_MASK 0x0F +#define LM3697_RAMP_EACH 0x05 + +#define LM3697_REG_PWM_CFG 0x1C +#define LM3697_PWM_A_MASK BIT(0) +#define LM3697_PWM_B_MASK BIT(1) + +#define LM3697_REG_IMAX_A 0x17 +#define LM3697_REG_IMAX_B 0x18 + +#define LM3697_REG_FEEDBACK_ENABLE 0x19 + +#define LM3697_REG_BRT_A_LSB 0x20 +#define LM3697_REG_BRT_A_MSB 0x21 +#define LM3697_REG_BRT_B_LSB 0x22 +#define LM3697_REG_BRT_B_MSB 0x23 + +#define LM3697_REG_ENABLE 0x24 + +#define LM3697_REG_OPEN_FAULT_STATUS 0xB0 + +#define LM3697_REG_SHORT_FAULT_STATUS 0xB2 + +#define LM3697_REG_MONITOR_ENABLE 0xB4 + +#define LM3697_MAX_REG 0xB4 +#endif diff --git a/include/linux/mfd/ti-lmu.h b/include/linux/mfd/ti-lmu.h new file mode 100644 index 000000000000..09d5f30384e5 --- /dev/null +++ b/include/linux/mfd/ti-lmu.h @@ -0,0 +1,87 @@ +/* + * TI LMU (Lighting Management Unit) Devices + * + * Copyright 2017 Texas Instruments + * + * Author: Milo Kim + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#ifndef __MFD_TI_LMU_H__ +#define __MFD_TI_LMU_H__ + +#include +#include +#include + +/* Notifier event */ +#define LMU_EVENT_MONITOR_DONE 0x01 + +enum ti_lmu_id { + LM3532, + LM3631, + LM3632, + LM3633, + LM3695, + LM3697, + LMU_MAX_ID, +}; + +enum ti_lmu_max_current { + LMU_IMAX_5mA, + LMU_IMAX_6mA, + LMU_IMAX_7mA = 0x03, + LMU_IMAX_8mA, + LMU_IMAX_9mA, + LMU_IMAX_10mA = 0x07, + LMU_IMAX_11mA, + LMU_IMAX_12mA, + LMU_IMAX_13mA, + LMU_IMAX_14mA, + LMU_IMAX_15mA = 0x0D, + LMU_IMAX_16mA, + LMU_IMAX_17mA, + LMU_IMAX_18mA, + LMU_IMAX_19mA, + LMU_IMAX_20mA = 0x13, + LMU_IMAX_21mA, + LMU_IMAX_22mA, + LMU_IMAX_23mA = 0x17, + LMU_IMAX_24mA, + LMU_IMAX_25mA, + LMU_IMAX_26mA, + LMU_IMAX_27mA = 0x1C, + LMU_IMAX_28mA, + LMU_IMAX_29mA, + LMU_IMAX_30mA, +}; + +enum lm363x_regulator_id { + LM3631_BOOST, /* Boost output */ + LM3631_LDO_CONT, /* Display panel controller */ + LM3631_LDO_OREF, /* Gamma reference */ + LM3631_LDO_POS, /* Positive display bias output */ + LM3631_LDO_NEG, /* Negative display bias output */ + LM3632_BOOST, /* Boost output */ + LM3632_LDO_POS, /* Positive display bias output */ + LM3632_LDO_NEG, /* Negative display bias output */ +}; + +/** + * struct ti_lmu + * + * @dev: Parent device pointer + * @regmap: Used for i2c communcation on accessing registers + * @en_gpio: GPIO for HWEN pin [Optional] + * @notifier: Notifier for reporting hwmon event + */ +struct ti_lmu { + struct device *dev; + struct regmap *regmap; + int en_gpio; + struct blocking_notifier_head notifier; +}; +#endif -- cgit v1.2.3 From ed7311f0d089553f39ff3e1e2d9f55f94324c42f Mon Sep 17 00:00:00 2001 From: Quentin Schulz Date: Mon, 20 Mar 2017 09:16:45 +0100 Subject: mfd: axp20x: Correct name of temperature data ADC registers The registers 0x56 and 0x57 of AXP22X PMIC store the value of the internal temperature of the PMIC. This patch modifies the name of these registers from AXP22X_PMIC_ADC_H/L to AXP22X_PMIC_TEMP_H/L so their purpose is clearer. 
Signed-off-by: Quentin Schulz Acked-by: Chen-Yu Tsai Acked-by: Maxime Ripard Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index 0d9a1ff38393..dc8798cf2a24 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -228,8 +228,8 @@ enum axp20x_variants { #define AXP20X_OCV_MAX 0xf /* AXP22X specific registers */ -#define AXP22X_PMIC_ADC_H 0x56 -#define AXP22X_PMIC_ADC_L 0x57 +#define AXP22X_PMIC_TEMP_H 0x56 +#define AXP22X_PMIC_TEMP_L 0x57 #define AXP22X_TS_ADC_H 0x58 #define AXP22X_TS_ADC_L 0x59 #define AXP22X_BATLOW_THRES1 0xe6 -- cgit v1.2.3 From f1e34ad849ad78770af067fd8e409e61b018f9d0 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Mar 2017 17:37:14 +0200 Subject: mfd: intel_soc_pmic_bxtwc: Move inclusion to c-file There is no need to include intel_soc_pmic.h into header which doesn't require it. Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- include/linux/mfd/intel_bxtwc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/intel_bxtwc.h b/include/linux/mfd/intel_bxtwc.h index 1a0ee9d6efe9..240d6752ec64 100644 --- a/include/linux/mfd/intel_bxtwc.h +++ b/include/linux/mfd/intel_bxtwc.h @@ -13,8 +13,6 @@ * more details. */ -#include - #ifndef __INTEL_BXTWC_H__ #define __INTEL_BXTWC_H__ -- cgit v1.2.3 From 0c227c51b98c03c6e7fb4f342f930cf576292064 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 17 Mar 2017 17:37:15 +0200 Subject: mfd: intel_soc_pmic_bxtwc: Rename header to follow c-file For better understanding of relationship between headers and modules rename: intel_bxtwc.h -> intel_soc_pmic_bxtwc.h While here, remove file name from the file itself. 
Signed-off-by: Andy Shevchenko Signed-off-by: Lee Jones --- include/linux/mfd/intel_bxtwc.h | 67 -------------------------------- include/linux/mfd/intel_soc_pmic_bxtwc.h | 67 ++++++++++++++++++++++++++++++++ 2 files changed, 67 insertions(+), 67 deletions(-) delete mode 100644 include/linux/mfd/intel_bxtwc.h create mode 100644 include/linux/mfd/intel_soc_pmic_bxtwc.h (limited to 'include/linux') diff --git a/include/linux/mfd/intel_bxtwc.h b/include/linux/mfd/intel_bxtwc.h deleted file mode 100644 index 240d6752ec64..000000000000 --- a/include/linux/mfd/intel_bxtwc.h +++ /dev/null @@ -1,67 +0,0 @@ -/* - * intel_bxtwc.h - Header file for Intel Broxton Whiskey Cove PMIC - * - * Copyright (C) 2015 Intel Corporation. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. 
- */ - -#ifndef __INTEL_BXTWC_H__ -#define __INTEL_BXTWC_H__ - -/* BXT WC devices */ -#define BXTWC_DEVICE1_ADDR 0x4E -#define BXTWC_DEVICE2_ADDR 0x4F -#define BXTWC_DEVICE3_ADDR 0x5E - -/* device1 Registers */ -#define BXTWC_CHIPID 0x4E00 -#define BXTWC_CHIPVER 0x4E01 - -#define BXTWC_SCHGRIRQ0_ADDR 0x5E1A -#define BXTWC_CHGRCTRL0_ADDR 0x5E16 -#define BXTWC_CHGRCTRL1_ADDR 0x5E17 -#define BXTWC_CHGRCTRL2_ADDR 0x5E18 -#define BXTWC_CHGRSTATUS_ADDR 0x5E19 -#define BXTWC_THRMBATZONE_ADDR 0x4F22 - -#define BXTWC_USBPATH_ADDR 0x5E19 -#define BXTWC_USBPHYCTRL_ADDR 0x5E07 -#define BXTWC_USBIDCTRL_ADDR 0x5E05 -#define BXTWC_USBIDEN_MASK 0x01 -#define BXTWC_USBIDSTAT_ADDR 0x00FF -#define BXTWC_USBSRCDETSTATUS_ADDR 0x5E29 - -#define BXTWC_DBGUSBBC1_ADDR 0x5FE0 -#define BXTWC_DBGUSBBC2_ADDR 0x5FE1 -#define BXTWC_DBGUSBBCSTAT_ADDR 0x5FE2 - -#define BXTWC_WAKESRC_ADDR 0x4E22 -#define BXTWC_WAKESRC2_ADDR 0x4EE5 -#define BXTWC_CHRTTADDR_ADDR 0x5E22 -#define BXTWC_CHRTTDATA_ADDR 0x5E23 - -#define BXTWC_STHRMIRQ0_ADDR 0x4F19 -#define WC_MTHRMIRQ1_ADDR 0x4E12 -#define WC_STHRMIRQ1_ADDR 0x4F1A -#define WC_STHRMIRQ2_ADDR 0x4F1B - -#define BXTWC_THRMZN0H_ADDR 0x4F44 -#define BXTWC_THRMZN0L_ADDR 0x4F45 -#define BXTWC_THRMZN1H_ADDR 0x4F46 -#define BXTWC_THRMZN1L_ADDR 0x4F47 -#define BXTWC_THRMZN2H_ADDR 0x4F48 -#define BXTWC_THRMZN2L_ADDR 0x4F49 -#define BXTWC_THRMZN3H_ADDR 0x4F4A -#define BXTWC_THRMZN3L_ADDR 0x4F4B -#define BXTWC_THRMZN4H_ADDR 0x4F4C -#define BXTWC_THRMZN4L_ADDR 0x4F4D - -#endif diff --git a/include/linux/mfd/intel_soc_pmic_bxtwc.h b/include/linux/mfd/intel_soc_pmic_bxtwc.h new file mode 100644 index 000000000000..0c351bc85d2d --- /dev/null +++ b/include/linux/mfd/intel_soc_pmic_bxtwc.h @@ -0,0 +1,67 @@ +/* + * Header file for Intel Broxton Whiskey Cove PMIC + * + * Copyright (C) 2015 Intel Corporation. All rights reserved. 
+ * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_BXTWC_H__ +#define __INTEL_BXTWC_H__ + +/* BXT WC devices */ +#define BXTWC_DEVICE1_ADDR 0x4E +#define BXTWC_DEVICE2_ADDR 0x4F +#define BXTWC_DEVICE3_ADDR 0x5E + +/* device1 Registers */ +#define BXTWC_CHIPID 0x4E00 +#define BXTWC_CHIPVER 0x4E01 + +#define BXTWC_SCHGRIRQ0_ADDR 0x5E1A +#define BXTWC_CHGRCTRL0_ADDR 0x5E16 +#define BXTWC_CHGRCTRL1_ADDR 0x5E17 +#define BXTWC_CHGRCTRL2_ADDR 0x5E18 +#define BXTWC_CHGRSTATUS_ADDR 0x5E19 +#define BXTWC_THRMBATZONE_ADDR 0x4F22 + +#define BXTWC_USBPATH_ADDR 0x5E19 +#define BXTWC_USBPHYCTRL_ADDR 0x5E07 +#define BXTWC_USBIDCTRL_ADDR 0x5E05 +#define BXTWC_USBIDEN_MASK 0x01 +#define BXTWC_USBIDSTAT_ADDR 0x00FF +#define BXTWC_USBSRCDETSTATUS_ADDR 0x5E29 + +#define BXTWC_DBGUSBBC1_ADDR 0x5FE0 +#define BXTWC_DBGUSBBC2_ADDR 0x5FE1 +#define BXTWC_DBGUSBBCSTAT_ADDR 0x5FE2 + +#define BXTWC_WAKESRC_ADDR 0x4E22 +#define BXTWC_WAKESRC2_ADDR 0x4EE5 +#define BXTWC_CHRTTADDR_ADDR 0x5E22 +#define BXTWC_CHRTTDATA_ADDR 0x5E23 + +#define BXTWC_STHRMIRQ0_ADDR 0x4F19 +#define WC_MTHRMIRQ1_ADDR 0x4E12 +#define WC_STHRMIRQ1_ADDR 0x4F1A +#define WC_STHRMIRQ2_ADDR 0x4F1B + +#define BXTWC_THRMZN0H_ADDR 0x4F44 +#define BXTWC_THRMZN0L_ADDR 0x4F45 +#define BXTWC_THRMZN1H_ADDR 0x4F46 +#define BXTWC_THRMZN1L_ADDR 0x4F47 +#define BXTWC_THRMZN2H_ADDR 0x4F48 +#define BXTWC_THRMZN2L_ADDR 0x4F49 +#define BXTWC_THRMZN3H_ADDR 0x4F4A +#define BXTWC_THRMZN3L_ADDR 0x4F4B +#define BXTWC_THRMZN4H_ADDR 0x4F4C +#define BXTWC_THRMZN4L_ADDR 0x4F4D + +#endif -- cgit v1.2.3 From 
fe9d7cb22ef3a26a74e49730c0efdbdae4b17d4b Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 16 Mar 2017 09:30:28 +0100 Subject: mfd: syscon: atmel-smc: Add new helpers to ease SMC regs manipulation These new helpers + macro definitions are meant to replace the old ones which are unpractical to use. Note that the macros and function prefixes have been intentionally changed to ATMEL_[H]SMC_XX and atmel_[h]smc_ to reflect the fact that this IP is also embedded in avr32 SoCs (and not only in at91 ones). Signed-off-by: Boris Brezillon Acked-by: Nicolas Ferre Signed-off-by: Lee Jones --- include/linux/mfd/syscon/atmel-smc.h | 87 ++++++++++++++++++++++++++++++++++++ 1 file changed, 87 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/atmel-smc.h b/include/linux/mfd/syscon/atmel-smc.h index be6ebe64eebe..00e6e3c8ee6f 100644 --- a/include/linux/mfd/syscon/atmel-smc.h +++ b/include/linux/mfd/syscon/atmel-smc.h @@ -69,6 +69,93 @@ #define AT91_SMC_PS_16 (2 << 28) #define AT91_SMC_PS_32 (3 << 28) +#define ATMEL_SMC_SETUP(cs) (((cs) * 0x10)) +#define ATMEL_HSMC_SETUP(cs) (0x600 + ((cs) * 0x14)) +#define ATMEL_SMC_PULSE(cs) (((cs) * 0x10) + 0x4) +#define ATMEL_HSMC_PULSE(cs) (0x600 + ((cs) * 0x14) + 0x4) +#define ATMEL_SMC_CYCLE(cs) (((cs) * 0x10) + 0x8) +#define ATMEL_HSMC_CYCLE(cs) (0x600 + ((cs) * 0x14) + 0x8) +#define ATMEL_SMC_NWE_SHIFT 0 +#define ATMEL_SMC_NCS_WR_SHIFT 8 +#define ATMEL_SMC_NRD_SHIFT 16 +#define ATMEL_SMC_NCS_RD_SHIFT 24 + +#define ATMEL_SMC_MODE(cs) (((cs) * 0x10) + 0xc) +#define ATMEL_HSMC_MODE(cs) (0x600 + ((cs) * 0x14) + 0x10) +#define ATMEL_SMC_MODE_READMODE_MASK BIT(0) +#define ATMEL_SMC_MODE_READMODE_NCS (0 << 0) +#define ATMEL_SMC_MODE_READMODE_NRD (1 << 0) +#define ATMEL_SMC_MODE_WRITEMODE_MASK BIT(1) +#define ATMEL_SMC_MODE_WRITEMODE_NCS (0 << 1) +#define ATMEL_SMC_MODE_WRITEMODE_NWE (1 << 1) +#define ATMEL_SMC_MODE_EXNWMODE_MASK GENMASK(5, 4) +#define ATMEL_SMC_MODE_EXNWMODE_DISABLE (0 << 4) +#define 
ATMEL_SMC_MODE_EXNWMODE_FROZEN (2 << 4) +#define ATMEL_SMC_MODE_EXNWMODE_READY (3 << 4) +#define ATMEL_SMC_MODE_BAT_MASK BIT(8) +#define ATMEL_SMC_MODE_BAT_SELECT (0 << 8) +#define ATMEL_SMC_MODE_BAT_WRITE (1 << 8) +#define ATMEL_SMC_MODE_DBW_MASK GENMASK(13, 12) +#define ATMEL_SMC_MODE_DBW_8 (0 << 12) +#define ATMEL_SMC_MODE_DBW_16 (1 << 12) +#define ATMEL_SMC_MODE_DBW_32 (2 << 12) +#define ATMEL_SMC_MODE_TDF_MASK GENMASK(19, 16) +#define ATMEL_SMC_MODE_TDF(x) (((x) - 1) << 16) +#define ATMEL_SMC_MODE_TDF_MAX 16 +#define ATMEL_SMC_MODE_TDF_MIN 1 +#define ATMEL_SMC_MODE_TDFMODE_OPTIMIZED BIT(20) +#define ATMEL_SMC_MODE_PMEN BIT(24) +#define ATMEL_SMC_MODE_PS_MASK GENMASK(29, 28) +#define ATMEL_SMC_MODE_PS_4 (0 << 28) +#define ATMEL_SMC_MODE_PS_8 (1 << 28) +#define ATMEL_SMC_MODE_PS_16 (2 << 28) +#define ATMEL_SMC_MODE_PS_32 (3 << 28) + +#define ATMEL_HSMC_TIMINGS(cs) (0x600 + ((cs) * 0x14) + 0xc) +#define ATMEL_HSMC_TIMINGS_OCMS BIT(12) +#define ATMEL_HSMC_TIMINGS_RBNSEL(x) ((x) << 28) +#define ATMEL_HSMC_TIMINGS_NFSEL BIT(31) +#define ATMEL_HSMC_TIMINGS_TCLR_SHIFT 0 +#define ATMEL_HSMC_TIMINGS_TADL_SHIFT 4 +#define ATMEL_HSMC_TIMINGS_TAR_SHIFT 8 +#define ATMEL_HSMC_TIMINGS_TRR_SHIFT 16 +#define ATMEL_HSMC_TIMINGS_TWB_SHIFT 24 + +/** + * struct atmel_smc_cs_conf - SMC CS config as described in the datasheet. + * @setup: NCS/NWE/NRD setup timings (not applicable to at91rm9200) + * @pulse: NCS/NWE/NRD pulse timings (not applicable to at91rm9200) + * @cycle: NWE/NRD cycle timings (not applicable to at91rm9200) + * @timings: advanced NAND related timings (only applicable to HSMC) + * @mode: all kind of config parameters (see the fields definition above). 
+ * The mode fields are different on at91rm9200 + */ +struct atmel_smc_cs_conf { + u32 setup; + u32 pulse; + u32 cycle; + u32 timings; + u32 mode; +}; + +void atmel_smc_cs_conf_init(struct atmel_smc_cs_conf *conf); +int atmel_smc_cs_conf_set_timing(struct atmel_smc_cs_conf *conf, + unsigned int shift, + unsigned int ncycles); +int atmel_smc_cs_conf_set_setup(struct atmel_smc_cs_conf *conf, + unsigned int shift, unsigned int ncycles); +int atmel_smc_cs_conf_set_pulse(struct atmel_smc_cs_conf *conf, + unsigned int shift, unsigned int ncycles); +int atmel_smc_cs_conf_set_cycle(struct atmel_smc_cs_conf *conf, + unsigned int shift, unsigned int ncycles); +void atmel_smc_cs_conf_apply(struct regmap *regmap, int cs, + const struct atmel_smc_cs_conf *conf); +void atmel_hsmc_cs_conf_apply(struct regmap *regmap, int cs, + const struct atmel_smc_cs_conf *conf); +void atmel_smc_cs_conf_get(struct regmap *regmap, int cs, + struct atmel_smc_cs_conf *conf); +void atmel_hsmc_cs_conf_get(struct regmap *regmap, int cs, + struct atmel_smc_cs_conf *conf); /* * This function converts a setup timing expressed in nanoseconds into an -- cgit v1.2.3 From 0d69080d9e01d5d60f1887def2080ce3f66f5856 Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Thu, 16 Mar 2017 09:30:31 +0100 Subject: mfd: syscon: atmel-smc: Remove unused helpers/macros All macros prefixed with AT91[SAM9]_SMC have been replaced by equivalent definitions prefixed with ATMEL_SMC, and the at91sam9_smc_xxxx() helpers are no longer used. Drop these definitions before someone starts using them again. 
Signed-off-by: Boris Brezillon Acked-by: Nicolas Ferre Signed-off-by: Lee Jones --- include/linux/mfd/syscon/atmel-smc.h | 152 ----------------------------------- 1 file changed, 152 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/atmel-smc.h b/include/linux/mfd/syscon/atmel-smc.h index 00e6e3c8ee6f..afa266169800 100644 --- a/include/linux/mfd/syscon/atmel-smc.h +++ b/include/linux/mfd/syscon/atmel-smc.h @@ -17,58 +17,6 @@ #include #include -#define AT91SAM9_SMC_GENERIC 0x00 -#define AT91SAM9_SMC_GENERIC_BLK_SZ 0x10 - -#define SAMA5_SMC_GENERIC 0x600 -#define SAMA5_SMC_GENERIC_BLK_SZ 0x14 - -#define AT91SAM9_SMC_SETUP(o) ((o) + 0x00) -#define AT91SAM9_SMC_NWESETUP(x) (x) -#define AT91SAM9_SMC_NCS_WRSETUP(x) ((x) << 8) -#define AT91SAM9_SMC_NRDSETUP(x) ((x) << 16) -#define AT91SAM9_SMC_NCS_NRDSETUP(x) ((x) << 24) - -#define AT91SAM9_SMC_PULSE(o) ((o) + 0x04) -#define AT91SAM9_SMC_NWEPULSE(x) (x) -#define AT91SAM9_SMC_NCS_WRPULSE(x) ((x) << 8) -#define AT91SAM9_SMC_NRDPULSE(x) ((x) << 16) -#define AT91SAM9_SMC_NCS_NRDPULSE(x) ((x) << 24) - -#define AT91SAM9_SMC_CYCLE(o) ((o) + 0x08) -#define AT91SAM9_SMC_NWECYCLE(x) (x) -#define AT91SAM9_SMC_NRDCYCLE(x) ((x) << 16) - -#define AT91SAM9_SMC_MODE(o) ((o) + 0x0c) -#define SAMA5_SMC_MODE(o) ((o) + 0x10) -#define AT91_SMC_READMODE BIT(0) -#define AT91_SMC_READMODE_NCS (0 << 0) -#define AT91_SMC_READMODE_NRD (1 << 0) -#define AT91_SMC_WRITEMODE BIT(1) -#define AT91_SMC_WRITEMODE_NCS (0 << 1) -#define AT91_SMC_WRITEMODE_NWE (1 << 1) -#define AT91_SMC_EXNWMODE GENMASK(5, 4) -#define AT91_SMC_EXNWMODE_DISABLE (0 << 4) -#define AT91_SMC_EXNWMODE_FROZEN (2 << 4) -#define AT91_SMC_EXNWMODE_READY (3 << 4) -#define AT91_SMC_BAT BIT(8) -#define AT91_SMC_BAT_SELECT (0 << 8) -#define AT91_SMC_BAT_WRITE (1 << 8) -#define AT91_SMC_DBW GENMASK(13, 12) -#define AT91_SMC_DBW_8 (0 << 12) -#define AT91_SMC_DBW_16 (1 << 12) -#define AT91_SMC_DBW_32 (2 << 12) -#define AT91_SMC_TDF GENMASK(19, 16) -#define 
AT91_SMC_TDF_(x) ((((x) - 1) << 16) & AT91_SMC_TDF) -#define AT91_SMC_TDF_MAX 16 -#define AT91_SMC_TDFMODE_OPTIMIZED BIT(20) -#define AT91_SMC_PMEN BIT(24) -#define AT91_SMC_PS GENMASK(29, 28) -#define AT91_SMC_PS_4 (0 << 28) -#define AT91_SMC_PS_8 (1 << 28) -#define AT91_SMC_PS_16 (2 << 28) -#define AT91_SMC_PS_32 (3 << 28) - #define ATMEL_SMC_SETUP(cs) (((cs) * 0x10)) #define ATMEL_HSMC_SETUP(cs) (0x600 + ((cs) * 0x14)) #define ATMEL_SMC_PULSE(cs) (((cs) * 0x10) + 0x4) @@ -157,104 +105,4 @@ void atmel_smc_cs_conf_get(struct regmap *regmap, int cs, void atmel_hsmc_cs_conf_get(struct regmap *regmap, int cs, struct atmel_smc_cs_conf *conf); -/* - * This function converts a setup timing expressed in nanoseconds into an - * encoded value that can be written in the SMC_SETUP register. - * - * The following formula is described in atmel datasheets (section - * "SMC Setup Register"): - * - * setup length = (128* SETUP[5] + SETUP[4:0]) - * - * where setup length is the timing expressed in cycles. - */ -static inline u32 at91sam9_smc_setup_ns_to_cycles(unsigned int clk_rate, - u32 timing_ns) -{ - u32 clk_period = DIV_ROUND_UP(NSEC_PER_SEC, clk_rate); - u32 coded_cycles = 0; - u32 cycles; - - cycles = DIV_ROUND_UP(timing_ns, clk_period); - if (cycles / 32) { - coded_cycles |= 1 << 5; - if (cycles < 128) - cycles = 0; - } - - coded_cycles |= cycles % 32; - - return coded_cycles; -} - -/* - * This function converts a pulse timing expressed in nanoseconds into an - * encoded value that can be written in the SMC_PULSE register. - * - * The following formula is described in atmel datasheets (section - * "SMC Pulse Register"): - * - * pulse length = (256* PULSE[6] + PULSE[5:0]) - * - * where pulse length is the timing expressed in cycles. 
- */ -static inline u32 at91sam9_smc_pulse_ns_to_cycles(unsigned int clk_rate, - u32 timing_ns) -{ - u32 clk_period = DIV_ROUND_UP(NSEC_PER_SEC, clk_rate); - u32 coded_cycles = 0; - u32 cycles; - - cycles = DIV_ROUND_UP(timing_ns, clk_period); - if (cycles / 64) { - coded_cycles |= 1 << 6; - if (cycles < 256) - cycles = 0; - } - - coded_cycles |= cycles % 64; - - return coded_cycles; -} - -/* - * This function converts a cycle timing expressed in nanoseconds into an - * encoded value that can be written in the SMC_CYCLE register. - * - * The following formula is described in atmel datasheets (section - * "SMC Cycle Register"): - * - * cycle length = (CYCLE[8:7]*256 + CYCLE[6:0]) - * - * where cycle length is the timing expressed in cycles. - */ -static inline u32 at91sam9_smc_cycle_ns_to_cycles(unsigned int clk_rate, - u32 timing_ns) -{ - u32 clk_period = DIV_ROUND_UP(NSEC_PER_SEC, clk_rate); - u32 coded_cycles = 0; - u32 cycles; - - cycles = DIV_ROUND_UP(timing_ns, clk_period); - if (cycles / 128) { - coded_cycles = cycles / 256; - cycles %= 256; - if (cycles >= 128) { - coded_cycles++; - cycles = 0; - } - - if (coded_cycles > 0x3) { - coded_cycles = 0x3; - cycles = 0x7f; - } - - coded_cycles <<= 7; - } - - coded_cycles |= cycles % 128; - - return coded_cycles; -} - #endif /* _LINUX_MFD_SYSCON_ATMEL_SMC_H_ */ -- cgit v1.2.3 From 656211b1dfb9e0b68d4e634931432e29a6facf46 Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Mon, 3 Apr 2017 15:46:40 +0100 Subject: mfd: Add support for DA9061 MFD support for DA9061 is provided as part of the DA9062 device driver. The registers header file adds two new chip variant IDs defined in DA9061 and DA9062 hardware. The core header file adds new software enumerations for listing the valid DA9061 IRQs and a da9062_compatible_types enumeration for distinguishing between DA9061/62 devices in software. The core source code adds a new .compatible of_device_id entry. 
This is extended from DA9062 to support both "dlg,da9061" and "dlg,da9062". The .data entry now holds a reference to the enumerated device type. A new regmap_irq_chip model is added for DA9061 and this supports the new list of regmap_irq entries. A new mfd_cell da9061_devs[] array lists the new sub system components for DA9061. Support is added for a new DA9061 regmap_config which lists the correct readable, writable and volatile ranges for this chip. The probe function uses the device tree compatible string to switch on the da9062_compatible_types and configure the correct mfd cells, irq chip and regmap config. Kconfig is updated to reflect support for DA9061 and DA9062 PMICs. Signed-off-by: Steve Twiss Signed-off-by: Lee Jones --- include/linux/mfd/da9062/core.h | 29 +++++++++++++++++++++++++++-- include/linux/mfd/da9062/registers.h | 5 +++-- 2 files changed, 30 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9062/core.h b/include/linux/mfd/da9062/core.h index 376ba84366a0..74d33a01ddae 100644 --- a/include/linux/mfd/da9062/core.h +++ b/include/linux/mfd/da9062/core.h @@ -1,5 +1,5 @@ /* - * Copyright (C) 2015 Dialog Semiconductor Ltd. 
+ * Copyright (C) 2015-2017 Dialog Semiconductor * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,7 +18,31 @@ #include #include -/* Interrupts */ +enum da9062_compatible_types { + COMPAT_TYPE_DA9061 = 1, + COMPAT_TYPE_DA9062, +}; + +enum da9061_irqs { + /* IRQ A */ + DA9061_IRQ_ONKEY, + DA9061_IRQ_WDG_WARN, + DA9061_IRQ_SEQ_RDY, + /* IRQ B*/ + DA9061_IRQ_TEMP, + DA9061_IRQ_LDO_LIM, + DA9061_IRQ_DVC_RDY, + DA9061_IRQ_VDD_WARN, + /* IRQ C */ + DA9061_IRQ_GPI0, + DA9061_IRQ_GPI1, + DA9061_IRQ_GPI2, + DA9061_IRQ_GPI3, + DA9061_IRQ_GPI4, + + DA9061_NUM_IRQ, +}; + enum da9062_irqs { /* IRQ A */ DA9062_IRQ_ONKEY, @@ -45,6 +69,7 @@ struct da9062 { struct device *dev; struct regmap *regmap; struct regmap_irq_chip_data *regmap_irq; + enum da9062_compatible_types chip_type; }; #endif /* __MFD_DA9062_CORE_H__ */ diff --git a/include/linux/mfd/da9062/registers.h b/include/linux/mfd/da9062/registers.h index 97790d1b02c5..18d576aed902 100644 --- a/include/linux/mfd/da9062/registers.h +++ b/include/linux/mfd/da9062/registers.h @@ -1,6 +1,5 @@ /* - * registers.h - REGISTERS H for DA9062 - * Copyright (C) 2015 Dialog Semiconductor Ltd. + * Copyright (C) 2015-2017 Dialog Semiconductor * * This program is free software; you can redistribute it and/or * modify it under the terms of the GNU General Public License @@ -18,6 +17,8 @@ #define DA9062_PMIC_DEVICE_ID 0x62 #define DA9062_PMIC_VARIANT_MRC_AA 0x01 +#define DA9062_PMIC_VARIANT_VRC_DA9061 0x01 +#define DA9062_PMIC_VARIANT_VRC_DA9062 0x02 #define DA9062_I2C_PAGE_SEL_SHIFT 1 -- cgit v1.2.3 From addebf1588ab812b891651ef5fba194659f71ea5 Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Thu, 23 Mar 2017 09:03:24 +0100 Subject: mfd: exynos-lpass: Remove pad retention control Pad retention should be controlled from pin control driver, so remove it from Exynos LPASS driver. 
After this change, no more access to PMU regmap is needed, so remove also the code for handling PMU regmap. Signed-off-by: Marek Szyprowski Acked-by: Krzysztof Kozlowski Acked-by: Sylwester Nawrocki Acked-by: Rob Herring Acked-for-MFD-by: Lee Jones Signed-off-by: Lee Jones --- include/linux/mfd/syscon/exynos5-pmu.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/exynos5-pmu.h b/include/linux/mfd/syscon/exynos5-pmu.h index c28ff21ca4d2..0622ae86f9db 100644 --- a/include/linux/mfd/syscon/exynos5-pmu.h +++ b/include/linux/mfd/syscon/exynos5-pmu.h @@ -46,7 +46,4 @@ #define EXYNOS5_MIPI_PHY_S_RESETN BIT(1) #define EXYNOS5_MIPI_PHY_M_RESETN BIT(2) -#define EXYNOS5433_PAD_RETENTION_AUD_OPTION (0x3028) -#define EXYNOS5433_PAD_INITIATE_WAKEUP_FROM_LOWPWR BIT(28) - #endif /* _LINUX_MFD_SYSCON_PMU_EXYNOS5_H_ */ -- cgit v1.2.3 From 80f18379a7c350c011d30332658aa15fe49a8fa5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 27 Apr 2017 09:42:24 +0200 Subject: fs: add a VALID_OPEN_FLAGS Add a central define for all valid open flags, and use it in the uniqueness check. 
Signed-off-by: Christoph Hellwig Signed-off-by: Al Viro --- include/linux/fcntl.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fcntl.h b/include/linux/fcntl.h index 76ce329e656d..1b48d9c9a561 100644 --- a/include/linux/fcntl.h +++ b/include/linux/fcntl.h @@ -3,6 +3,12 @@ #include +/* list of all valid flags for the open/openat flags argument: */ +#define VALID_OPEN_FLAGS \ + (O_RDONLY | O_WRONLY | O_RDWR | O_CREAT | O_EXCL | O_NOCTTY | O_TRUNC | \ + O_APPEND | O_NDELAY | O_NONBLOCK | O_NDELAY | __O_SYNC | O_DSYNC | \ + FASYNC | O_DIRECT | O_LARGEFILE | O_DIRECTORY | O_NOFOLLOW | \ + O_NOATIME | O_CLOEXEC | O_PATH | __O_TMPFILE) #ifndef force_o_largefile #define force_o_largefile() (BITS_PER_LONG != 32) -- cgit v1.2.3 From e8245c1b1a3bb8474f91c69ccd13637d3589bb2c Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:34:06 +0200 Subject: iommu: Include device.h in iommu.h We make use of 'struct device' in iommu.h, so include device.h to make it available explicitly. Re-order the other headers while at it. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 6a6de187ddc0..3b4fe4b79d20 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -19,11 +19,13 @@ #ifndef __LINUX_IOMMU_H #define __LINUX_IOMMU_H +#include +#include +#include #include #include #include -#include -#include + #include #define IOMMU_READ (1 << 0) -- cgit v1.2.3 From 207c6e36f122ebb1164d611c9f34f128313f47d5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:39:28 +0200 Subject: iommu: Move report_iommu_fault() to iommu.c The function is in no fast-path, there is no need for it to be static inline in a header file. This also removes the need to include iommu trace-points in iommu.h. 
Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 41 ++--------------------------------------- 1 file changed, 2 insertions(+), 39 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index 3b4fe4b79d20..abaa0ca848bc 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -330,46 +330,9 @@ extern int iommu_domain_window_enable(struct iommu_domain *domain, u32 wnd_nr, phys_addr_t offset, u64 size, int prot); extern void iommu_domain_window_disable(struct iommu_domain *domain, u32 wnd_nr); -/** - * report_iommu_fault() - report about an IOMMU fault to the IOMMU framework - * @domain: the iommu domain where the fault has happened - * @dev: the device where the fault has happened - * @iova: the faulting address - * @flags: mmu fault flags (e.g. IOMMU_FAULT_READ/IOMMU_FAULT_WRITE/...) - * - * This function should be called by the low-level IOMMU implementations - * whenever IOMMU faults happen, to allow high-level users, that are - * interested in such events, to know about them. - * - * This event may be useful for several possible use cases: - * - mere logging of the event - * - dynamic TLB/PTE loading - * - if restarting of the faulting device is required - * - * Returns 0 on success and an appropriate error code otherwise (if dynamic - * PTE/TLB loading will one day be supported, implementations will be able - * to tell whether it succeeded or not according to this return value). - * - * Specifically, -ENOSYS is returned if a fault handler isn't installed - * (though fault handlers can also return -ENOSYS, in case they want to - * elicit the default behavior of the IOMMU drivers). - */ -static inline int report_iommu_fault(struct iommu_domain *domain, - struct device *dev, unsigned long iova, int flags) -{ - int ret = -ENOSYS; - /* - * if upper layers showed interest and installed a fault handler, - * invoke it. 
- */ - if (domain->handler) - ret = domain->handler(domain, dev, iova, flags, - domain->handler_token); - - trace_io_page_fault(dev, iova, flags); - return ret; -} +extern int report_iommu_fault(struct iommu_domain *domain, struct device *dev, + unsigned long iova, int flags); static inline size_t iommu_map_sg(struct iommu_domain *domain, unsigned long iova, struct scatterlist *sg, -- cgit v1.2.3 From 1578353e05cd23b10a9e5e8d1626e5bd0849d873 Mon Sep 17 00:00:00 2001 From: Icenowy Zheng Date: Mon, 17 Apr 2017 19:57:40 +0800 Subject: mfd: axp20x: Support AXP803 variant AXP803 is a new PMIC chip produced by X-Powers, usually paired with A64 via RSB bus. The PMIC itself is like AXP288, but with RSB support and dedicated VBUS and ACIN. Add support for it in the axp20x mfd driver. Currently only power key function is supported. Signed-off-by: Icenowy Zheng Signed-off-by: Lee Jones --- include/linux/mfd/axp20x.h | 40 +++++++++++++++++++++++++++++++++++++++- 1 file changed, 39 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index dc8798cf2a24..cde56cfe8446 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -20,6 +20,7 @@ enum axp20x_variants { AXP221_ID, AXP223_ID, AXP288_ID, + AXP803_ID, AXP806_ID, AXP809_ID, NR_AXP20X_VARIANTS, @@ -234,7 +235,7 @@ enum axp20x_variants { #define AXP22X_TS_ADC_L 0x59 #define AXP22X_BATLOW_THRES1 0xe6 -/* AXP288 specific registers */ +/* AXP288/AXP803 specific registers */ #define AXP288_POWER_REASON 0x02 #define AXP288_BC_GLOBAL 0x2c #define AXP288_BC_VBUS_CNTL 0x2d @@ -475,6 +476,43 @@ enum axp288_irqs { AXP288_IRQ_BC_USB_CHNG, }; +enum axp803_irqs { + AXP803_IRQ_ACIN_OVER_V = 1, + AXP803_IRQ_ACIN_PLUGIN, + AXP803_IRQ_ACIN_REMOVAL, + AXP803_IRQ_VBUS_OVER_V, + AXP803_IRQ_VBUS_PLUGIN, + AXP803_IRQ_VBUS_REMOVAL, + AXP803_IRQ_BATT_PLUGIN, + AXP803_IRQ_BATT_REMOVAL, + AXP803_IRQ_BATT_ENT_ACT_MODE, + AXP803_IRQ_BATT_EXIT_ACT_MODE, + 
AXP803_IRQ_CHARG, + AXP803_IRQ_CHARG_DONE, + AXP803_IRQ_BATT_CHG_TEMP_HIGH, + AXP803_IRQ_BATT_CHG_TEMP_HIGH_END, + AXP803_IRQ_BATT_CHG_TEMP_LOW, + AXP803_IRQ_BATT_CHG_TEMP_LOW_END, + AXP803_IRQ_BATT_ACT_TEMP_HIGH, + AXP803_IRQ_BATT_ACT_TEMP_HIGH_END, + AXP803_IRQ_BATT_ACT_TEMP_LOW, + AXP803_IRQ_BATT_ACT_TEMP_LOW_END, + AXP803_IRQ_DIE_TEMP_HIGH, + AXP803_IRQ_GPADC, + AXP803_IRQ_LOW_PWR_LVL1, + AXP803_IRQ_LOW_PWR_LVL2, + AXP803_IRQ_TIMER, + AXP803_IRQ_PEK_RIS_EDGE, + AXP803_IRQ_PEK_FAL_EDGE, + AXP803_IRQ_PEK_SHORT, + AXP803_IRQ_PEK_LONG, + AXP803_IRQ_PEK_OVER_OFF, + AXP803_IRQ_GPIO1_INPUT, + AXP803_IRQ_GPIO0_INPUT, + AXP803_IRQ_BC_USB_CHNG, + AXP803_IRQ_MV_CHNG, +}; + enum axp806_irqs { AXP806_IRQ_DIE_TEMP_HIGH_LV1, AXP806_IRQ_DIE_TEMP_HIGH_LV2, -- cgit v1.2.3 From 5af50993850a48ba749b122173d789ea90976c72 Mon Sep 17 00:00:00 2001 From: Benjamin Herrenschmidt Date: Wed, 5 Apr 2017 17:54:56 +1000 Subject: KVM: PPC: Book3S HV: Native usage of the XIVE interrupt controller This patch makes KVM capable of using the XIVE interrupt controller to provide the standard PAPR "XICS" style hypercalls. It is necessary for proper operations when the host uses XIVE natively. This has been lightly tested on an actual system, including PCI pass-through with a TG3 device. 
Signed-off-by: Benjamin Herrenschmidt [mpe: Cleanup pr_xxx(), unsplit pr_xxx() strings, etc., fix build failures by adding KVM_XIVE which depends on KVM_XICS and XIVE, and adding empty stubs for the kvm_xive_xxx() routines, fixup subject, integrate fixes from Paul for building PR=y HV=n] Signed-off-by: Michael Ellerman --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c14ad9809da..d1a6e554ee68 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1165,7 +1165,6 @@ int kvm_register_device_ops(struct kvm_device_ops *ops, u32 type); void kvm_unregister_device_ops(u32 type); extern struct kvm_device_ops kvm_mpic_ops; -extern struct kvm_device_ops kvm_xics_ops; extern struct kvm_device_ops kvm_arm_vgic_v2_ops; extern struct kvm_device_ops kvm_arm_vgic_v3_ops; -- cgit v1.2.3 From 72875d8a4d92f6f37e051be522b2252fd49bd50e Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 26 Apr 2017 22:32:19 +0200 Subject: KVM: add kvm_{test,clear}_request to replace {test,clear}_bit MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Users were expected to use kvm_check_request() for testing and clearing, but requests have expanded their use since then and some users want to only test or do a faster clear. Make sure that requests are not directly accessed with bit operations.
Reviewed-by: Christian Borntraeger Signed-off-by: Radim Krčmář Reviewed-by: Andrew Jones Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 397b7b5b1933..374fa92c7657 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1079,10 +1079,20 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) set_bit(req, &vcpu->requests); } +static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) +{ + return test_bit(req, &vcpu->requests); +} + +static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) +{ + clear_bit(req, &vcpu->requests); +} + static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) { - if (test_bit(req, &vcpu->requests)) { - clear_bit(req, &vcpu->requests); + if (kvm_test_request(req, vcpu)) { + kvm_clear_request(req, vcpu); /* * Ensure the rest of the request is visible to kvm_check_request's -- cgit v1.2.3 From 930f7fd6da77ed9476a538345513460fd304aaf5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 26 Apr 2017 22:32:22 +0200 Subject: KVM: mark requests that do not need a wakeup MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some operations must ensure that the guest is not running with stale data, but if the guest is halted, then the update can wait until another event happens. kvm_make_all_requests() currently doesn't wake up, so we can mark all requests used with it. First 8 bits were arbitrarily reserved for request numbers. Most uses of requests have the request type as a constant, so a compiler will optimize the '&'. An alternative would be to have an inline function that would return whether the request needs a wake-up or not, but I like this one better even though it might produce worse assembly. 
Signed-off-by: Radim Krčmář Reviewed-by: Andrew Jones Reviewed-by: Cornelia Huck Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 374fa92c7657..a805ddcb7eb0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -115,12 +115,14 @@ static inline bool is_error_page(struct page *page) return IS_ERR(page); } +#define KVM_REQUEST_MASK GENMASK(7,0) +#define KVM_REQUEST_NO_WAKEUP BIT(8) /* * Architecture-independent vcpu->requests bit members * Bits 4-7 are reserved for more arch-independent bits. */ -#define KVM_REQ_TLB_FLUSH 0 -#define KVM_REQ_MMU_RELOAD 1 +#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_NO_WAKEUP) #define KVM_REQ_PENDING_TIMER 2 #define KVM_REQ_UNHALT 3 @@ -1076,17 +1078,17 @@ static inline void kvm_make_request(int req, struct kvm_vcpu *vcpu) * caller. Paired with the smp_mb__after_atomic in kvm_check_request. */ smp_wmb(); - set_bit(req, &vcpu->requests); + set_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } static inline bool kvm_test_request(int req, struct kvm_vcpu *vcpu) { - return test_bit(req, &vcpu->requests); + return test_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } static inline void kvm_clear_request(int req, struct kvm_vcpu *vcpu) { - clear_bit(req, &vcpu->requests); + clear_bit(req & KVM_REQUEST_MASK, &vcpu->requests); } static inline bool kvm_check_request(int req, struct kvm_vcpu *vcpu) -- cgit v1.2.3 From cde9af6e79046e12cd08d161139b1d5e57e9fbac Mon Sep 17 00:00:00 2001 From: Andrew Jones Date: Wed, 26 Apr 2017 22:32:24 +0200 Subject: KVM: add explicit barrier to kvm_vcpu_kick kvm_vcpu_kick() must issue a general memory barrier prior to reading vcpu->mode in order to ensure correctness of the mutual-exclusion memory barrier pattern used with vcpu->requests. 
While the cmpxchg called from kvm_vcpu_kick(): kvm_vcpu_kick kvm_arch_vcpu_should_kick kvm_vcpu_exiting_guest_mode cmpxchg implies general memory barriers before and after the operation, that implication is only valid when cmpxchg succeeds. We need an explicit barrier for when it fails, otherwise a VCPU thread on its entry path that reads zero for vcpu->requests does not exclude the possibility the requesting thread sees !IN_GUEST_MODE when it reads vcpu->mode. kvm_make_all_cpus_request already had a barrier, so we remove it, as now it would be redundant. Signed-off-by: Andrew Jones Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a805ddcb7eb0..84c5396564f7 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -270,6 +270,12 @@ struct kvm_vcpu { static inline int kvm_vcpu_exiting_guest_mode(struct kvm_vcpu *vcpu) { + /* + * The memory barrier ensures a previous write to vcpu->requests cannot + * be reordered with the read of vcpu->mode. It pairs with the general + * memory barrier following the write of vcpu->mode in VCPU RUN. + */ + smp_mb__before_atomic(); return cmpxchg(&vcpu->mode, IN_GUEST_MODE, EXITING_GUEST_MODE); } -- cgit v1.2.3 From 178f02ffafafc59d4d4b135242e5cc1515743680 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Radim=20Kr=C4=8Dm=C3=A1=C5=99?= Date: Wed, 26 Apr 2017 22:32:26 +0200 Subject: KVM: return if kvm_vcpu_wake_up() did wake up the VCPU MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit No need to kick a VCPU that we have just woken up. 
Signed-off-by: Radim Krčmář Reviewed-by: Andrew Jones Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 84c5396564f7..f4a2c00092f8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -690,7 +690,7 @@ void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); -void kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); +bool kvm_vcpu_wake_up(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 7a97cec26b94c909f4cbad2dc3186af3e457a522 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 27 Apr 2017 14:33:43 +0200 Subject: KVM: mark requests that need synchronization MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit kvm_make_all_requests() provides a synchronization that waits until all kicked VCPUs have acknowledged the kick. This is important for KVM_REQ_MMU_RELOAD as it prevents freeing while lockless paging is underway. This patch adds the synchronization property into all requests that are currently being used with kvm_make_all_requests() in order to preserve the current behavior and only introduce a new framework. Removing it from requests where it is not necessary is left for future patches. 
Signed-off-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index f4a2c00092f8..a5bfffa8c8d4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -117,14 +117,15 @@ static inline bool is_error_page(struct page *page) #define KVM_REQUEST_MASK GENMASK(7,0) #define KVM_REQUEST_NO_WAKEUP BIT(8) +#define KVM_REQUEST_WAIT BIT(9) /* * Architecture-independent vcpu->requests bit members * Bits 4-7 are reserved for more arch-independent bits. */ -#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_NO_WAKEUP) -#define KVM_REQ_PENDING_TIMER 2 -#define KVM_REQ_UNHALT 3 +#define KVM_REQ_TLB_FLUSH (0 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_MMU_RELOAD (1 | KVM_REQUEST_WAIT | KVM_REQUEST_NO_WAKEUP) +#define KVM_REQ_PENDING_TIMER 2 +#define KVM_REQ_UNHALT 3 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From 6d684e54690caef45cf14051ddeb7c71beeb681b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Thu, 27 Apr 2017 13:44:51 +0800 Subject: rhashtable: Cap total number of entries to 2^31 When max_size is not set or if it set to a sufficiently large value, the nelems counter can overflow. This would cause havoc with the automatic shrinking as it would then attempt to fit a huge number of entries into a tiny hash table. This patch fixes this by adding max_elems to struct rhashtable to cap the number of elements. This is set to 2^31 as nelems is not a precise count. This is sufficiently smaller than UINT_MAX that it should be safe. When max_size is set max_elems will be lowered to at most twice max_size as is the status quo. Signed-off-by: Herbert Xu Signed-off-by: David S. 
Miller --- include/linux/rhashtable.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index ae93b65d13d7..45f89369c4c8 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -155,6 +155,7 @@ struct rhashtable_params { * @nelems: Number of elements in table * @key_len: Key length for hashfn * @p: Configuration parameters + * @max_elems: Maximum number of elements in table * @rhlist: True if this is an rhltable * @run_work: Deferred worker to expand/shrink asynchronously * @mutex: Mutex to protect current/future table swapping @@ -165,6 +166,7 @@ struct rhashtable { atomic_t nelems; unsigned int key_len; struct rhashtable_params p; + unsigned int max_elems; bool rhlist; struct work_struct run_work; struct mutex mutex; @@ -327,8 +329,7 @@ static inline bool rht_grow_above_100(const struct rhashtable *ht, static inline bool rht_grow_above_max(const struct rhashtable *ht, const struct bucket_table *tbl) { - return ht->p.max_size && - (atomic_read(&ht->nelems) / 2u) >= ht->p.max_size; + return atomic_read(&ht->nelems) >= ht->max_elems; } /* The bucket lock is selected based on the hash and protects mutations -- cgit v1.2.3 From 8ecbc40ada116f2f7d6b61cd646802c87b7c5c7d Mon Sep 17 00:00:00 2001 From: Zhang Shengju Date: Wed, 26 Apr 2017 11:05:12 +0800 Subject: net: update comment for netif_dormant() function This patch updates the comment for netif_dormant() function to reflect the intended usage. Signed-off-by: Zhang Shengju Signed-off-by: David S. 
Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8c5c8cdc7b97..6847714a5ae3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3408,10 +3408,10 @@ static inline void netif_dormant_off(struct net_device *dev) } /** - * netif_dormant - test if carrier present + * netif_dormant - test if device is dormant * @dev: network device * - * Check if carrier is present on device + * Check if device is dormant. */ static inline bool netif_dormant(const struct net_device *dev) { -- cgit v1.2.3 From 99f906e9ad7b6e79ffeda30f45906a8448b9d6a2 Mon Sep 17 00:00:00 2001 From: Mike Manning Date: Wed, 26 Apr 2017 14:48:09 +0100 Subject: bridge: add per-port broadcast flood flag Support for l2 multicast flood control was added in commit b6cb5ac8331b ("net: bridge: add per-port multicast flood flag"). It allows broadcast as it was introduced specifically for unknown multicast flood control. But as broadcast is a special case of multicast, this may also need to be disabled. For this purpose, introduce a flag to disable the flooding of received l2 broadcasts. This approach is backwards compatible and provides flexibility in filtering for the desired packet types. Cc: Nikolay Aleksandrov Signed-off-by: Mike Manning Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. 
Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index c5847dc75a93..0c16866a7aac 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -48,6 +48,7 @@ struct br_ip_list { #define BR_MCAST_FLOOD BIT(11) #define BR_MULTICAST_TO_UNICAST BIT(12) #define BR_VLAN_TUNNEL BIT(13) +#define BR_BCAST_FLOOD BIT(14) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From 7e0d574f2683a2346c978613a72ff07afc89b17a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 27 Apr 2017 10:11:23 -0700 Subject: dm: introduce enum dm_queue_mode to cleanup related code Introduce an enumeration type for the queue mode. This patch does not change any functionality but makes the DM code easier to read. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 14 ++++++++------ 1 file changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 98f981026e4e..1ce4036224eb 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -22,11 +22,13 @@ struct bio_vec; /* * Type of table, mapped_device's mempool and request_queue */ -#define DM_TYPE_NONE 0 -#define DM_TYPE_BIO_BASED 1 -#define DM_TYPE_REQUEST_BASED 2 -#define DM_TYPE_MQ_REQUEST_BASED 3 -#define DM_TYPE_DAX_BIO_BASED 4 +enum dm_queue_mode { + DM_TYPE_NONE = 0, + DM_TYPE_BIO_BASED = 1, + DM_TYPE_REQUEST_BASED = 2, + DM_TYPE_MQ_REQUEST_BASED = 3, + DM_TYPE_DAX_BIO_BASED = 4, +}; typedef enum { STATUSTYPE_INFO, STATUSTYPE_TABLE } status_type_t; @@ -476,7 +478,7 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback * Useful for "hybrid" target (supports both bio-based * and request-based). 
*/ -void dm_table_set_type(struct dm_table *t, unsigned type); +void dm_table_set_type(struct dm_table *t, enum dm_queue_mode type); /* * Finally call this to make the table ready for use. -- cgit v1.2.3 From ed6473ddc704a2005b9900ca08e236ebb2d8540a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Wed, 26 Apr 2017 11:55:27 -0400 Subject: NFSv4: Fix callback server shutdown We want to use kthread_stop() in order to ensure the threads are shut down before we tear down the nfs_callback_info in nfs_callback_down. Tested-and-reviewed-by: Kinglong Mee Reported-by: Kinglong Mee Fixes: bb6aeba736ba9 ("NFSv4.x: Switch to using svc_set_num_threads()...") Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 6ef19cf658b4..94631026f79c 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -473,6 +473,7 @@ void svc_pool_map_put(void); struct svc_serv * svc_create_pooled(struct svc_program *, unsigned int, struct svc_serv_ops *); int svc_set_num_threads(struct svc_serv *, struct svc_pool *, int); +int svc_set_num_threads_sync(struct svc_serv *, struct svc_pool *, int); int svc_pool_stats_open(struct svc_serv *serv, struct file *file); void svc_destroy(struct svc_serv *); void svc_shutdown_net(struct svc_serv *, struct net *); -- cgit v1.2.3 From 7b4ccb3c466f62bbf2f4dd5d6a143d945a6f3051 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 25 Apr 2017 19:36:25 +0200 Subject: soc: renesas: Provide dummy rcar_rst_read_mode_pins() for compile-testing If the R-Car RST driver is not included, compile-testing R-Car clock drivers fails with a link error: undefined reference to `rcar_rst_read_mode_pins' To fix this, provide a dummy version. 
Use the exact same test logic as in drivers/soc/renesas/Makefile, as there is no Kconfig symbol (yet) to control compilation of the R-Car RST driver. Fixes: 527c02f66d263d2e ("soc: renesas: Add R-Car RST driver") Signed-off-by: Geert Uytterhoeven Signed-off-by: Simon Horman --- include/linux/soc/renesas/rcar-rst.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/renesas/rcar-rst.h b/include/linux/soc/renesas/rcar-rst.h index a18e0783946b..787e7ad53d45 100644 --- a/include/linux/soc/renesas/rcar-rst.h +++ b/include/linux/soc/renesas/rcar-rst.h @@ -1,6 +1,11 @@ #ifndef __LINUX_SOC_RENESAS_RCAR_RST_H__ #define __LINUX_SOC_RENESAS_RCAR_RST_H__ +#if defined(CONFIG_ARCH_RCAR_GEN1) || defined(CONFIG_ARCH_RCAR_GEN2) || \ + defined(CONFIG_ARCH_R8A7795) || defined(CONFIG_ARCH_R8A7796) int rcar_rst_read_mode_pins(u32 *mode); +#else +static inline int rcar_rst_read_mode_pins(u32 *mode) { return -ENODEV; } +#endif #endif /* __LINUX_SOC_RENESAS_RCAR_RST_H__ */ -- cgit v1.2.3 From e38a017bf080d47376db340e94b9c2ffc47eb9b4 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Wed, 26 Apr 2017 10:58:47 +0300 Subject: mac80211: Add support for BSS max idle period element Parse the BSS max idle period element and set the BSS configuration accordingly so the driver can use this information to configure the max idle period and to use protected management frames for keep alive when required. The BSS max idle period element is defined in IEEE802.11-2016, section 9.4.2.79 Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 28 +++++++++++++++++++++++++++- 1 file changed, 27 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 639e77abf064..69033353d0d1 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -7,7 +7,7 @@ * Copyright (c) 2005, Devicescape Software, Inc. 
* Copyright (c) 2006, Michael Wu * Copyright (c) 2013 - 2014 Intel Mobile Communications GmbH - * Copyright (c) 2016 Intel Deutschland GmbH + * Copyright (c) 2016 - 2017 Intel Deutschland GmbH * * This program is free software; you can redistribute it and/or modify * it under the terms of the GNU General Public License version 2 as @@ -2316,6 +2316,32 @@ struct ieee80211_timeout_interval_ie { __le32 value; } __packed; +/** + * enum ieee80211_idle_options - BSS idle options + * @WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE: the station should send an RSN + * protected frame to the AP to reset the idle timer at the AP for + * the station. + */ +enum ieee80211_idle_options { + WLAN_IDLE_OPTIONS_PROTECTED_KEEP_ALIVE = BIT(0), +}; + +/** + * struct ieee80211_bss_max_idle_period_ie + * + * This structure refers to "BSS Max idle period element" + * + * @max_idle_period: indicates the time period during which a station can + * refrain from transmitting frames to its associated AP without being + * disassociated. In units of 1000 TUs. + * @idle_options: indicates the options associated with the BSS idle capability + * as specified in &enum ieee80211_idle_options. + */ +struct ieee80211_bss_max_idle_period_ie { + __le16 max_idle_period; + u8 idle_options; +} __packed; + /* BACK action code */ enum ieee80211_back_actioncode { WLAN_ACTION_ADDBA_REQ = 0, -- cgit v1.2.3 From 9f993737906b30d7b2454a38637d1f70ffd60f2f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Apr 2017 09:54:54 -0600 Subject: blk-mq: unify hctx delayed_run_work and run_work They serve the exact same purpose. Get rid of the non-delayed work variant, and just run it without delay for the normal case. 
Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 32bd8eb5ba67..c7cc90328426 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -15,7 +15,7 @@ struct blk_mq_hw_ctx { unsigned long state; /* BLK_MQ_S_* flags */ } ____cacheline_aligned_in_smp; - struct work_struct run_work; + struct delayed_work run_work; cpumask_var_t cpumask; int next_cpu; int next_cpu_batch; @@ -51,7 +51,6 @@ struct blk_mq_hw_ctx { atomic_t nr_active; - struct delayed_work delayed_run_work; struct delayed_work delay_work; struct hlist_node cpuhp_dead; -- cgit v1.2.3 From 818cd1cbaa7b00bbc35452a76bebc681a65f1912 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Apr 2017 09:54:55 -0600 Subject: block: add kblock_mod_delayed_work_on() This modifies (or adds, if not currently pending) an existing delayed work item. 
Reviewed-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6c247861cb66..d098c66b3ab0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1685,6 +1685,7 @@ int kblockd_schedule_work(struct work_struct *work); int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); +int kblockd_mod_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* -- cgit v1.2.3 From 21c6e939a9f6bb06fe616a87defec0f92a7c3df0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 10 Apr 2017 09:54:56 -0600 Subject: blk-mq: unify hctx delay_work and run_work The only difference between ->run_work and ->delay_work, is that the latter is used to defer running a queue. This is done by marking the queue stopped, and scheduling ->delay_work to run sometime in the future. While the queue is stopped, direct runs or runs through ->run_work will not run the queue. If we combine the handlers, then we need to handle two things: 1) If a delayed/stopped run is scheduled, then we should not run the queue before that has been completed. 2) If a queue is delayed/stopped, the handler needs to restart the queue. Normally a run of a queue with the stopped bit set would be a no-op. Case 1 is handled by modifying a currently pending queue run to the deadline set by the caller of blk_mq_delay_queue(). Subsequent attempts to queue a queue run will find the work item already pending, and direct runs will see a stopped queue as before. Case 2 is handled by adding a new bit, BLK_MQ_S_START_ON_RUN, that tells the work handler that it should clear a stopped queue and run the handler. 
Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c7cc90328426..f3e5e1de1bdb 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -51,8 +51,6 @@ struct blk_mq_hw_ctx { atomic_t nr_active; - struct delayed_work delay_work; - struct hlist_node cpuhp_dead; struct kobject kobj; @@ -168,6 +166,7 @@ enum { BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, BLK_MQ_S_TAG_WAITING = 3, + BLK_MQ_S_START_ON_RUN = 4, BLK_MQ_MAX_DEPTH = 10240, -- cgit v1.2.3 From 984c307878f8924d743c419c79fdebbc19f1285e Mon Sep 17 00:00:00 2001 From: Kishon Vijay Abraham I Date: Mon, 27 Mar 2017 15:15:13 +0530 Subject: PCI: Add device IDs for DRA74x and DRA72x Add device IDs for DRA74x and DRA72x devices. These devices have configurable PCI endpoint. Signed-off-by: Kishon Vijay Abraham I Signed-off-by: Bjorn Helgaas --- include/linux/pci_ids.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci_ids.h b/include/linux/pci_ids.h index a4f77feecbb0..5f6b71d15393 100644 --- a/include/linux/pci_ids.h +++ b/include/linux/pci_ids.h @@ -862,6 +862,8 @@ #define PCI_DEVICE_ID_TI_X620 0xac8d #define PCI_DEVICE_ID_TI_X420 0xac8e #define PCI_DEVICE_ID_TI_XX20_FM 0xac8f +#define PCI_DEVICE_ID_TI_DRA74x 0xb500 +#define PCI_DEVICE_ID_TI_DRA72x 0xb501 #define PCI_VENDOR_ID_SONY 0x104d -- cgit v1.2.3 From 140c91b26ebc48d80c6ac3ef06953b17d7fb3785 Mon Sep 17 00:00:00 2001 From: Kuppuswamy Sathyanarayanan Date: Sun, 9 Apr 2017 15:00:19 -0700 Subject: watchdog: iTCO_wdt: Add PMC specific noreboot update api In some SoCs, setting noreboot bit needs modification to PMC GC registers. But not all PMC drivers allow other drivers to memory map their GC region. This could create mem request conflict in watchdog driver. 
So this patch adds facility to allow PMC drivers to pass noreboot update function to watchdog drivers via platform data. Signed-off-by: Kuppuswamy Sathyanarayanan Acked-by: Guenter Roeck Signed-off-by: Andy Shevchenko --- include/linux/platform_data/itco_wdt.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/itco_wdt.h b/include/linux/platform_data/itco_wdt.h index f16542c77ff7..0e95527edf25 100644 --- a/include/linux/platform_data/itco_wdt.h +++ b/include/linux/platform_data/itco_wdt.h @@ -14,6 +14,10 @@ struct itco_wdt_platform_data { char name[32]; unsigned int version; + /* private data to be passed to update_no_reboot_bit API */ + void *no_reboot_priv; + /* pointer for platform specific no reboot update function */ + int (*update_no_reboot_bit)(void *priv, bool set); }; #endif /* _ITCO_WDT_H_ */ -- cgit v1.2.3 From 63ccc191649eb0f14a761074291551d0d2f85389 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Apr 2017 14:26:52 +0200 Subject: libata: remove SCT WRITE SAME support This was already disabled a while ago because it caused I/O errors, and it's severely getting in the way of the discard / write zeroes rework. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Signed-off-by: Tejun Heo --- include/linux/ata.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ata.h b/include/linux/ata.h index af6859b3a93d..ad7d9ee89ff0 100644 --- a/include/linux/ata.h +++ b/include/linux/ata.h @@ -817,11 +817,6 @@ static inline bool ata_id_sct_error_recovery_ctrl(const u16 *id) return id[ATA_ID_SCT_CMD_XPORT] & (1 << 3) ? true : false; } -static inline bool ata_id_sct_write_same(const u16 *id) -{ - return id[ATA_ID_SCT_CMD_XPORT] & (1 << 2) ? true : false; -} - static inline bool ata_id_sct_long_sector_access(const u16 *id) { return id[ATA_ID_SCT_CMD_XPORT] & (1 << 1) ?
true : false; -- cgit v1.2.3 From 461a6946b1f93f6720577fb06aa78e8cbd9291c9 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:46:20 +0200 Subject: iommu: Remove pci.h include from trace/events/iommu.h The include file does not need any PCI specifics, so remove that include. Also fix the places that relied on it. Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h index 5725c94b1f12..abd946569515 100644 --- a/include/linux/dma-iommu.h +++ b/include/linux/dma-iommu.h @@ -20,6 +20,7 @@ #include #ifdef CONFIG_IOMMU_DMA +#include #include #include -- cgit v1.2.3 From 208480bb273e15f42711bd47f70dc0fbfa2570b8 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 26 Apr 2017 15:49:57 +0200 Subject: iommu: Remove trace-events include from iommu.h It is not needed there anymore. All places needing it are fixed. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index abaa0ca848bc..dda8717545e9 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -26,8 +26,6 @@ #include #include -#include - #define IOMMU_READ (1 << 0) #define IOMMU_WRITE (1 << 1) #define IOMMU_CACHE (1 << 2) /* DMA cache coherency */ -- cgit v1.2.3 From 23f4984483623cf8621246004228f08fcabf51e4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 29 Apr 2017 15:24:03 -0700 Subject: libnvdimm: rework region badblocks clearing Toshi noticed that the new support for a region-level badblocks missed the case where errors are cleared due to BTT I/O. An initial attempt to fix this ran into a "sleeping while atomic" warning due to taking the nvdimm_bus_lock() in the BTT I/O path to satisfy the locking requirements of __nvdimm_bus_badblocks_clear(). 
However, that lock is not needed since we are not acting on any data that is subject to change under that lock. The badblocks instance has its own internal lock to handle mutations of the error list. So, in order to make it clear that we are just acting on region devices, rename __nvdimm_bus_badblocks_clear() to nvdimm_clear_badblocks_regions(). Eliminate the lock and consolidate all support routines for the new nvdimm_account_cleared_poison() in drivers/nvdimm/bus.c. Finally, take the opportunity to clean up some unnecessary casts, make the calling convention of nvdimm_clear_badblocks_regions() clearer by replacing struct resource with the minimal struct clear_badblocks_context, and use the DEVICE_ATTR macro. Cc: Dave Jiang Cc: Vishal Verma Reported-by: Toshi Kani Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 98b207611b06..f07b1b14159a 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -162,7 +162,4 @@ void nd_region_release_lane(struct nd_region *nd_region, unsigned int lane); u64 nd_fletcher64(void *addr, size_t len, bool le); void nvdimm_flush(struct nd_region *nd_region); int nvdimm_has_flush(struct nd_region *nd_region); -int nvdimm_region_badblocks_clear(struct device *dev, void *data); -void __nvdimm_bus_badblocks_clear(struct nvdimm_bus *nvdimm_bus, - struct resource *res); #endif /* __LIBNVDIMM_H__ */ -- cgit v1.2.3 From 4bfd036221c0bf75a0f475b05e22f7be9abc3101 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 4 Apr 2017 14:43:34 -0700 Subject: fscrypt: remove fscrypt_symlink_data_len() fscrypt_symlink_data_len() is never called and can be removed.
Signed-off-by: Eric Biggers Reviewed-by: Richard Weinberger Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_common.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h index 10c1abfbac6c..0a30c106c1e5 100644 --- a/include/linux/fscrypt_common.h +++ b/include/linux/fscrypt_common.h @@ -46,17 +46,6 @@ struct fscrypt_symlink_data { char encrypted_path[1]; } __packed; -/** - * This function is used to calculate the disk space required to - * store a filename of length l in encrypted symlink format. - */ -static inline u32 fscrypt_symlink_data_len(u32 l) -{ - if (l < FS_CRYPTO_BLOCK_SIZE) - l = FS_CRYPTO_BLOCK_SIZE; - return (l + sizeof(struct fscrypt_symlink_data) - 1); -} - struct fscrypt_str { unsigned char *name; u32 len; -- cgit v1.2.3 From f6dfb4c3f2161c23ab2939dd1b5f133dcdf147c6 Mon Sep 17 00:00:00 2001 From: Hadar Hen Zion Date: Fri, 24 Feb 2017 12:16:33 +0200 Subject: net/mlx5e: Update neighbour 'used' state using HW flow rules counters When IP tunnel encapsulation rules are offloaded, the kernel can't see the traffic of the offloaded flow. The neighbour for the IP tunnel destination of the offloaded flow can mistakenly become STALE and deleted by the kernel since its 'used' value wasn't changed. To make sure that a neighbour which is used by the HW won't become STALE, we proactively update the neighbour 'used' value every DELAY_PROBE_TIME period, when packets were matched and counted by the HW for one of the tunnel encap flows related to this neighbour. The periodic task that updates the used neighbours is scheduled when a tunnel encap rule is successfully offloaded into HW and keeps re-scheduling itself as long as the representor's neighbours list isn't empty. Add, remove, lookup and status change operations done over the representor's neighbours list or the neighbour hash entry encaps list are all serialized by RTNL lock. 
Signed-off-by: Hadar Hen Zion Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index f50864626230..3fece51dcf13 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -540,6 +540,7 @@ struct mlx5_fc_stats { struct workqueue_struct *wq; struct delayed_work work; unsigned long next_query; + unsigned long sampling_interval; /* jiffies */ }; struct mlx5_eswitch; -- cgit v1.2.3 From 71389703839ebe9cb426c72d5f0bd549592e583c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 28 Apr 2017 10:23:37 -0700 Subject: mm, zone_device: Replace {get, put}_zone_device_page() with a single reference to fix pmem crash MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The x86 conversion to the generic GUP code included a small change which causes crashes and data corruption in the pmem code - not good. The root cause is that the /dev/pmem driver code implicitly relies on the x86 get_user_pages() implementation doing a get_page() on the page refcount, because get_page() does a get_zone_device_page() which properly refcounts pmem's separate page struct arrays that are not present in the regular page struct structures. (The pmem driver does this because it can cover huge memory areas.) But the x86 conversion to the generic GUP code changed the get_page() to page_cache_get_speculative() which is faster but doesn't do the get_zone_device_page() call the pmem code relies on. One way to solve the regression would be to change the generic GUP code to use get_page(), but that would slow things down a bit and punish other generic-GUP using architectures for an x86-ism they did not care about. (Arguably the pmem driver was probably not working reliably for them: but nvdimm is an Intel feature, so non-x86 exposure is probably still limited.) 
So restructure the pmem code's interface with the MM instead: get rid of the get/put_zone_device_page() distinction, integrate put_zone_device_page() into __put_page() and restructure the pmem completion-wait and teardown machinery: Kirill points out that the calls to {get,put}_dev_pagemap() can be removed from the mm fast path if we take a single get_dev_pagemap() reference to signify that the page is alive and use the final put of the page to drop that reference. This does require some care to make sure that any waits for the percpu_ref to drop to zero occur *after* devm_memremap_page_release(), since it now maintains its own elevated reference. This speeds up things while also making the pmem refcounting more robust going forward. Suggested-by: Kirill Shutemov Tested-by: Kirill Shutemov Signed-off-by: Dan Williams Reviewed-by: Logan Gunthorpe Cc: Andrew Morton Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Brian Gerst Cc: Denys Vlasenko Cc: H. Peter Anvin Cc: Josh Poimboeuf Cc: Jérôme Glisse Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-mm@kvack.org Link: http://lkml.kernel.org/r/149339998297.24933.1129582806028305912.stgit@dwillia2-desk3.amr.corp.intel.com Signed-off-by: Ingo Molnar --- include/linux/mm.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a835edd2db34..695da2a19b4c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -762,19 +762,11 @@ static inline enum zone_type page_zonenum(const struct page *page) } #ifdef CONFIG_ZONE_DEVICE -void get_zone_device_page(struct page *page); -void put_zone_device_page(struct page *page); static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } #else -static inline void get_zone_device_page(struct page *page) -{ -} -static inline void put_zone_device_page(struct page *page) -{ -} static inline bool is_zone_device_page(const struct page *page)
{ return false; @@ -790,9 +782,6 @@ static inline void get_page(struct page *page) */ VM_BUG_ON_PAGE(page_ref_count(page) <= 0, page); page_ref_inc(page); - - if (unlikely(is_zone_device_page(page))) - get_zone_device_page(page); } static inline void put_page(struct page *page) @@ -801,9 +790,6 @@ static inline void put_page(struct page *page) if (put_page_testzero(page)) __put_page(page); - - if (unlikely(is_zone_device_page(page))) - put_zone_device_page(page); } #if defined(CONFIG_SPARSEMEM) && !defined(CONFIG_SPARSEMEM_VMEMMAP) -- cgit v1.2.3 From 917362135b8a5c0680acf08807e9fc6179eb6c79 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 14 Apr 2017 20:32:49 +0200 Subject: power: supply: max17042_battery: Add default platform_data fallback data Some x86 machines use a max17047 fuel-gauge and x86 might be missing platform_data if not provided by SFI. This commit adds default platform_data as fallback option so that the driver can work on boards where no platform_data is provided. Since not all boards have a thermistor hooked up, set temp_min to 0 and change the health checks from temp <= temp_min to temp < temp_min to not trigger on such boards (where temp reads 0). Signed-off-by: Hans de Goede Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power/max17042_battery.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index 522757ac9cd4..3489fb0f9099 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -24,8 +24,12 @@ #define __MAX17042_BATTERY_H_ #define MAX17042_STATUS_BattAbsent (1 << 3) -#define MAX17042_BATTERY_FULL (100) +#define MAX17042_BATTERY_FULL (95) /* Recommend. 
FullSOCThr value */ #define MAX17042_DEFAULT_SNS_RESISTOR (10000) +#define MAX17042_DEFAULT_VMIN (3000) +#define MAX17042_DEFAULT_VMAX (4500) /* LiHV cell max */ +#define MAX17042_DEFAULT_TEMP_MIN (0) /* For sys without temp sensor */ +#define MAX17042_DEFAULT_TEMP_MAX (700) /* 70 degrees Celcius */ #define MAX17042_CHARACTERIZATION_DATA_SIZE 48 -- cgit v1.2.3 From a9df22c00d7c2c9c2944c62f1b819de6c214660f Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Fri, 14 Apr 2017 20:32:51 +0200 Subject: power: supply: max17042_battery: Add support for the STATUS property Userspace prefers the driver having a status property over having to guess itself. Specifically this will properly make the GNOME3 UI (and likely others) properly show discharging / charging / full status, instead of always showing discharging as status. Note that in the case there is no charger driver supplying the max17042, then a status of unknown will get returned. At least upower treats this the same as not having a status attribute, so in this case nothing changes from a userspace pov. 
Signed-off-by: Hans de Goede Reviewed-by: Krzysztof Kozlowski Signed-off-by: Sebastian Reichel --- include/linux/power/max17042_battery.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/power/max17042_battery.h b/include/linux/power/max17042_battery.h index 3489fb0f9099..a7ed29baf44a 100644 --- a/include/linux/power/max17042_battery.h +++ b/include/linux/power/max17042_battery.h @@ -31,6 +31,9 @@ #define MAX17042_DEFAULT_TEMP_MIN (0) /* For sys without temp sensor */ #define MAX17042_DEFAULT_TEMP_MAX (700) /* 70 degrees Celcius */ +/* Consider RepCap which is less then 10 units below FullCAP full */ +#define MAX17042_FULL_THRESHOLD 10 + #define MAX17042_CHARACTERIZATION_DATA_SIZE 48 enum max17042_register { -- cgit v1.2.3 From 73a757e63114dfd765f1c5d1ff7e994f123d0234 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 1 May 2017 09:35:09 -0400 Subject: ring-buffer: Return reader page back into existing ring buffer When reading the ring buffer for consuming, it is optimized for splice, where a page is taken out of the ring buffer (zero copy) and sent to the reading consumer. When the read is finished with the page, it calls ring_buffer_free_read_page(), which simply frees the page. The next time the reader needs to get a page from the ring buffer, it must call ring_buffer_alloc_read_page() which allocates and initializes a reader page for the ring buffer to be swapped into the ring buffer for a new filled page for the reader. The problem is that there's no reason to actually free the page when it is passed back to the ring buffer. It can hold it off and reuse it for the next iteration. This completely removes the interaction with the page_alloc mechanism. 
Using the trace-cmd utility to record all events (causing trace-cmd to require reading lots of pages from the ring buffer, and calling ring_buffer_alloc/free_read_page() several times), and also assigning a stack trace trigger to the mm_page_alloc event, we can see how many times the ring_buffer_alloc_read_page() needed to allocate a page for the ring buffer. Before this change: # trace-cmd record -e all -e mem_page_alloc -R stacktrace sleep 1 # trace-cmd report |grep ring_buffer_alloc_read_page | wc -l 9968 After this change: # trace-cmd record -e all -e mem_page_alloc -R stacktrace sleep 1 # trace-cmd report |grep ring_buffer_alloc_read_page | wc -l 4 Signed-off-by: Steven Rostedt (VMware) --- include/linux/ring_buffer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index b6d4568795a7..ee9b461af095 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -185,7 +185,7 @@ size_t ring_buffer_page_len(void *page); void *ring_buffer_alloc_read_page(struct ring_buffer *buffer, int cpu); -void ring_buffer_free_read_page(struct ring_buffer *buffer, void *data); +void ring_buffer_free_read_page(struct ring_buffer *buffer, int cpu, void *data); int ring_buffer_read_page(struct ring_buffer *buffer, void **data_page, size_t len, int cpu, int full); -- cgit v1.2.3 From 45d9b378e85f1b00ac047626827c68589168936c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 30 Apr 2017 21:46:45 -0700 Subject: netlink: add NULL-friendly helper for setting extended ACK message As we propagate extended ack reporting throughout various paths in the kernel it may be that the same function is called with the extended ack parameter passed as NULL. One place where that happens is in drivers which have a centralized reconfiguration function called both from ndos and from ethtool_ops. Add a new helper for setting the error message in such conditions. 
Existing helper is left as is to encourage propagating the ext ack fully wherever possible. It also makes it clear in the code which messages may be lost due to ext ack being NULL. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netlink.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 8d2a8924705c..c20395edf2de 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -92,6 +92,14 @@ struct netlink_ext_ack { (extack)->_msg = _msg; \ } while (0) +#define NL_MOD_TRY_SET_ERR_MSG(extack, msg) do { \ + static const char _msg[] = KBUILD_MODNAME ": " msg; \ + struct netlink_ext_ack *_extack = (extack); \ + \ + if (_extack) \ + _extack->_msg = _msg; \ +} while (0) + extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); extern int netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From ddf9f970764f4390aba767e77fddaaced4a6760d Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Sun, 30 Apr 2017 21:46:46 -0700 Subject: xdp: propagate extended ack to XDP setup Drivers usually have a number of restrictions for running XDP - most common being buffer sizes, LRO and number of rings. Even though some drivers try to be helpful and print error messages experience shows that users don't often consult kernel logs on netlink errors. Try to use the new extended ack mechanism to carry the message back to user space. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Signed-off-by: David S.
Miller --- include/linux/netdevice.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6847714a5ae3..9c23bd2efb56 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -813,11 +813,16 @@ enum xdp_netdev_command { XDP_QUERY_PROG, }; +struct netlink_ext_ack; + struct netdev_xdp { enum xdp_netdev_command command; union { /* XDP_SETUP_PROG */ - struct bpf_prog *prog; + struct { + struct bpf_prog *prog; + struct netlink_ext_ack *extack; + }; /* XDP_QUERY_PROG */ bool prog_attached; }; @@ -3291,7 +3296,8 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, int fd, u32 flags); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -- cgit v1.2.3 From 20b1bd96e9f4feeffc9206284df3c6a4438e9ca8 Mon Sep 17 00:00:00 2001 From: Ram Amrani Date: Sun, 30 Apr 2017 11:49:10 +0300 Subject: qed: output the DPM status and WID count Output to the RDMA driver whether DPM mode is enabled or disabled in the HW and if so what is the number of WIDs it supports Signed-off-by: Ram Amrani Signed-off-by: Yuval Mintz Signed-off-by: David S. 
Miller --- include/linux/qed/qed_roce_if.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_roce_if.h b/include/linux/qed/qed_roce_if.h index f742d4312c9d..cbb2ff0ce4bc 100644 --- a/include/linux/qed/qed_roce_if.h +++ b/include/linux/qed/qed_roce_if.h @@ -240,6 +240,7 @@ struct qed_rdma_add_user_out_params { u64 dpi_addr; u64 dpi_phys_addr; u32 dpi_size; + u16 wid_count; }; enum roce_mode { @@ -533,6 +534,7 @@ enum qed_rdma_type { struct qed_dev_rdma_info { struct qed_dev_info common; enum qed_rdma_type rdma_type; + u8 user_dpm_enabled; }; struct qed_rdma_ops { -- cgit v1.2.3 From 133bea04ff6fd715d8140edca9d6c7337249571b Mon Sep 17 00:00:00 2001 From: Tim Wright Date: Mon, 1 May 2017 17:30:08 +0100 Subject: IB/mlx5: Add port_xmit_wait to counter registers read Add port_xmit_wait to the error counters read by mlx5_ib_process_mad to ensure sysfs port counter provides correct value for PortXmitWait. Otherwise the sysfs port_xmit_wait file always contains zero. The previous MAD_IFC implementation populated this counter, but it was removed during the migration to PPCNT for error counters (32-bit only). 
Signed-off-by: Tim Wright Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 954f42c268a4..32de0724b400 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1456,7 +1456,9 @@ struct mlx5_ifc_ib_port_cntrs_grp_data_layout_bits { u8 vl_15_dropped[0x10]; - u8 reserved_at_a0[0xa0]; + u8 reserved_at_a0[0x80]; + + u8 port_xmit_wait[0x20]; }; struct mlx5_ifc_eth_per_traffic_grp_data_layout_bits { -- cgit v1.2.3 From 48e75b430670ebdbb00ba008e1d3690f61ab9824 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 1 May 2017 22:18:01 +0200 Subject: rhashtable: compact struct rhashtable_params By using smaller datatypes this (rather large) struct shrinks considerably (80 -> 48 bytes on x86_64). As this is embedded in other structs, this also reduces the size of several others, e.g. cls_fl_head or nft_hash. Signed-off-by: Florian Westphal Signed-off-by: David S.
Miller --- include/linux/rhashtable.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rhashtable.h b/include/linux/rhashtable.h index 45f89369c4c8..7d56a7ea2b2e 100644 --- a/include/linux/rhashtable.h +++ b/include/linux/rhashtable.h @@ -127,23 +127,23 @@ struct rhashtable; * @head_offset: Offset of rhash_head in struct to be hashed * @max_size: Maximum size while expanding * @min_size: Minimum size while shrinking - * @nulls_base: Base value to generate nulls marker - * @automatic_shrinking: Enable automatic shrinking of tables * @locks_mul: Number of bucket locks to allocate per cpu (default: 128) + * @automatic_shrinking: Enable automatic shrinking of tables + * @nulls_base: Base value to generate nulls marker * @hashfn: Hash function (default: jhash2 if !(key_len % 4), or jhash) * @obj_hashfn: Function to hash object * @obj_cmpfn: Function to compare key with object */ struct rhashtable_params { - size_t nelem_hint; - size_t key_len; - size_t key_offset; - size_t head_offset; + u16 nelem_hint; + u16 key_len; + u16 key_offset; + u16 head_offset; unsigned int max_size; - unsigned int min_size; - u32 nulls_base; + u16 min_size; bool automatic_shrinking; - size_t locks_mul; + u8 locks_mul; + u32 nulls_base; rht_hashfn_t hashfn; rht_obj_hashfn_t obj_hashfn; rht_obj_cmpfn_t obj_cmpfn; -- cgit v1.2.3 From 7ed8578a96ad98231d8bf6388f776e034673e18a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Apr 2017 09:40:37 +0200 Subject: dm rq: change ->rq_end_io calling conventions Instead of returning either a DM_ENDIO_* constant or an error code, add a new DM_ENDIO_DONE value that means keep errno as is. This allows us to easily keep the existing error code in cases where we can't push back, and it also prepares for the new block level status codes with strict type checking.
Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 925b63cdef52..5a02fc0ff311 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -593,6 +593,7 @@ extern struct ratelimit_state dm_ratelimit_state; /* * Definitions of return values from target end_io function. */ +#define DM_ENDIO_DONE 0 #define DM_ENDIO_INCOMPLETE 1 #define DM_ENDIO_REQUEUE 2 -- cgit v1.2.3 From 412445acb6cad4cef026daae37c4765fb9942c60 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Apr 2017 09:40:39 +0200 Subject: dm: introduce a new DM_MAPIO_KILL return value This untangles the DM_MAPIO_* values returned from ->clone_and_map_rq from the error codes used by the block layer. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 5a02fc0ff311..78ad0624cdae 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -604,6 +604,7 @@ extern struct ratelimit_state dm_ratelimit_state; #define DM_MAPIO_REMAPPED 1 #define DM_MAPIO_REQUEUE DM_ENDIO_REQUEUE #define DM_MAPIO_DELAY_REQUEUE 3 +#define DM_MAPIO_KILL 4 #define dm_sector_div64(x, y)( \ { \ -- cgit v1.2.3 From 5c0aea0e8d98e38858fbb3a09870ed8487a01da2 Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 28 Apr 2017 17:06:20 +0200 Subject: KVM: x86: don't hold kvm->lock in KVM_SET_GSI_ROUTING MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We needed the lock to avoid racing with creation of the irqchip on x86. As kvm_set_irq_routing() calls srcu_synchronize_expedited(), this lock might be held for a longer time. 
Let's introduce an arch specific callback to check if we can actually add irq routes. For x86, all we have to do is check if we have an irqchip in the kernel. We don't need kvm->lock at that point as the irqchip is marked as initialized only when actually fully created. Reported-by: Steve Rutherford Reviewed-by: Radim Krčmář Fixes: 1df6ddede10a ("KVM: x86: race between KVM_SET_GSI_ROUTING and KVM_CREATE_IRQCHIP") Signed-off-by: David Hildenbrand Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index a5bfffa8c8d4..25cf258a1c9b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1018,6 +1018,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #define KVM_MAX_IRQ_ROUTES 1024 #endif +bool kvm_arch_can_set_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, -- cgit v1.2.3 From 45753c5f315749711b935a2506ee5c10eef5c23d Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 2 May 2017 10:31:18 +0200 Subject: srcu: Debloat the header Linus noticed that the <linux/rcu_segcblist.h> header has huge inline functions which should not be inline at all. As a first step in cleaning this up, move them all to kernel/rcu/ and only keep an absolute minimum of data type defines in the header: before: -rw-r--r-- 1 mingo mingo 22284 May 2 10:25 include/linux/rcu_segcblist.h after: -rw-r--r-- 1 mingo mingo 3180 May 2 10:22 include/linux/rcu_segcblist.h More can be done, such as uninlining the large functions, which inlining is unjustified even if it's an RCU internal matter. Reported-by: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar Signed-off-by: Paul E.
McKenney --- include/linux/rcu_segcblist.h | 628 +----------------------------------------- 1 file changed, 3 insertions(+), 625 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_segcblist.h b/include/linux/rcu_segcblist.h index ced8f313fd05..ba4d2621d9ca 100644 --- a/include/linux/rcu_segcblist.h +++ b/include/linux/rcu_segcblist.h @@ -20,8 +20,8 @@ * Authors: Paul E. McKenney */ -#ifndef __KERNEL_RCU_SEGCBLIST_H -#define __KERNEL_RCU_SEGCBLIST_H +#ifndef __INCLUDE_LINUX_RCU_SEGCBLIST_H +#define __INCLUDE_LINUX_RCU_SEGCBLIST_H /* Simple unsegmented callback lists. */ struct rcu_cblist { @@ -33,102 +33,6 @@ struct rcu_cblist { #define RCU_CBLIST_INITIALIZER(n) { .head = NULL, .tail = &n.head } -/* Initialize simple callback list. */ -static inline void rcu_cblist_init(struct rcu_cblist *rclp) -{ - rclp->head = NULL; - rclp->tail = &rclp->head; - rclp->len = 0; - rclp->len_lazy = 0; -} - -/* Is simple callback list empty? */ -static inline bool rcu_cblist_empty(struct rcu_cblist *rclp) -{ - return !rclp->head; -} - -/* Return number of callbacks in simple callback list. */ -static inline long rcu_cblist_n_cbs(struct rcu_cblist *rclp) -{ - return rclp->len; -} - -/* Return number of lazy callbacks in simple callback list. */ -static inline long rcu_cblist_n_lazy_cbs(struct rcu_cblist *rclp) -{ - return rclp->len_lazy; -} - -/* - * Debug function to actually count the number of callbacks. - * If the number exceeds the limit specified, return -1. - */ -static inline long rcu_cblist_count_cbs(struct rcu_cblist *rclp, long lim) -{ - int cnt = 0; - struct rcu_head **rhpp = &rclp->head; - - for (;;) { - if (!*rhpp) - return cnt; - if (++cnt > lim) - return -1; - rhpp = &(*rhpp)->next; - } -} - -/* - * Dequeue the oldest rcu_head structure from the specified callback - * list. This function assumes that the callback is non-lazy, but - * the caller can later invoke rcu_cblist_dequeued_lazy() if it - * finds otherwise (and if it cares about laziness). 
This allows - * different users to have different ways of determining laziness. - */ -static inline struct rcu_head *rcu_cblist_dequeue(struct rcu_cblist *rclp) -{ - struct rcu_head *rhp; - - rhp = rclp->head; - if (!rhp) - return NULL; - rclp->len--; - rclp->head = rhp->next; - if (!rclp->head) - rclp->tail = &rclp->head; - return rhp; -} - -/* - * Account for the fact that a previously dequeued callback turned out - * to be marked as lazy. - */ -static inline void rcu_cblist_dequeued_lazy(struct rcu_cblist *rclp) -{ - rclp->len_lazy--; -} - -/* - * Interim function to return rcu_cblist head pointer. Longer term, the - * rcu_cblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head *rcu_cblist_head(struct rcu_cblist *rclp) -{ - return rclp->head; -} - -/* - * Interim function to return rcu_cblist head pointer. Longer term, the - * rcu_cblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head **rcu_cblist_tail(struct rcu_cblist *rclp) -{ - WARN_ON_ONCE(rcu_cblist_empty(rclp)); - return rclp->tail; -} - /* Complicated segmented callback lists. ;-) */ /* @@ -183,530 +87,4 @@ struct rcu_segcblist { .tails[RCU_NEXT_TAIL] = &n.head, \ } -/* - * Initialize an rcu_segcblist structure. - */ -static inline void rcu_segcblist_init(struct rcu_segcblist *rsclp) -{ - int i; - - BUILD_BUG_ON(RCU_NEXT_TAIL + 1 != ARRAY_SIZE(rsclp->gp_seq)); - BUILD_BUG_ON(ARRAY_SIZE(rsclp->tails) != ARRAY_SIZE(rsclp->gp_seq)); - rsclp->head = NULL; - for (i = 0; i < RCU_CBLIST_NSEGS; i++) - rsclp->tails[i] = &rsclp->head; - rsclp->len = 0; - rsclp->len_lazy = 0; -} - -/* - * Is the specified rcu_segcblist structure empty? - * - * But careful! The fact that the ->head field is NULL does not - * necessarily imply that there are no callbacks associated with - * this structure. When callbacks are being invoked, they are - * removed as a group. 
If callback invocation must be preempted, - * the remaining callbacks will be added back to the list. Either - * way, the counts are updated later. - * - * So it is often the case that rcu_segcblist_n_cbs() should be used - * instead. - */ -static inline bool rcu_segcblist_empty(struct rcu_segcblist *rsclp) -{ - return !rsclp->head; -} - -/* Return number of callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_cbs(struct rcu_segcblist *rsclp) -{ - return READ_ONCE(rsclp->len); -} - -/* Return number of lazy callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_lazy_cbs(struct rcu_segcblist *rsclp) -{ - return rsclp->len_lazy; -} - -/* Return number of lazy callbacks in segmented callback list. */ -static inline long rcu_segcblist_n_nonlazy_cbs(struct rcu_segcblist *rsclp) -{ - return rsclp->len - rsclp->len_lazy; -} - -/* - * Is the specified rcu_segcblist enabled, for example, not corresponding - * to an offline or callback-offloaded CPU? - */ -static inline bool rcu_segcblist_is_enabled(struct rcu_segcblist *rsclp) -{ - return !!rsclp->tails[RCU_NEXT_TAIL]; -} - -/* - * Disable the specified rcu_segcblist structure, so that callbacks can - * no longer be posted to it. This structure must be empty. - */ -static inline void rcu_segcblist_disable(struct rcu_segcblist *rsclp) -{ - WARN_ON_ONCE(!rcu_segcblist_empty(rsclp)); - WARN_ON_ONCE(rcu_segcblist_n_cbs(rsclp)); - WARN_ON_ONCE(rcu_segcblist_n_lazy_cbs(rsclp)); - rsclp->tails[RCU_NEXT_TAIL] = NULL; -} - -/* - * Is the specified segment of the specified rcu_segcblist structure - * empty of callbacks? - */ -static inline bool rcu_segcblist_segempty(struct rcu_segcblist *rsclp, int seg) -{ - if (seg == RCU_DONE_TAIL) - return &rsclp->head == rsclp->tails[RCU_DONE_TAIL]; - return rsclp->tails[seg - 1] == rsclp->tails[seg]; -} - -/* - * Are all segments following the specified segment of the specified - * rcu_segcblist structure empty of callbacks? 
(The specified - * segment might well contain callbacks.) - */ -static inline bool rcu_segcblist_restempty(struct rcu_segcblist *rsclp, int seg) -{ - return !*rsclp->tails[seg]; -} - -/* - * Does the specified rcu_segcblist structure contain callbacks that - * are ready to be invoked? - */ -static inline bool rcu_segcblist_ready_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_is_enabled(rsclp) && - &rsclp->head != rsclp->tails[RCU_DONE_TAIL]; -} - -/* - * Does the specified rcu_segcblist structure contain callbacks that - * are still pending, that is, not yet ready to be invoked? - */ -static inline bool rcu_segcblist_pend_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_is_enabled(rsclp) && - !rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL); -} - -/* - * Dequeue and return the first ready-to-invoke callback. If there - * are no ready-to-invoke callbacks, return NULL. Disables interrupts - * to avoid interference. Does not protect from interference from other - * CPUs or tasks. - */ -static inline struct rcu_head * -rcu_segcblist_dequeue(struct rcu_segcblist *rsclp) -{ - unsigned long flags; - int i; - struct rcu_head *rhp; - - local_irq_save(flags); - if (!rcu_segcblist_ready_cbs(rsclp)) { - local_irq_restore(flags); - return NULL; - } - rhp = rsclp->head; - BUG_ON(!rhp); - rsclp->head = rhp->next; - for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) { - if (rsclp->tails[i] != &rhp->next) - break; - rsclp->tails[i] = &rsclp->head; - } - smp_mb(); /* Dequeue before decrement for rcu_barrier(). */ - WRITE_ONCE(rsclp->len, rsclp->len - 1); - local_irq_restore(flags); - return rhp; -} - -/* - * Account for the fact that a previously dequeued callback turned out - * to be marked as lazy. 
- */ -static inline void rcu_segcblist_dequeued_lazy(struct rcu_segcblist *rsclp) -{ - unsigned long flags; - - local_irq_save(flags); - rsclp->len_lazy--; - local_irq_restore(flags); -} - -/* - * Return a pointer to the first callback in the specified rcu_segcblist - * structure. This is useful for diagnostics. - */ -static inline struct rcu_head * -rcu_segcblist_first_cb(struct rcu_segcblist *rsclp) -{ - if (rcu_segcblist_is_enabled(rsclp)) - return rsclp->head; - return NULL; -} - -/* - * Return a pointer to the first pending callback in the specified - * rcu_segcblist structure. This is useful just after posting a given - * callback -- if that callback is the first pending callback, then - * you cannot rely on someone else having already started up the required - * grace period. - */ -static inline struct rcu_head * -rcu_segcblist_first_pend_cb(struct rcu_segcblist *rsclp) -{ - if (rcu_segcblist_is_enabled(rsclp)) - return *rsclp->tails[RCU_DONE_TAIL]; - return NULL; -} - -/* - * Does the specified rcu_segcblist structure contain callbacks that - * have not yet been processed beyond having been posted, that is, - * does it contain callbacks in its last segment? - */ -static inline bool rcu_segcblist_new_cbs(struct rcu_segcblist *rsclp) -{ - return rcu_segcblist_is_enabled(rsclp) && - !rcu_segcblist_restempty(rsclp, RCU_NEXT_READY_TAIL); -} - -/* - * Enqueue the specified callback onto the specified rcu_segcblist - * structure, updating accounting as needed. Note that the ->len - * field may be accessed locklessly, hence the WRITE_ONCE(). - * The ->len field is used by rcu_barrier() and friends to determine - * if it must post a callback on this structure, and it is OK - * for rcu_barrier() to sometimes post callbacks needlessly, but - * absolutely not OK for it to ever miss posting a callback. 
- */ -static inline void rcu_segcblist_enqueue(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy) -{ - WRITE_ONCE(rsclp->len, rsclp->len + 1); /* ->len sampled locklessly. */ - if (lazy) - rsclp->len_lazy++; - smp_mb(); /* Ensure counts are updated before callback is enqueued. */ - rhp->next = NULL; - *rsclp->tails[RCU_NEXT_TAIL] = rhp; - rsclp->tails[RCU_NEXT_TAIL] = &rhp->next; -} - -/* - * Entrain the specified callback onto the specified rcu_segcblist at - * the end of the last non-empty segment. If the entire rcu_segcblist - * is empty, make no change, but return false. - * - * This is intended for use by rcu_barrier()-like primitives, -not- - * for normal grace-period use. IMPORTANT: The callback you enqueue - * will wait for all prior callbacks, NOT necessarily for a grace - * period. You have been warned. - */ -static inline bool rcu_segcblist_entrain(struct rcu_segcblist *rsclp, - struct rcu_head *rhp, bool lazy) -{ - int i; - - if (rcu_segcblist_n_cbs(rsclp) == 0) - return false; - WRITE_ONCE(rsclp->len, rsclp->len + 1); - if (lazy) - rsclp->len_lazy++; - smp_mb(); /* Ensure counts are updated before callback is entrained. */ - rhp->next = NULL; - for (i = RCU_NEXT_TAIL; i > RCU_DONE_TAIL; i--) - if (rsclp->tails[i] != rsclp->tails[i - 1]) - break; - *rsclp->tails[i] = rhp; - for (; i <= RCU_NEXT_TAIL; i++) - rsclp->tails[i] = &rhp->next; - return true; -} - -/* - * Extract only the counts from the specified rcu_segcblist structure, - * and place them in the specified rcu_cblist structure. This function - * supports both callback orphaning and invocation, hence the separation - * of counts and callbacks. (Callbacks ready for invocation must be - * orphaned and adopted separately from pending callbacks, but counts - * apply to all callbacks. Locking must be used to make sure that - * both orphaned-callbacks lists are consistent.) 
- */ -static inline void rcu_segcblist_extract_count(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - rclp->len_lazy += rsclp->len_lazy; - rclp->len += rsclp->len; - rsclp->len_lazy = 0; - WRITE_ONCE(rsclp->len, 0); /* ->len sampled locklessly. */ -} - -/* - * Extract only those callbacks ready to be invoked from the specified - * rcu_segcblist structure and place them in the specified rcu_cblist - * structure. - */ -static inline void rcu_segcblist_extract_done_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - int i; - - if (!rcu_segcblist_ready_cbs(rsclp)) - return; /* Nothing to do. */ - *rclp->tail = rsclp->head; - rsclp->head = *rsclp->tails[RCU_DONE_TAIL]; - *rsclp->tails[RCU_DONE_TAIL] = NULL; - rclp->tail = rsclp->tails[RCU_DONE_TAIL]; - for (i = RCU_CBLIST_NSEGS - 1; i >= RCU_DONE_TAIL; i--) - if (rsclp->tails[i] == rsclp->tails[RCU_DONE_TAIL]) - rsclp->tails[i] = &rsclp->head; -} - -/* - * Extract only those callbacks still pending (not yet ready to be - * invoked) from the specified rcu_segcblist structure and place them in - * the specified rcu_cblist structure. Note that this loses information - * about any callbacks that might have been partway done waiting for - * their grace period. Too bad! They will have to start over. - */ -static inline void -rcu_segcblist_extract_pend_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - int i; - - if (!rcu_segcblist_pend_cbs(rsclp)) - return; /* Nothing to do. */ - *rclp->tail = *rsclp->tails[RCU_DONE_TAIL]; - rclp->tail = rsclp->tails[RCU_NEXT_TAIL]; - *rsclp->tails[RCU_DONE_TAIL] = NULL; - for (i = RCU_DONE_TAIL + 1; i < RCU_CBLIST_NSEGS; i++) - rsclp->tails[i] = rsclp->tails[RCU_DONE_TAIL]; -} - -/* - * Move the entire contents of the specified rcu_segcblist structure, - * counts, callbacks, and all, to the specified rcu_cblist structure. - * @@@ Why do we need this??? Moving early-boot CBs to NOCB lists? - * @@@ Memory barrier needed? (Not if only used at boot time...) 
- */ -static inline void rcu_segcblist_extract_all(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - rcu_segcblist_extract_done_cbs(rsclp, rclp); - rcu_segcblist_extract_pend_cbs(rsclp, rclp); - rcu_segcblist_extract_count(rsclp, rclp); -} - -/* - * Insert counts from the specified rcu_cblist structure in the - * specified rcu_segcblist structure. - */ -static inline void rcu_segcblist_insert_count(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - rsclp->len_lazy += rclp->len_lazy; - /* ->len sampled locklessly. */ - WRITE_ONCE(rsclp->len, rsclp->len + rclp->len); - rclp->len_lazy = 0; - rclp->len = 0; -} - -/* - * Move callbacks from the specified rcu_cblist to the beginning of the - * done-callbacks segment of the specified rcu_segcblist. - */ -static inline void rcu_segcblist_insert_done_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - int i; - - if (!rclp->head) - return; /* No callbacks to move. */ - *rclp->tail = rsclp->head; - rsclp->head = rclp->head; - for (i = RCU_DONE_TAIL; i < RCU_CBLIST_NSEGS; i++) - if (&rsclp->head == rsclp->tails[i]) - rsclp->tails[i] = rclp->tail; - else - break; - rclp->head = NULL; - rclp->tail = &rclp->head; -} - -/* - * Move callbacks from the specified rcu_cblist to the end of the - * new-callbacks segment of the specified rcu_segcblist. - */ -static inline void rcu_segcblist_insert_pend_cbs(struct rcu_segcblist *rsclp, - struct rcu_cblist *rclp) -{ - if (!rclp->head) - return; /* Nothing to do. */ - *rsclp->tails[RCU_NEXT_TAIL] = rclp->head; - rsclp->tails[RCU_NEXT_TAIL] = rclp->tail; - rclp->head = NULL; - rclp->tail = &rclp->head; -} - -/* - * Advance the callbacks in the specified rcu_segcblist structure based - * on the current value passed in for the grace-period counter. 
- */ -static inline void rcu_segcblist_advance(struct rcu_segcblist *rsclp, - unsigned long seq) -{ - int i, j; - - WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); - if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) - return; - - /* - * Find all callbacks whose ->gp_seq numbers indicate that they - * are ready to invoke, and put them into the RCU_DONE_TAIL segment. - */ - for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) { - if (ULONG_CMP_LT(seq, rsclp->gp_seq[i])) - break; - rsclp->tails[RCU_DONE_TAIL] = rsclp->tails[i]; - } - - /* If no callbacks moved, nothing more need be done. */ - if (i == RCU_WAIT_TAIL) - return; - - /* Clean up tail pointers that might have been misordered above. */ - for (j = RCU_WAIT_TAIL; j < i; j++) - rsclp->tails[j] = rsclp->tails[RCU_DONE_TAIL]; - - /* - * Callbacks moved, so clean up the misordered ->tails[] pointers - * that now point into the middle of the list of ready-to-invoke - * callbacks. The overall effect is to copy down the later pointers - * into the gap that was created by the now-ready segments. - */ - for (j = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++, j++) { - if (rsclp->tails[j] == rsclp->tails[RCU_NEXT_TAIL]) - break; /* No more callbacks. */ - rsclp->tails[j] = rsclp->tails[i]; - rsclp->gp_seq[j] = rsclp->gp_seq[i]; - } -} - -/* - * "Accelerate" callbacks based on more-accurate grace-period information. - * The reason for this is that RCU does not synchronize the beginnings and - * ends of grace periods, and that callbacks are posted locally. This in - * turn means that the callbacks must be labelled conservatively early - * on, as getting exact information would degrade both performance and - * scalability. When more accurate grace-period information becomes - * available, previously posted callbacks can be "accelerated", marking - * them to complete at the end of the earlier grace period. 
- * - * This function operates on an rcu_segcblist structure, and also the - * grace-period sequence number seq at which new callbacks would become - * ready to invoke. Returns true if there are callbacks that won't be - * ready to invoke until seq, false otherwise. - */ -static inline bool rcu_segcblist_accelerate(struct rcu_segcblist *rsclp, - unsigned long seq) -{ - int i; - - WARN_ON_ONCE(!rcu_segcblist_is_enabled(rsclp)); - if (rcu_segcblist_restempty(rsclp, RCU_DONE_TAIL)) - return false; - - /* - * Find the segment preceding the oldest segment of callbacks - * whose ->gp_seq[] completion is at or after that passed in via - * "seq", skipping any empty segments. This oldest segment, along - * with any later segments, can be merged in with any newly arrived - * callbacks in the RCU_NEXT_TAIL segment, and assigned "seq" - * as their ->gp_seq[] grace-period completion sequence number. - */ - for (i = RCU_NEXT_READY_TAIL; i > RCU_DONE_TAIL; i--) - if (rsclp->tails[i] != rsclp->tails[i - 1] && - ULONG_CMP_LT(rsclp->gp_seq[i], seq)) - break; - - /* - * If all the segments contain callbacks that correspond to - * earlier grace-period sequence numbers than "seq", leave. - * Assuming that the rcu_segcblist structure has enough - * segments in its arrays, this can only happen if some of - * the non-done segments contain callbacks that really are - * ready to invoke. This situation will get straightened - * out by the next call to rcu_segcblist_advance(). - * - * Also advance to the oldest segment of callbacks whose - * ->gp_seq[] completion is at or after that passed in via "seq", - * skipping any empty segments. - */ - if (++i >= RCU_NEXT_TAIL) - return false; - - /* - * Merge all later callbacks, including newly arrived callbacks, - * into the segment located by the for-loop above. 
Assign "seq" - * as the ->gp_seq[] value in order to correctly handle the case - * where there were no pending callbacks in the rcu_segcblist - * structure other than in the RCU_NEXT_TAIL segment. - */ - for (; i < RCU_NEXT_TAIL; i++) { - rsclp->tails[i] = rsclp->tails[RCU_NEXT_TAIL]; - rsclp->gp_seq[i] = seq; - } - return true; -} - -/* - * Scan the specified rcu_segcblist structure for callbacks that need - * a grace period later than the one specified by "seq". We don't look - * at the RCU_DONE_TAIL or RCU_NEXT_TAIL segments because they don't - * have a grace-period sequence number. - */ -static inline bool rcu_segcblist_future_gp_needed(struct rcu_segcblist *rsclp, - unsigned long seq) -{ - int i; - - for (i = RCU_WAIT_TAIL; i < RCU_NEXT_TAIL; i++) - if (rsclp->tails[i - 1] != rsclp->tails[i] && - ULONG_CMP_LT(seq, rsclp->gp_seq[i])) - return true; - return false; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head *rcu_segcblist_head(struct rcu_segcblist *rsclp) -{ - return rsclp->head; -} - -/* - * Interim function to return rcu_segcblist head pointer. Longer term, the - * rcu_segcblist will be used more pervasively, removing the need for this - * function. - */ -static inline struct rcu_head **rcu_segcblist_tail(struct rcu_segcblist *rsclp) -{ - WARN_ON_ONCE(rcu_segcblist_empty(rsclp)); - return rsclp->tails[RCU_NEXT_TAIL]; -} - -#endif /* __KERNEL_RCU_SEGCBLIST_H */ +#endif /* __INCLUDE_LINUX_RCU_SEGCBLIST_H */ -- cgit v1.2.3 From c0332694903a37cf8ecdc9102d5c9e09cf8643d0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Mon, 1 May 2017 08:58:49 -0700 Subject: block: Remove elevator_change() Since commit 84253394927c ("remove the mg_disk driver") removed the only caller of elevator_change(), also remove the elevator_change() function itself. 
Signed-off-by: Bart Van Assche Cc: Christoph Hellwig Cc: Markus Trippelsdorf Signed-off-by: Jens Axboe --- include/linux/elevator.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 3a216318ae73..d44840368ee7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -214,7 +214,6 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct request_queue *, struct elevator_queue *); -extern int elevator_change(struct request_queue *, const char *); extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, struct elevator_type *); -- cgit v1.2.3 From d6296d39e90c9075bc2fc15f1e86dac44930d4b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 1 May 2017 10:19:08 -0600 Subject: blk-mq: update ->init_request and ->exit_request prototypes Remove the request_idx parameter, which can't be used safely now that we support I/O schedulers with blk-mq. Except for a superfluous check in mtip32xx it was unused anyway. Also pass the tag_set instead of just the driver data - this allows drivers to avoid some code duplication in a follow-on cleanup.
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f3e5e1de1bdb..a104832e7ae5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -86,9 +86,9 @@ typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); -typedef int (init_request_fn)(void *, struct request *, unsigned int, +typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int, unsigned int); -typedef void (exit_request_fn)(void *, struct request *, unsigned int, +typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); typedef int (reinit_request_fn)(void *, struct request *); -- cgit v1.2.3 From 45a0642b4d021a2f50d5db9c191b5bfe60bfa1c7 Mon Sep 17 00:00:00 2001 From: Paul Moore Date: Tue, 2 May 2017 10:16:05 -0400 Subject: audit: kernel generated netlink traffic should have a portid of 0 We were setting the portid incorrectly in the netlink message headers, fix that to always be 0 (nlmsg_pid = 0). 
Signed-off-by: Paul Moore Reviewed-by: Richard Guy Briggs --- include/linux/audit.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 504e784b7ffa..cc0497c39472 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -163,8 +163,7 @@ extern void audit_log_task_info(struct audit_buffer *ab, extern int audit_update_lsm_rules(void); /* Private API (for audit.c only) */ -extern int audit_rule_change(int type, __u32 portid, int seq, - void *data, size_t datasz); +extern int audit_rule_change(int type, int seq, void *data, size_t datasz); extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; -- cgit v1.2.3 From 2115bb250f260089743e26decfb5f271ba71ca37 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Tue, 2 May 2017 10:16:05 -0400 Subject: audit: Use timespec64 to represent audit timestamps struct timespec is not y2038 safe. Audit timestamps are recorded in string format into an audit buffer for a given context. These mark the entry timestamps for the syscalls. Use y2038-safe struct timespec64 to represent the times. The log strings can handle this transition as strings can hold up to 1024 characters.
Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Acked-by: Paul Moore Acked-by: Richard Guy Briggs Signed-off-by: Paul Moore --- include/linux/audit.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index cc0497c39472..2150bdccfbab 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -331,7 +331,7 @@ static inline void audit_ptrace(struct task_struct *t) /* Private API (for audit.c only) */ extern unsigned int audit_serial(void); extern int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial); + struct timespec64 *t, unsigned int *serial); extern int audit_set_loginuid(kuid_t loginuid); static inline kuid_t audit_get_loginuid(struct task_struct *tsk) @@ -510,7 +510,7 @@ static inline void __audit_seccomp(unsigned long syscall, long signr, int code) static inline void audit_seccomp(unsigned long syscall, long signr, int code) { } static inline int auditsc_get_stamp(struct audit_context *ctx, - struct timespec *t, unsigned int *serial) + struct timespec64 *t, unsigned int *serial) { return 0; } -- cgit v1.2.3 From 8d3f87d8cd0a16c58ae7e4410938528866c1c0db Mon Sep 17 00:00:00 2001 From: "sudarsana.kalluru@cavium.com" Date: Tue, 2 May 2017 01:11:03 -0700 Subject: qed*: Fix issues in the ptp filter config implementation. PTP hardware filter configuration performed by the driver for a given user requested config is not correct for some of the PTP modes. Following changes are needed for PTP config-filter implementation. 1. NIG_REG_TX_PTP_EN register - Bits 0/1/2 respectively enables TimeSync/"V1 frame format support"/"V2 frame format support" on the TX side. Set the associated bits based on the user request. 2. ptp4l application fails to operate in Peer Delay mode. Following changes are needed to fix this, a. 
Driver should enable (set to 0) DA #1-related bits for IPv4, IPv6 and MAC destination addresses in these registers: NIG_REG_TX_LLH_PTP_RULE_MASK NIG_REG_LLH_PTP_RULE_MASK b. NIG_REG_LLH_PTP_PARAM_MASK/NIG_REG_TX_LLH_PTP_PARAM_MASK should be set to 0x0 in all modes. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 23 +++++++++++++++++------ 1 file changed, 17 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 15fa7c6e4c6f..d66d16a559e1 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -164,10 +164,21 @@ struct qed_eth_cb_ops { #define QED_MAX_PHC_DRIFT_PPB 291666666 enum qed_ptp_filter_type { - QED_PTP_FILTER_L2, - QED_PTP_FILTER_IPV4, - QED_PTP_FILTER_IPV4_IPV6, - QED_PTP_FILTER_L2_IPV4_IPV6 + QED_PTP_FILTER_NONE, + QED_PTP_FILTER_ALL, + QED_PTP_FILTER_V1_L4_EVENT, + QED_PTP_FILTER_V1_L4_GEN, + QED_PTP_FILTER_V2_L4_EVENT, + QED_PTP_FILTER_V2_L4_GEN, + QED_PTP_FILTER_V2_L2_EVENT, + QED_PTP_FILTER_V2_L2_GEN, + QED_PTP_FILTER_V2_EVENT, + QED_PTP_FILTER_V2_GEN +}; + +enum qed_ptp_hwtstamp_tx_type { + QED_PTP_HWTSTAMP_TX_OFF, + QED_PTP_HWTSTAMP_TX_ON, }; #ifdef CONFIG_DCB @@ -230,8 +241,8 @@ struct qed_eth_dcbnl_ops { #endif struct qed_eth_ptp_ops { - int (*hwtstamp_tx_on)(struct qed_dev *); - int (*cfg_rx_filters)(struct qed_dev *, enum qed_ptp_filter_type); + int (*cfg_filters)(struct qed_dev *, enum qed_ptp_filter_type, + enum qed_ptp_hwtstamp_tx_type); int (*read_rx_ts)(struct qed_dev *, u64 *); int (*read_tx_ts)(struct qed_dev *, u64 *); int (*read_cc)(struct qed_dev *, u64 *); -- cgit v1.2.3 From 9b2bbdb227588455afcc3b03475fa9b0a35d83af Mon Sep 17 00:00:00 2001 From: "Michael S. 
Tsirkin" Date: Mon, 6 Mar 2017 18:19:39 +0200 Subject: virtio: wrap find_vqs We are going to add more parameters to find_vqs, let's wrap the call so we don't need to tweak all drivers every time. Signed-off-by: Michael S. Tsirkin --- include/linux/virtio_config.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 8355bab175e1..47f3d805c290 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -179,6 +179,15 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, return vq; } +static inline +int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], + struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, desc); +} + /** * virtio_device_ready - enable vq use in probe function * @vdev: the device -- cgit v1.2.3 From f94682dde5ed23eed13533a37dfce942e60ade4e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Mon, 6 Mar 2017 18:32:29 +0200 Subject: virtio: add context flag to find vqs Allows maintaining extra context per vq. For ease of use, passing in NULL is legal and disables the feature for all vqs. Includes fixes by Christian for s390, acked by Cornelia. Signed-off-by: Christian Borntraeger Acked-by: Cornelia Huck Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio_config.h | 18 +++++++++++++++--- include/linux/virtio_ring.h | 3 +++ 2 files changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/virtio_config.h b/include/linux/virtio_config.h index 47f3d805c290..0133d8a12ccd 100644 --- a/include/linux/virtio_config.h +++ b/include/linux/virtio_config.h @@ -72,7 +72,8 @@ struct virtio_config_ops { void (*reset)(struct virtio_device *vdev); int (*find_vqs)(struct virtio_device *, unsigned nvqs, struct virtqueue *vqs[], vq_callback_t *callbacks[], - const char * const names[], struct irq_affinity *desc); + const char * const names[], const bool *ctx, + struct irq_affinity *desc); void (*del_vqs)(struct virtio_device *); u64 (*get_features)(struct virtio_device *vdev); int (*finalize_features)(struct virtio_device *vdev); @@ -173,7 +174,8 @@ struct virtqueue *virtio_find_single_vq(struct virtio_device *vdev, vq_callback_t *callbacks[] = { c }; const char *names[] = { n }; struct virtqueue *vq; - int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL); + int err = vdev->config->find_vqs(vdev, 1, &vq, callbacks, names, NULL, + NULL); if (err < 0) return ERR_PTR(err); return vq; @@ -185,7 +187,17 @@ int virtio_find_vqs(struct virtio_device *vdev, unsigned nvqs, const char * const names[], struct irq_affinity *desc) { - return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, desc); + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, NULL, desc); +} + +static inline +int virtio_find_vqs_ctx(struct virtio_device *vdev, unsigned nvqs, + struct virtqueue *vqs[], vq_callback_t *callbacks[], + const char * const names[], const bool *ctx, + struct irq_affinity *desc) +{ + return vdev->config->find_vqs(vdev, nvqs, vqs, callbacks, names, ctx, + desc); } /** diff --git a/include/linux/virtio_ring.h b/include/linux/virtio_ring.h index e8d36938f09a..270cfa81830e 100644 --- a/include/linux/virtio_ring.h +++ 
b/include/linux/virtio_ring.h @@ -71,6 +71,7 @@ struct virtqueue *vring_create_virtqueue(unsigned int index, struct virtio_device *vdev, bool weak_barriers, bool may_reduce_num, + bool ctx, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), const char *name); @@ -80,6 +81,7 @@ struct virtqueue *__vring_new_virtqueue(unsigned int index, struct vring vring, struct virtio_device *vdev, bool weak_barriers, + bool ctx, bool (*notify)(struct virtqueue *), void (*callback)(struct virtqueue *), const char *name); @@ -93,6 +95,7 @@ struct virtqueue *vring_new_virtqueue(unsigned int index, unsigned int vring_align, struct virtio_device *vdev, bool weak_barriers, + bool ctx, void *pages, bool (*notify)(struct virtqueue *vq), void (*callback)(struct virtqueue *vq), -- cgit v1.2.3 From 5a08b04f637921e44ba767c07c74b0535504ab71 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 7 Feb 2017 06:15:13 +0200 Subject: virtio: allow extra context per descriptor Allow extra context per descriptor. To avoid slow down for data path, this disables use of indirect descriptors for this vq. Signed-off-by: Michael S. 
Tsirkin --- include/linux/virtio.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index 7edfbdb55a99..ed04753278d4 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -44,6 +44,12 @@ int virtqueue_add_inbuf(struct virtqueue *vq, void *data, gfp_t gfp); +int virtqueue_add_inbuf_ctx(struct virtqueue *vq, + struct scatterlist sg[], unsigned int num, + void *data, + void *ctx, + gfp_t gfp); + int virtqueue_add_sgs(struct virtqueue *vq, struct scatterlist *sgs[], unsigned int out_sgs, @@ -59,6 +65,9 @@ bool virtqueue_notify(struct virtqueue *vq); void *virtqueue_get_buf(struct virtqueue *vq, unsigned int *len); +void *virtqueue_get_buf_ctx(struct virtqueue *vq, unsigned int *len, + void **ctx); + void virtqueue_disable_cb(struct virtqueue *vq); bool virtqueue_enable_cb(struct virtqueue *vq); -- cgit v1.2.3 From b9dd46188edc2f0d1f37328637860bb65a771124 Mon Sep 17 00:00:00 2001 From: Jin Qian Date: Tue, 25 Apr 2017 16:28:48 -0700 Subject: f2fs: sanity check segment count F2FS uses 4 bytes to represent block address. As a result, supported size of disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. Signed-off-by: Jin Qian Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 639cbdf65e2b..093549e10ee2 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -301,6 +301,12 @@ struct f2fs_nat_block { #define SIT_VBLOCK_MAP_SIZE 64 #define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) +/* + * F2FS uses 4 bytes to represent block address. As a result, supported size of + * disk is 16 TB and it equals to 16 * 1024 * 1024 / 2 segments. + */ +#define F2FS_MAX_SEGMENT ((16 * 1024 * 1024) / 2) + /* * Note that f2fs_sit_entry->vblocks has the following bit-field information. 
* [15:10] : allocation type such as CURSEG_XXXX_TYPE -- cgit v1.2.3 From 4d463c4dbc5c1c5d73e488d52faeec05570443a0 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 3 May 2017 00:39:17 +0200 Subject: xdp: use common helper for netlink extended ack reporting Small follow-up to d74a32acd59a ("xdp: use netlink extended ACK reporting") in order to let drivers all use the same NL_SET_ERR_MSG_MOD() helper macro for reporting. This also ensures that we consistently add the driver's prefix for dumping the report in user space to indicate that the error message is driver specific and not coming from core code. Furthermore, NL_SET_ERR_MSG_MOD() now reuses NL_SET_ERR_MSG() and thus makes all macros check the pointer as suggested. References: https://www.spinics.net/lists/netdev/msg433267.html Signed-off-by: Daniel Borkmann Acked-by: Jakub Kicinski Reviewed-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index c20395edf2de..5fff5ba5964e 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -86,19 +86,16 @@ struct netlink_ext_ack { * Currently string formatting is not supported (due * to the lack of an output buffer.) 
*/ -#define NL_SET_ERR_MSG(extack, msg) do { \ - static const char _msg[] = (msg); \ - \ - (extack)->_msg = _msg; \ +#define NL_SET_ERR_MSG(extack, msg) do { \ + static const char __msg[] = (msg); \ + struct netlink_ext_ack *__extack = (extack); \ + \ + if (__extack) \ + __extack->_msg = __msg; \ } while (0) -#define NL_MOD_TRY_SET_ERR_MSG(extack, msg) do { \ - static const char _msg[] = KBUILD_MODNAME ": " msg; \ - struct netlink_ext_ack *_extack = (extack); \ - \ - if (_extack) \ - _extack->_msg = _msg; \ -} while (0) +#define NL_SET_ERR_MSG_MOD(extack, msg) \ + NL_SET_ERR_MSG((extack), KBUILD_MODNAME ": " msg) extern void netlink_kernel_release(struct sock *sk); extern int __netlink_change_ngroups(struct sock *sk, unsigned int groups); -- cgit v1.2.3 From 4e335d9e7ddbcf83d03e7fbe65797ebed2272c18 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Tue, 2 May 2017 16:20:18 +0200 Subject: Revert "KVM: Support vCPU-based gfn->hva cache" This reverts commit bbd6411513aa8ef3ea02abab61318daf87c1af1e. I've been sitting on this revert for too long and it unfortunately missed 4.11. It's also the reason why I haven't merged ring-based dirty tracking for 4.12. Using kvm_vcpu_memslots in kvm_gfn_to_hva_cache_init and kvm_vcpu_write_guest_offset_cached means that the MSR value can now be used to access SMRAM, simply by making it point to an SMRAM physical address. This is problematic because it lets the guest OS overwrite memory that it shouldn't be able to touch. 
Cc: stable@vger.kernel.org Fixes: bbd6411513aa8ef3ea02abab61318daf87c1af1e Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 25cf258a1c9b..3727afdf614d 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -650,18 +650,18 @@ int kvm_read_guest_page(struct kvm *kvm, gfn_t gfn, void *data, int offset, int kvm_read_guest_atomic(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); int kvm_read_guest(struct kvm *kvm, gpa_t gpa, void *data, unsigned long len); -int kvm_vcpu_read_guest_cached(struct kvm_vcpu *vcpu, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); +int kvm_read_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); int kvm_write_guest_page(struct kvm *kvm, gfn_t gfn, const void *data, int offset, int len); int kvm_write_guest(struct kvm *kvm, gpa_t gpa, const void *data, unsigned long len); -int kvm_vcpu_write_guest_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - void *data, unsigned long len); -int kvm_vcpu_write_guest_offset_cached(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - void *data, int offset, unsigned long len); -int kvm_vcpu_gfn_to_hva_cache_init(struct kvm_vcpu *v, struct gfn_to_hva_cache *ghc, - gpa_t gpa, unsigned long len); +int kvm_write_guest_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, unsigned long len); +int kvm_write_guest_offset_cached(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + void *data, int offset, unsigned long len); +int kvm_gfn_to_hva_cache_init(struct kvm *kvm, struct gfn_to_hva_cache *ghc, + gpa_t gpa, unsigned long len); int kvm_clear_guest_page(struct kvm *kvm, gfn_t gfn, int offset, int len); int kvm_clear_guest(struct kvm *kvm, gpa_t gpa, unsigned long len); struct kvm_memory_slot *gfn_to_memslot(struct kvm *kvm, gfn_t gfn); 
-- cgit v1.2.3 From 1f43e2ad7bff54f7c82a084a57e5c90da0d3f4d9 Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Fri, 28 Apr 2017 13:56:08 +0800 Subject: f2fs: introduce CP_TRIMMED_FLAG to avoid unneeded discard Introduce CP_TRIMMED_FLAG to indicate all invalid block were trimmed before umount, so once we do mount with image which contain the flag, we don't record invalid blocks as undiscard one, when fstrim is being triggered, we can avoid issuing redundant discard commands. Signed-off-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 093549e10ee2..b6feed6547ce 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -114,6 +114,7 @@ struct f2fs_super_block { /* * For checkpoint */ +#define CP_TRIMMED_FLAG 0x00000100 #define CP_NAT_BITS_FLAG 0x00000080 #define CP_CRC_RECOVERY_FLAG 0x00000040 #define CP_FASTBOOT_FLAG 0x00000020 -- cgit v1.2.3 From c73322d098e4b6f5f0f0fa1330bf57e218775539 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:51:51 -0700 Subject: mm: fix 100% CPU kswapd busyloop on unreclaimable nodes Patch series "mm: kswapd spinning on unreclaimable nodes - fixes and cleanups". Jia reported a scenario in which the kswapd of a node indefinitely spins at 100% CPU usage. We have seen similar cases at Facebook. The kernel's current method of judging its ability to reclaim a node (or whether to back off and sleep) is based on the amount of scanned pages in proportion to the amount of reclaimable pages. In Jia's and our scenarios, there are no reclaimable pages in the node, however, and the condition for backing off is never met. Kswapd busyloops in an attempt to restore the watermarks while having nothing to work with. 
This series reworks the definition of an unreclaimable node based not on scanning but on whether kswapd is able to actually reclaim pages in MAX_RECLAIM_RETRIES (16) consecutive runs. This is the same criteria the page allocator uses for giving up on direct reclaim and invoking the OOM killer. If it cannot free any pages, kswapd will go to sleep and leave further attempts to direct reclaim invocations, which will either make progress and re-enable kswapd, or invoke the OOM killer. Patch #1 fixes the immediate problem Jia reported, the remainder are smaller fixlets, cleanups, and overall phasing out of the old method. Patch #6 is the odd one out. It's a nice cleanup to get_scan_count(), and directly related to #5, but in itself not relevant to the series. If the whole series is too ambitious for 4.11, I would consider the first three patches fixes, the rest cleanups. This patch (of 9): Jia He reports a problem with kswapd spinning at 100% CPU when requesting more hugepages than memory available in the system: $ echo 4000 >/proc/sys/vm/nr_hugepages top - 13:42:59 up 3:37, 1 user, load average: 1.09, 1.03, 1.01 Tasks: 1 total, 1 running, 0 sleeping, 0 stopped, 0 zombie %Cpu(s): 0.0 us, 12.5 sy, 0.0 ni, 85.5 id, 2.0 wa, 0.0 hi, 0.0 si, 0.0 st KiB Mem: 31371520 total, 30915136 used, 456384 free, 320 buffers KiB Swap: 6284224 total, 115712 used, 6168512 free. 48192 cached Mem PID USER PR NI VIRT RES SHR S %CPU %MEM TIME+ COMMAND 76 root 20 0 0 0 0 R 100.0 0.000 217:17.29 kswapd3 At that time, there are no reclaimable pages left in the node, but as kswapd fails to restore the high watermarks it refuses to go to sleep. Kswapd needs to back away from nodes that fail to balance. Up until commit 1d82de618ddd ("mm, vmscan: make kswapd reclaim in terms of nodes") kswapd had such a mechanism. It considered zones whose theoretically reclaimable pages it had reclaimed six times over as unreclaimable and backed away from them. 
This guard was erroneously removed as the patch changed the definition of a balanced node. However, simply restoring this code wouldn't help in the case reported here: there *are* no reclaimable pages that could be scanned until the threshold is met. Kswapd would stay awake anyway. Introduce a new and much simpler way of backing off. If kswapd runs through MAX_RECLAIM_RETRIES (16) cycles without reclaiming a single page, make it back off from the node. This is the same number of shots direct reclaim takes before declaring OOM. Kswapd will go to sleep on that node until a direct reclaimer manages to reclaim some pages, thus proving the node reclaimable again. [hannes@cmpxchg.org: check kswapd failure against the cumulative nr_reclaimed count] Link: http://lkml.kernel.org/r/20170306162410.GB2090@cmpxchg.org [shakeelb@google.com: fix condition for throttle_direct_reclaim] Link: http://lkml.kernel.org/r/20170314183228.20152-1-shakeelb@google.com Link: http://lkml.kernel.org/r/20170228214007.5621-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Signed-off-by: Shakeel Butt Reported-by: Jia He Tested-by: Jia He Acked-by: Michal Hocko Acked-by: Hillf Danton Acked-by: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 8e02b3750fe0..d2c50ab6ae40 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -630,6 +630,8 @@ typedef struct pglist_data { int kswapd_order; enum zone_type kswapd_classzone_idx; + int kswapd_failures; /* Number of 'reclaimed == 0' runs */ + #ifdef CONFIG_COMPACTION int kcompactd_max_order; enum zone_type kcompactd_classzone_idx; -- cgit v1.2.3 From c822f6223d03c2c5b026a21da09c6b6d523258cd Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:52:10 -0700 Subject: mm: delete NR_PAGES_SCANNED and pgdat_reclaimable() NR_PAGES_SCANNED counts number 
of pages scanned since the last page free event in the allocator. This was used primarily to measure the reclaimability of zones and nodes, and determine when reclaim should give up on them. In that role, it has been replaced in the preceding patches by a different mechanism. Being implemented as an efficient vmstat counter, it was automatically exported to userspace as well. It's however unlikely that anyone outside the kernel is using this counter in any meaningful way. Remove the counter and the unused pgdat_reclaimable(). Link: http://lkml.kernel.org/r/20170228214007.5621-8-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Hillf Danton Acked-by: Michal Hocko Cc: Jia He Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d2c50ab6ae40..04e0969966f6 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -149,7 +149,6 @@ enum node_stat_item { NR_UNEVICTABLE, /* " " " " " */ NR_ISOLATED_ANON, /* Temporary isolated pages from anon lru */ NR_ISOLATED_FILE, /* Temporary isolated pages from file lru */ - NR_PAGES_SCANNED, /* pages scanned since last reclaim */ WORKINGSET_REFAULT, WORKINGSET_ACTIVATE, WORKINGSET_NODERECLAIM, -- cgit v1.2.3 From a128ca71fb29ed4444b80f38a0148b468826e19b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 3 May 2017 14:52:22 -0700 Subject: mm: delete unnecessary TTU_* flags Patch series "mm: fix some MADV_FREE issues", v5. We are trying to use MADV_FREE in jemalloc. Several issues are found. Without solving the issues, jemalloc can't use the MADV_FREE feature. - Doesn't support system without swap enabled. Because if swap is off, we can't or can't efficiently age anonymous pages. And since MADV_FREE pages are mixed with other anonymous pages, we can't reclaim MADV_FREE pages. 
In current implementation, MADV_FREE will fallback to MADV_DONTNEED without swap enabled. But in our environment, a lot of machines don't enable swap. This will prevent our setup using MADV_FREE. - Increases memory pressure. page reclaim bias file pages reclaim against anonymous pages. This doesn't make sense for MADV_FREE pages, because those pages could be freed easily and refilled with very slight penalty. Even if page reclaim doesn't bias file pages, there is still an issue, because MADV_FREE pages and other anonymous pages are mixed together. To reclaim a MADV_FREE page, we probably must scan a lot of other anonymous pages, which is inefficient. In our test, we usually see oom with MADV_FREE enabled and nothing without it. - Accounting. There are two accounting problems. We don't have a global accounting. If the system is abnormal, we don't know if it's a problem from MADV_FREE side. The other problem is RSS accounting. MADV_FREE pages are accounted as normal anon pages and reclaimed lazily, so application's RSS becomes bigger. This confuses our workloads. We have monitoring daemon running and if it finds applications' RSS becomes abnormal, the daemon will kill the applications even kernel can reclaim the memory easily. To address the first two issues, we can either put MADV_FREE pages into a separate LRU list (Minchan's previous patches and V1 patches), or put them into LRU_INACTIVE_FILE list (suggested by Johannes). The patchset uses the second idea. The reason is LRU_INACTIVE_FILE list is tiny nowadays and should be full of used once file pages. So we can still efficiently reclaim MADV_FREE pages there without interference with other anon and active file pages. Putting the pages into inactive file list also has an advantage which allows page reclaim to prioritize MADV_FREE pages and used once file pages. MADV_FREE pages are put into the lru list and clear SwapBacked flag, so PageAnon(page) && !PageSwapBacked(page) will indicate a MADV_FREE page.
These pages will be directly freed without pageout if they are clean, otherwise normal swap will reclaim them. For the third issue, the previous post adds global accounting and a separate RSS count for MADV_FREE pages. The problem is we never get accurate accounting for MADV_FREE pages. The pages are mapped to userspace, can be dirtied without notice from kernel side. To get accurate accounting, we could write protect the page, but then there is extra page fault overhead, which people don't want to pay. Jemalloc guys have concerns about the inaccurate accounting, so this post drops the accounting patches temporarily. The info exported to /proc/pid/smaps for MADV_FREE pages is kept, which is the only place we can get accurate accounting right now. This patch (of 6): Johannes pointed out TTU_LZFREE is unnecessary. It's true because we always have the flag set if we want to do an unmap. For cases we don't do an unmap, the TTU_LZFREE part of code should never run. Also the TTU_UNMAP is unnecessary. If no other flags are set (for example, TTU_MIGRATION), an unmap is implied.
The patch includes Johannes's cleanup and dead TTU_ACTION macro removal code Link: http://lkml.kernel.org/r/4be3ea1bc56b26fd98a54d0a6f70bec63f6d8980.1487965799.git.shli@fb.com Signed-off-by: Shaohua Li Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Hillf Danton Acked-by: Michal Hocko Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 8c89e902df3e..7a3941492856 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -83,19 +83,17 @@ struct anon_vma_chain { }; enum ttu_flags { - TTU_UNMAP = 1, /* unmap mode */ - TTU_MIGRATION = 2, /* migration mode */ - TTU_MUNLOCK = 4, /* munlock mode */ - TTU_LZFREE = 8, /* lazy free mode */ - TTU_SPLIT_HUGE_PMD = 16, /* split huge PMD if any */ - - TTU_IGNORE_MLOCK = (1 << 8), /* ignore mlock */ - TTU_IGNORE_ACCESS = (1 << 9), /* don't age */ - TTU_IGNORE_HWPOISON = (1 << 10),/* corrupted page is recoverable */ - TTU_BATCH_FLUSH = (1 << 11), /* Batch TLB flushes where possible + TTU_MIGRATION = 0x1, /* migration mode */ + TTU_MUNLOCK = 0x2, /* munlock mode */ + + TTU_SPLIT_HUGE_PMD = 0x4, /* split huge PMD if any */ + TTU_IGNORE_MLOCK = 0x8, /* ignore mlock */ + TTU_IGNORE_ACCESS = 0x10, /* don't age */ + TTU_IGNORE_HWPOISON = 0x20, /* corrupted page is recoverable */ + TTU_BATCH_FLUSH = 0x40, /* Batch TLB flushes where possible * and caller guarantees they will * do a final flush if necessary */ - TTU_RMAP_LOCKED = (1 << 12) /* do not grab rmap lock: + TTU_RMAP_LOCKED = 0x80 /* do not grab rmap lock: * caller holds it */ }; @@ -193,8 +191,6 @@ static inline void page_dup_rmap(struct page *page, bool compound) int page_referenced(struct page *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -#define TTU_ACTION(x) 
((x) & TTU_ACTION_MASK) - int try_to_unmap(struct page *, enum ttu_flags flags); /* Avoid racy checks */ -- cgit v1.2.3 From f7ad2a6cb9f7c4040004bedee84a70a9b985583e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 3 May 2017 14:52:29 -0700 Subject: mm: move MADV_FREE pages into LRU_INACTIVE_FILE list madv()'s MADV_FREE indicate pages are 'lazyfree'. They are still anonymous pages, but they can be freed without pageout. To distinguish these from normal anonymous pages, we clear their SwapBacked flag. MADV_FREE pages could be freed without pageout, so they pretty much like used once file pages. For such pages, we'd like to reclaim them once there is memory pressure. Also it might be unfair reclaiming MADV_FREE pages always before used once file pages and we definitively want to reclaim the pages before other anonymous and file pages. To speed up MADV_FREE pages reclaim, we put the pages into LRU_INACTIVE_FILE list. The rationale is LRU_INACTIVE_FILE list is tiny nowadays and should be full of used once file pages. Reclaiming MADV_FREE pages will not have much interfere of anonymous and active file pages. And the inactive file pages and MADV_FREE pages will be reclaimed according to their age, so we don't reclaim too many MADV_FREE pages too. Putting the MADV_FREE pages into LRU_INACTIVE_FILE_LIST also means we can reclaim the pages without swap support. This idea is suggested by Johannes. This patch doesn't move MADV_FREE pages to LRU_INACTIVE_FILE list yet to avoid bisect failure, next patch will do it. The patch is based on Minchan's original patch. 
[akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/2f87063c1e9354677b7618c647abde77b07561e5.1487965799.git.shli@fb.com Signed-off-by: Shaohua Li Suggested-by: Johannes Weiner Acked-by: Johannes Weiner Acked-by: Minchan Kim Acked-by: Michal Hocko Acked-by: Hillf Danton Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 2 +- include/linux/vm_event_item.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 45e91dd6716d..486494e6b2fc 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -279,7 +279,7 @@ extern void lru_add_drain_cpu(int cpu); extern void lru_add_drain_all(void); extern void rotate_reclaimable_page(struct page *page); extern void deactivate_file_page(struct page *page); -extern void deactivate_page(struct page *page); +extern void mark_page_lazyfree(struct page *page); extern void swap_setup(void); extern void add_page_to_unevictable_list(struct page *page); diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index a80b7b59cf33..d84ae90ccd5c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -25,7 +25,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), FOR_ALL_ZONES(ALLOCSTALL), FOR_ALL_ZONES(PGSCAN_SKIP), - PGFREE, PGACTIVATE, PGDEACTIVATE, + PGFREE, PGACTIVATE, PGDEACTIVATE, PGLAZYFREE, PGFAULT, PGMAJFAULT, PGLAZYFREED, PGREFILL, -- cgit v1.2.3 From 802a3a92ad7ac0b9be9df229dee530a1f0a8039b Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 3 May 2017 14:52:32 -0700 Subject: mm: reclaim MADV_FREE pages When memory pressure is high, we free MADV_FREE pages. If the pages are not dirty in pte, the pages could be freed immediately. Otherwise we can't reclaim them. 
We put the pages back to anonymous LRU list (by setting SwapBacked flag) and the pages will be reclaimed in normal swapout way. We use normal page reclaim policy. Since MADV_FREE pages are put into inactive file list, such pages and inactive file pages are reclaimed according to their age. This is expected, because we don't want to reclaim too many MADV_FREE pages before used once pages. Based on Minchan's original patch [minchan@kernel.org: clean up lazyfree page handling] Link: http://lkml.kernel.org/r/20170303025237.GB3503@bbox Link: http://lkml.kernel.org/r/14b8eb1d3f6bf6cc492833f183ac8c304e560484.1487965799.git.shli@fb.com Signed-off-by: Shaohua Li Signed-off-by: Minchan Kim Acked-by: Minchan Kim Acked-by: Michal Hocko Acked-by: Johannes Weiner Acked-by: Hillf Danton Cc: Hugh Dickins Cc: Rik van Riel Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 7a3941492856..fee10d744ebd 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -298,6 +298,6 @@ static inline int page_mkclean(struct page *page) #define SWAP_AGAIN 1 #define SWAP_FAIL 2 #define SWAP_MLOCK 3 -#define SWAP_LZFREE 4 +#define SWAP_DIRTY 4 #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3 From 9a4caf1e9fa4864ce21ba9584a2c336bfbc72740 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:52:45 -0700 Subject: mm: memcontrol: provide shmem statistics Cgroups currently don't report how much shmem they use, which can be useful data to have, in particular since shmem is included in the cache/file item while being reclaimed like anonymous memory. Add a counter to track shmem pages during charging and uncharging.
Link: http://lkml.kernel.org/r/20170221164343.32252-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Reported-by: Chris Down Cc: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bb7250c45cb8..c5ebb32fef49 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -46,6 +46,7 @@ enum mem_cgroup_stat_index { MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ + MEM_CGROUP_STAT_SHMEM, /* # of pages charged as shmem */ MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ MEM_CGROUP_STAT_DIRTY, /* # of dirty pages in page cache */ MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ -- cgit v1.2.3 From a6ffdc07847e74cc244c02ab6d0351a4a5d77281 Mon Sep 17 00:00:00 2001 From: Xishi Qiu Date: Wed, 3 May 2017 14:52:52 -0700 Subject: mm: use is_migrate_highatomic() to simplify the code Introduce two helpers, is_migrate_highatomic() and is_migrate_highatomic_page(). Simplify the code, no functional changes. 
[akpm@linux-foundation.org: use static inlines rather than macros, per mhocko] Link: http://lkml.kernel.org/r/58B94F15.6060606@huawei.com Signed-off-by: Xishi Qiu Acked-by: Michal Hocko Cc: Vlastimil Babka Cc: Mel Gorman Cc: Minchan Kim Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 04e0969966f6..446cf68c1c09 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -35,7 +35,7 @@ */ #define PAGE_ALLOC_COSTLY_ORDER 3 -enum { +enum migratetype { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, -- cgit v1.2.3 From 7e7844226f1053236b6f6d5d122a06509fb14fd9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 3 May 2017 14:53:09 -0700 Subject: lockdep: allow to disable reclaim lockup detection The current implementation of the reclaim lockup detection can lead to false positives and those even happen and usually lead to tweak the code to silence the lockdep by using GFP_NOFS even though the context can use __GFP_FS just fine. See http://lkml.kernel.org/r/20160512080321.GA18496@dastard as an example. ================================= [ INFO: inconsistent lock state ] 4.5.0-rc2+ #4 Tainted: G O --------------------------------- inconsistent {RECLAIM_FS-ON-R} -> {IN-RECLAIM_FS-W} usage. 
kswapd0/543 [HC0[0]:SC0[0]:HE1:SE1] takes: (&xfs_nondir_ilock_class){++++-+}, at: xfs_ilock+0x177/0x200 [xfs] {RECLAIM_FS-ON-R} state was registered at: mark_held_locks+0x79/0xa0 lockdep_trace_alloc+0xb3/0x100 kmem_cache_alloc+0x33/0x230 kmem_zone_alloc+0x81/0x120 [xfs] xfs_refcountbt_init_cursor+0x3e/0xa0 [xfs] __xfs_refcount_find_shared+0x75/0x580 [xfs] xfs_refcount_find_shared+0x84/0xb0 [xfs] xfs_getbmap+0x608/0x8c0 [xfs] xfs_vn_fiemap+0xab/0xc0 [xfs] do_vfs_ioctl+0x498/0x670 SyS_ioctl+0x79/0x90 entry_SYSCALL_64_fastpath+0x12/0x6f CPU0 ---- lock(&xfs_nondir_ilock_class); lock(&xfs_nondir_ilock_class); *** DEADLOCK *** 3 locks held by kswapd0/543: stack backtrace: CPU: 0 PID: 543 Comm: kswapd0 Tainted: G O 4.5.0-rc2+ #4 Call Trace: lock_acquire+0xd8/0x1e0 down_write_nested+0x5e/0xc0 xfs_ilock+0x177/0x200 [xfs] xfs_reflink_cancel_cow_range+0x150/0x300 [xfs] xfs_fs_evict_inode+0xdc/0x1e0 [xfs] evict+0xc5/0x190 dispose_list+0x39/0x60 prune_icache_sb+0x4b/0x60 super_cache_scan+0x14f/0x1a0 shrink_slab.part.63.constprop.79+0x1e9/0x4e0 shrink_zone+0x15e/0x170 kswapd+0x4f1/0xa80 kthread+0xf2/0x110 ret_from_fork+0x3f/0x70 To quote Dave: "Ignoring whether reflink should be doing anything or not, that's a "xfs_refcountbt_init_cursor() gets called both outside and inside transactions" lockdep false positive case. The problem here is lockdep has seen this allocation from within a transaction, hence a GFP_NOFS allocation, and now it's seeing it in a GFP_KERNEL context. Also note that we have an active reference to this inode. So, because the reclaim annotations overload the interrupt level detections and it's seen the inode ilock been taken in reclaim ("interrupt") context, this triggers a reclaim context warning where it thinks it is unsafe to do this allocation in GFP_KERNEL context holding the inode ilock..." This sounds like a fundamental problem of the reclaim lock detection. 
It is really impossible to annotate such a special usecase IMHO unless the reclaim lockup detection is reworked completely. Until then it is much better to provide a way to add "I know what I am doing flag" and mark problematic places. This would prevent from abusing GFP_NOFS flag which has a runtime effect even on configurations which have lockdep disabled. Introduce __GFP_NOLOCKDEP flag which tells the lockdep gfp tracking to skip the current allocation request. While we are at it also make sure that the radix tree doesn't accidentally override tags stored in the upper part of the gfp_mask. Link: http://lkml.kernel.org/r/20170306131408.9828-3-mhocko@kernel.org Signed-off-by: Michal Hocko Suggested-by: Peter Zijlstra Acked-by: Peter Zijlstra (Intel) Acked-by: Vlastimil Babka Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Jan Kara Cc: Brian Foster Cc: Darrick J. Wong Cc: Nikolay Borisov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index db373b9d3223..978232a3b4ae 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -40,6 +40,11 @@ struct vm_area_struct; #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u +#ifdef CONFIG_LOCKDEP +#define ___GFP_NOLOCKDEP 0x4000000u +#else +#define ___GFP_NOLOCKDEP 0 +#endif /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* @@ -179,8 +184,11 @@ struct vm_area_struct; #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) +/* Disable lockdep for GFP context tracking */ +#define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) + /* Room for N __GFP_FOO bits */ -#define __GFP_BITS_SHIFT 25 +#define __GFP_BITS_SHIFT (25 + IS_ENABLED(CONFIG_LOCKDEP)) #define __GFP_BITS_MASK ((__force gfp_t)((1
<< __GFP_BITS_SHIFT) - 1)) /* -- cgit v1.2.3 From 9070733b4efac4bf17f299a81b01c15e206f9ff5 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 3 May 2017 14:53:12 -0700 Subject: xfs: abstract PF_FSTRANS to PF_MEMALLOC_NOFS xfs has defined PF_FSTRANS to declare a scope GFP_NOFS semantic quite some time ago. We would like to make this concept more generic and use it for other filesystems as well. Let's start by giving the flag a more generic name PF_MEMALLOC_NOFS which is in line with an exiting PF_MEMALLOC_NOIO already used for the same purpose for GFP_NOIO contexts. Replace all PF_FSTRANS usage from the xfs code in the first step before we introduce a full API for it as xfs uses the flag directly anyway. This patch doesn't introduce any functional change. Link: http://lkml.kernel.org/r/20170306131408.9828-4-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Darrick J. Wong Reviewed-by: Brian Foster Acked-by: Vlastimil Babka Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Jan Kara Cc: Nikolay Borisov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d4fa448223f..8ac11465ac5b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1237,6 +1237,8 @@ extern struct pid *cad_pid; #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ +#define PF_MEMALLOC_NOFS PF_FSTRANS /* Transition to a more generic GFP_NOFS scope semantic */ + /* * Only the _current_ task can read/write to tsk->flags, but other * tasks can access tsk->flags in readonly mode for example -- cgit v1.2.3 From 7dea19f9ee636cb244109a4dba426bbb3e5304b7 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 3 May 2017 14:53:15 -0700 Subject: mm: 
introduce memalloc_nofs_{save,restore} API GFP_NOFS context is used for the following 5 reasons currently: - to prevent from deadlocks when the lock held by the allocation context would be needed during the memory reclaim - to prevent from stack overflows during the reclaim because the allocation is performed from a deep context already - to prevent lockups when the allocation context depends on other reclaimers to make a forward progress indirectly - just in case because this would be safe from the fs POV - silence lockdep false positives Unfortunately overuse of this allocation context brings some problems to the MM. Memory reclaim is much weaker (especially during heavy FS metadata workloads), OOM killer cannot be invoked because the MM layer doesn't have enough information about how much memory is freeable by the FS layer. In many cases it is far from clear why the weaker context is even used and so it might be used unnecessarily. We would like to get rid of those as much as possible. One way to do that is to use the flag in scopes rather than isolated cases. Such a scope is declared when really necessary, tracked per task and all the allocation requests from within the context will simply inherit the GFP_NOFS semantic. Not only this is easier to understand and maintain because there are much less problematic contexts than specific allocation requests, this also helps code paths where FS layer interacts with other layers (e.g. crypto, security modules, MM etc...) and there is no easy way to convey the allocation context between the layers. Introduce memalloc_nofs_{save,restore} API to control the scope of GFP_NOFS allocation context. This is basically copying memalloc_noio_{save,restore} API we have for other restricted allocation context GFP_NOIO. The PF_MEMALLOC_NOFS flag already exists and it is just an alias for PF_FSTRANS which has been xfs specific until recently. There are no more PF_FSTRANS users anymore so let's just drop it. 
PF_MEMALLOC_NOFS is now checked in the MM layer and drops __GFP_FS implicitly same as PF_MEMALLOC_NOIO drops __GFP_IO. memalloc_noio_flags is renamed to current_gfp_context because it now cares about both PF_MEMALLOC_NOFS and PF_MEMALLOC_NOIO contexts. Xfs code paths preserve their semantic. kmem_flags_convert() doesn't need to evaluate the flag anymore. This patch shouldn't introduce any functional changes. Let's hope that filesystems will drop direct GFP_NOFS (resp. ~__GFP_FS) usage as much as possible and only use a properly documented memalloc_nofs_{save,restore} checkpoints where they are appropriate. [akpm@linux-foundation.org: fix comment typo, reflow comment] Link: http://lkml.kernel.org/r/20170306131408.9828-5-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Jan Kara Cc: Brian Foster Cc: Darrick J. Wong Cc: Nikolay Borisov Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 8 ++++++++ include/linux/sched.h | 8 +++----- include/linux/sched/mm.h | 26 +++++++++++++++++++++++--- 3 files changed, 34 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 978232a3b4ae..2bfcfd33e476 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -210,8 +210,16 @@ struct vm_area_struct; * * GFP_NOIO will use direct reclaim to discard clean pages or slab pages * that do not require the starting of any physical IO. + * Please try to avoid using this flag directly and instead use + * memalloc_noio_{save,restore} to mark the whole scope which cannot + * perform any IO with a short explanation why. All allocation requests + * will inherit GFP_NOIO implicitly. * * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. 
+ * Please try to avoid using this flag directly and instead use + * memalloc_nofs_{save,restore} to mark the whole scope which cannot/shouldn't + * recurse into the FS layer with a short explanation why. All allocation + * requests will inherit GFP_NOFS implicitly. * * GFP_USER is for userspace allocations that also need to be directly * accessibly by the kernel or hardware. It is typically used by hardware diff --git a/include/linux/sched.h b/include/linux/sched.h index 8ac11465ac5b..993e7e25a3a5 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1224,9 +1224,9 @@ extern struct pid *cad_pid; #define PF_USED_ASYNC 0x00004000 /* Used async_schedule*(), used by module init */ #define PF_NOFREEZE 0x00008000 /* This thread should not be frozen */ #define PF_FROZEN 0x00010000 /* Frozen for system suspend */ -#define PF_FSTRANS 0x00020000 /* Inside a filesystem transaction */ -#define PF_KSWAPD 0x00040000 /* I am kswapd */ -#define PF_MEMALLOC_NOIO 0x00080000 /* Allocating memory without IO involved */ +#define PF_KSWAPD 0x00020000 /* I am kswapd */ +#define PF_MEMALLOC_NOFS 0x00040000 /* All allocation requests will inherit GFP_NOFS */ +#define PF_MEMALLOC_NOIO 0x00080000 /* All allocation requests will inherit GFP_NOIO */ #define PF_LESS_THROTTLE 0x00100000 /* Throttle me less: I clean memory */ #define PF_KTHREAD 0x00200000 /* I am a kernel thread */ #define PF_RANDOMIZE 0x00400000 /* Randomize virtual address space */ @@ -1237,8 +1237,6 @@ extern struct pid *cad_pid; #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* This thread called freeze_processes() and should not be frozen */ -#define PF_MEMALLOC_NOFS PF_FSTRANS /* Transition to a more generic GFP_NOFS scope semantic */ - /* * Only the _current_ task can read/write to tsk->flags, but other * tasks can access tsk->flags in readonly mode for example diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 
830953ebb391..9daabe138c99 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -149,13 +149,21 @@ static inline bool in_vfork(struct task_struct *tsk) return ret; } -/* __GFP_IO isn't allowed if PF_MEMALLOC_NOIO is set in current->flags - * __GFP_FS is also cleared as it implies __GFP_IO. +/* + * Applies per-task gfp context to the given allocation flags. + * PF_MEMALLOC_NOIO implies GFP_NOIO + * PF_MEMALLOC_NOFS implies GFP_NOFS */ -static inline gfp_t memalloc_noio_flags(gfp_t flags) +static inline gfp_t current_gfp_context(gfp_t flags) { + /* + * NOIO implies both NOIO and NOFS and it is a weaker context + * so always make sure it makes precendence + */ if (unlikely(current->flags & PF_MEMALLOC_NOIO)) flags &= ~(__GFP_IO | __GFP_FS); + else if (unlikely(current->flags & PF_MEMALLOC_NOFS)) + flags &= ~__GFP_FS; return flags; } @@ -171,4 +179,16 @@ static inline void memalloc_noio_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOIO) | flags; } +static inline unsigned int memalloc_nofs_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC_NOFS; + current->flags |= PF_MEMALLOC_NOFS; + return flags; +} + +static inline void memalloc_nofs_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 81378da64de6d33d0c200885f1de431c9a3e5ccd Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 3 May 2017 14:53:22 -0700 Subject: jbd2: mark the transaction context with the scope GFP_NOFS context now that we have memalloc_nofs_{save,restore} api we can mark the whole transaction context as implicitly GFP_NOFS. All allocations will automatically inherit GFP_NOFS this way. This means that we do not have to mark any of those requests with GFP_NOFS and moreover all the ext4_kv[mz]alloc(GFP_NOFS) are also safe now because even the hardcoded GFP_KERNEL allocations deep inside the vmalloc will be NOFS now. 
[akpm@linux-foundation.org: tweak comments] Link: http://lkml.kernel.org/r/20170306131408.9828-7-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Jan Kara Cc: Dave Chinner Cc: Theodore Ts'o Cc: Chris Mason Cc: David Sterba Cc: Brian Foster Cc: Darrick J. Wong Cc: Nikolay Borisov Cc: Peter Zijlstra Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jbd2.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index dfaa1f4dcb0c..606b6bce3a5b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -491,6 +491,8 @@ struct jbd2_journal_handle unsigned long h_start_jiffies; unsigned int h_requested_credits; + + unsigned int saved_alloc_context; }; -- cgit v1.2.3 From 056b9d8a76924df02011f3941c4f53ace8d6c32a Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 3 May 2017 14:53:32 -0700 Subject: mm: remove rodata_test_data export, add pr_fmt Since commit 3ad38ceb2769 ("x86/mm: Remove CONFIG_DEBUG_NX_TEST"), nothing is using the exported rodata_test_data variable, so drop the export. This additionally updates the pr_fmt to avoid redundant strings and adjusts some whitespace. 
Link: http://lkml.kernel.org/r/20170307005313.GA85809@beast Signed-off-by: Kees Cook Cc: Jinbum Park Cc: Arjan van de Ven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rodata_test.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rodata_test.h b/include/linux/rodata_test.h index ea05f6c51413..84766bcdd01f 100644 --- a/include/linux/rodata_test.h +++ b/include/linux/rodata_test.h @@ -14,7 +14,6 @@ #define _RODATA_TEST_H #ifdef CONFIG_DEBUG_RODATA_TEST -extern const int rodata_test_data; void rodata_test(void); #else static inline void rodata_test(void) {} -- cgit v1.2.3 From 18863d3a3f593f47b075b9f53ebf9228dc76cf72 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:04 -0700 Subject: mm: remove SWAP_DIRTY in ttu If we found lazyfree page is dirty, try_to_unmap_one can just SetPageSwapBacked in there like PG_mlocked page and just return with SWAP_FAIL which is very natural because the page is not swappable right now so that vmscan can activate it. There is no point to introduce new return value SWAP_DIRTY in try_to_unmap at the moment. Link: http://lkml.kernel.org/r/1489555493-14659-3-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Hillf Danton Acked-by: Kirill A.
Shutemov Cc: Anshuman Khandual Cc: Johannes Weiner Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index fee10d744ebd..b556eefa62bc 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -298,6 +298,5 @@ static inline int page_mkclean(struct page *page) #define SWAP_AGAIN 1 #define SWAP_FAIL 2 #define SWAP_MLOCK 3 -#define SWAP_DIRTY 4 #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3 From 192d7232569ab61ded40c8be691b12832bc6bcd1 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:10 -0700 Subject: mm: make try_to_munlock() return void try_to_munlock returns SWAP_MLOCK if the one of VMAs mapped the page has VM_LOCKED flag. In that time, VM set PG_mlocked to the page if the page is not pte-mapped THP which cannot be mlocked, either. With that, __munlock_isolated_page can use PageMlocked to check whether try_to_munlock is successful or not without relying on try_to_munlock's retval. It helps to make try_to_unmap/try_to_unmap_one simple with upcoming patches. [minchan@kernel.org: remove PG_Mlocked VM_BUG_ON check] Link: http://lkml.kernel.org/r/20170411025615.GA6545@bbox Link: http://lkml.kernel.org/r/1489555493-14659-5-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Acked-by: Kirill A. 
Shutemov Acked-by: Vlastimil Babka Cc: Anshuman Khandual Cc: Hillf Danton Cc: Johannes Weiner Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Sasha Levin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index b556eefa62bc..1b0cd4cf68e3 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -235,7 +235,7 @@ int page_mkclean(struct page *); * called in munlock()/munmap() path to check for other vmas holding * the page mlocked. */ -int try_to_munlock(struct page *); +void try_to_munlock(struct page *); void remove_migration_ptes(struct page *old, struct page *new, bool locked); -- cgit v1.2.3 From ad6b67041a45497261617d7a28b15159b202cb5a Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:13 -0700 Subject: mm: remove SWAP_MLOCK in ttu ttu doesn't need to return SWAP_MLOCK. Instead, just return SWAP_FAIL because it means the page is not-swappable so it should move to another LRU list(active or unevictable). putback friends will move it to right list depending on the page's LRU flag. Link: http://lkml.kernel.org/r/1489555493-14659-6-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Anshuman Khandual Cc: Hillf Danton Cc: Johannes Weiner Cc: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1b0cd4cf68e3..3630d4dcee13 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -297,6 +297,5 @@ static inline int page_mkclean(struct page *page) #define SWAP_SUCCESS 0 #define SWAP_AGAIN 1 #define SWAP_FAIL 2 -#define SWAP_MLOCK 3 #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3 From 666e5a406c3ed562e7b3ceff8b631b6067bdaead Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:20 -0700 Subject: mm: make ttu's return boolean try_to_unmap() returns SWAP_SUCCESS or SWAP_FAIL so it's suitable for boolean return. This patch changes it. Link: http://lkml.kernel.org/r/1489555493-14659-8-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Naoya Horiguchi Cc: Anshuman Khandual Cc: Hillf Danton Cc: Johannes Weiner Cc: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 3630d4dcee13..6028c38d3cac 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -191,7 +191,7 @@ static inline void page_dup_rmap(struct page *page, bool compound) int page_referenced(struct page *, int is_locked, struct mem_cgroup *memcg, unsigned long *vm_flags); -int try_to_unmap(struct page *, enum ttu_flags flags); +bool try_to_unmap(struct page *, enum ttu_flags flags); /* Avoid racy checks */ #define PVMW_SYNC (1 << 0) @@ -281,7 +281,7 @@ static inline int page_referenced(struct page *page, int is_locked, return 0; } -#define try_to_unmap(page, refs) SWAP_FAIL +#define try_to_unmap(page, refs) false static inline int page_mkclean(struct page *page) { -- cgit v1.2.3 From 1df631ae19819cff343d316eda42eca32d3de7fc Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:23 -0700 Subject: mm: make rmap_walk() return void There is no user of the return value from rmap_walk() and friends so this patch makes them void-returning functions. Link: http://lkml.kernel.org/r/1489555493-14659-9-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Anshuman Khandual Cc: Hillf Danton Cc: Johannes Weiner Cc: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 5 ++--- include/linux/rmap.h | 4 ++-- 2 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index e1cfda4bee58..78b44a024eaa 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -61,7 +61,7 @@ static inline void set_page_stable_node(struct page *page, struct page *ksm_might_need_to_copy(struct page *page, struct vm_area_struct *vma, unsigned long address); -int rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); void ksm_migrate_page(struct page *newpage, struct page *oldpage); #else /* !CONFIG_KSM */ @@ -94,10 +94,9 @@ static inline int page_referenced_ksm(struct page *page, return 0; } -static inline int rmap_walk_ksm(struct page *page, +static inline void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc) { - return 0; } static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 6028c38d3cac..1d7d457ca0dc 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -264,8 +264,8 @@ struct rmap_walk_control { bool (*invalid_vma)(struct vm_area_struct *vma, void *arg); }; -int rmap_walk(struct page *page, struct rmap_walk_control *rwc); -int rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk(struct page *page, struct rmap_walk_control *rwc); +void rmap_walk_locked(struct page *page, struct rmap_walk_control *rwc); #else /* !CONFIG_MMU */ -- cgit v1.2.3 From e4b82222712ed15813d35204c91429883d27d1d9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:27 -0700 Subject: mm: make rmap_one boolean function rmap_one's return value controls whether rmap_walk should continue to scan other ptes or not so it's 
target for changing to boolean. Return true if the scan should be continued. Otherwise, return false to stop the scanning. This patch makes rmap_one's return value to boolean. Link: http://lkml.kernel.org/r/1489555493-14659-10-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Anshuman Khandual Cc: Hillf Danton Cc: Johannes Weiner Cc: Kirill A. Shutemov Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 1d7d457ca0dc..13ed232cbb29 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -257,7 +257,11 @@ int page_mapped_in_vma(struct page *page, struct vm_area_struct *vma); */ struct rmap_walk_control { void *arg; - int (*rmap_one)(struct page *page, struct vm_area_struct *vma, + /* + * Return false if page table scanning in rmap_walk should be stopped. + * Otherwise, return true. + */ + bool (*rmap_one)(struct page *page, struct vm_area_struct *vma, unsigned long addr, void *arg); int (*done)(struct page *page); struct anon_vma *(*anon_lock)(struct page *page); -- cgit v1.2.3 From 83612a948d3bd2e71b110d7e8735661621bd23d9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 3 May 2017 14:54:30 -0700 Subject: mm: remove SWAP_[SUCCESS|AGAIN|FAIL] There is no user for it. Remove it. [minchan@kernel.org: use false instead of SWAP_FAIL] Link: http://lkml.kernel.org/r/20170316053313.GA19241@bbox Link: http://lkml.kernel.org/r/1489555493-14659-11-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Anshuman Khandual Cc: Hillf Danton Cc: Johannes Weiner Cc: Kirill A. 
Shutemov Cc: Michal Hocko Cc: Naoya Horiguchi Cc: Vlastimil Babka Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rmap.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rmap.h b/include/linux/rmap.h index 13ed232cbb29..43ef2c30cb0f 100644 --- a/include/linux/rmap.h +++ b/include/linux/rmap.h @@ -295,11 +295,4 @@ static inline int page_mkclean(struct page *page) #endif /* CONFIG_MMU */ -/* - * Return values of try_to_unmap - */ -#define SWAP_SUCCESS 0 -#define SWAP_AGAIN 1 -#define SWAP_FAIL 2 - #endif /* _LINUX_RMAP_H */ -- cgit v1.2.3 From bd33ef3681359343863f2290aded182b0441edee Mon Sep 17 00:00:00 2001 From: Vinayak Menon Date: Wed, 3 May 2017 14:54:42 -0700 Subject: mm: enable page poisoning early at boot On SPARSEMEM systems page poisoning is enabled after buddy is up, because of the dependency on page extension init. This causes the pages released by free_all_bootmem not to be poisoned. This either delays or misses the identification of some issues because the pages have to undergo another cycle of alloc-free-alloc for any corruption to be detected. Enable page poisoning early by getting rid of the PAGE_EXT_DEBUG_POISON flag. Since all the free pages will now be poisoned, the flag need not be verified before checking the poison during an alloc. 
[vinmenon@codeaurora.org: fix Kconfig] Link: http://lkml.kernel.org/r/1490878002-14423-1-git-send-email-vinmenon@codeaurora.org Link: http://lkml.kernel.org/r/1490358246-11001-1-git-send-email-vinmenon@codeaurora.org Signed-off-by: Vinayak Menon Acked-by: Laura Abbott Tested-by: Laura Abbott Cc: Joonsoo Kim Cc: Michal Hocko Cc: Akinobu Mita Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 695da2a19b4c..5d22e69f51ea 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2487,7 +2487,6 @@ extern long copy_huge_page_from_user(struct page *dst_page, #endif /* CONFIG_TRANSPARENT_HUGEPAGE || CONFIG_HUGETLBFS */ extern struct page_ext_operations debug_guardpage_ops; -extern struct page_ext_operations page_poisoning_ops; #ifdef CONFIG_DEBUG_PAGEALLOC extern unsigned int _debug_guardpage_minorder; -- cgit v1.2.3 From 9927e3887642b976d9b391cd77d71388aa521e54 Mon Sep 17 00:00:00 2001 From: Pushkar Jambhlekar Date: Wed, 3 May 2017 14:54:45 -0700 Subject: include/linux/migrate.h: add arg names to prototype It is preferred, and the rest of migrate.h gets it right. 
Link: http://lkml.kernel.org/r/1490336009-8024-1-git-send-email-pushkar.iit@gmail.com Signed-off-by: Pushkar Jambhlekar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index fa76b516fa47..48e24844b3c5 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -33,8 +33,9 @@ extern char *migrate_reason_names[MR_TYPES]; #ifdef CONFIG_MIGRATION extern void putback_movable_pages(struct list_head *l); -extern int migrate_page(struct address_space *, - struct page *, struct page *, enum migrate_mode); +extern int migrate_page(struct address_space *mapping, + struct page *newpage, struct page *page, + enum migrate_mode mode); extern int migrate_pages(struct list_head *l, new_page_t new, free_page_t free, unsigned long private, enum migrate_mode mode, int reason); extern int isolate_movable_page(struct page *page, isolate_mode_t mode); -- cgit v1.2.3 From ac2e8e40acf4c73e0ad1addca34b186d855565d7 Mon Sep 17 00:00:00 2001 From: Hao Lee Date: Wed, 3 May 2017 14:54:51 -0700 Subject: mm: fix spelling error Fix variable name error in comments. No code changes. Link: http://lkml.kernel.org/r/20170403161655.5081-1-haolee.swjtu@gmail.com Signed-off-by: Hao Lee Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 2bfcfd33e476..2b1a44f5bdb6 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -313,8 +313,8 @@ static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) /* * GFP_ZONE_TABLE is a word size bitstring that is used for looking up the - * zone to use given the lowest 4 bits of gfp_t. 
Entries are ZONE_SHIFT long - * and there are 16 of them to cover all possible combinations of + * zone to use given the lowest 4 bits of gfp_t. Entries are GFP_ZONES_SHIFT + * bits long and there are 16 of them to cover all possible combinations of * __GFP_DMA, __GFP_DMA32, __GFP_MOVABLE and __GFP_HIGHMEM. * * The zone fallback order is MOVABLE=>HIGHMEM=>NORMAL=>DMA32=>DMA. -- cgit v1.2.3 From 2a2e48854d704214dac7546e87ae0e4daa0e61a0 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:55:03 -0700 Subject: mm: vmscan: fix IO/refault regression in cache workingset transition Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") we noticed bigger IO spikes during changes in cache access patterns. The patch in question shrunk the inactive list size to leave more room for the current workingset in the presence of streaming IO. However, workingset transitions that previously happened on the inactive list are now pushed out of memory and incur more refaults to complete. This patch disables active list protection when refaults are being observed. This accelerates workingset transitions, and allows more of the new set to establish itself from memory, without eating into the ability to protect the established workingset during stable periods. The workloads that were measurably affected for us were hit pretty bad by it, with refault/majfault rates doubling and tripling during cache transitions, and the machines sustaining half-hour periods of 100% IO utilization, where they'd previously have sub-minute peaks at 60-90%. Stateful services that handle user data tend to be more conservative with kernel upgrades. As a result we hit most page cache issues with some delay, as was the case here. The severity seemed to warrant a stable tag. 
Fixes: 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") Link: http://lkml.kernel.org/r/20170404220052.27593-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Cc: Rik van Riel Cc: Mel Gorman Cc: Michal Hocko Cc: Vladimir Davydov Cc: [4.7+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 64 +++++++++++++++++++++++++++++++++++++++++++--- include/linux/mmzone.h | 2 ++ 2 files changed, 63 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c5ebb32fef49..cfa91a3ca0ca 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -57,6 +57,9 @@ enum mem_cgroup_stat_index { MEMCG_SLAB_RECLAIMABLE, MEMCG_SLAB_UNRECLAIMABLE, MEMCG_SOCK, + MEMCG_WORKINGSET_REFAULT, + MEMCG_WORKINGSET_ACTIVATE, + MEMCG_WORKINGSET_NODERECLAIM, MEMCG_NR_STAT, }; @@ -495,6 +498,40 @@ extern int do_swap_account; void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); +static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + long val = 0; + int cpu; + + for_each_possible_cpu(cpu) + val += per_cpu(memcg->stat->count[idx], cpu); + + if (val < 0) + val = 0; + + return val; +} + +static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val) +{ + if (!mem_cgroup_disabled()) + this_cpu_add(memcg->stat->count[idx], val); +} + +static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + mem_cgroup_update_stat(memcg, idx, 1); +} + +static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + mem_cgroup_update_stat(memcg, idx, -1); +} + /** * mem_cgroup_update_page_stat - update page state statistics * @page: the page @@ -509,14 +546,14 @@ void unlock_page_memcg(struct page *page); * if (TestClearPageState(page)) * 
mem_cgroup_update_page_stat(page, state, -1); * unlock_page(page) or unlock_page_memcg(page) + * + * Kernel pages are an exception to this, since they'll never move. */ static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int val) { - VM_BUG_ON(!(rcu_read_lock_held() || PageLocked(page))); - if (page->mem_cgroup) - this_cpu_add(page->mem_cgroup->stat->count[idx], val); + mem_cgroup_update_stat(page->mem_cgroup, idx, val); } static inline void mem_cgroup_inc_page_stat(struct page *page, @@ -741,6 +778,27 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } +static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ + return 0; +} + +static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx, int val) +{ +} + +static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ +} + +static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, + enum mem_cgroup_stat_index idx) +{ +} + static inline void mem_cgroup_update_page_stat(struct page *page, enum mem_cgroup_stat_index idx, int nr) diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 446cf68c1c09..e0c3c5e3d8a0 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -225,6 +225,8 @@ struct lruvec { struct zone_reclaim_stat reclaim_stat; /* Evictions & activations on the inactive file list */ atomic_long_t inactive_age; + /* Refaults at the time of last reclaim cycle */ + unsigned long refaults; #ifdef CONFIG_MEMCG struct pglist_data *pgdat; #endif -- cgit v1.2.3 From 31176c781508e4e35b1cc4ae2f0a5abd1f4ea689 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:55:07 -0700 Subject: mm: memcontrol: clean up memory.events counting function We only ever count single events, drop the @nr parameter. Rename the function accordingly. Remove low-information kerneldoc. 
Link: http://lkml.kernel.org/r/20170404220148.28338-1-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 18 +++++------------- 1 file changed, 5 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index cfa91a3ca0ca..bc0c16e284c0 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -287,17 +287,10 @@ static inline bool mem_cgroup_disabled(void) return !cgroup_subsys_enabled(memory_cgrp_subsys); } -/** - * mem_cgroup_events - count memory events against a cgroup - * @memcg: the memory cgroup - * @idx: the event index - * @nr: the number of events to account for - */ -static inline void mem_cgroup_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx, - unsigned int nr) +static inline void mem_cgroup_event(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx) { - this_cpu_add(memcg->stat->events[idx], nr); + this_cpu_inc(memcg->stat->events[idx]); cgroup_file_notify(&memcg->events_file); } @@ -614,9 +607,8 @@ static inline bool mem_cgroup_disabled(void) return true; } -static inline void mem_cgroup_events(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx, - unsigned int nr) +static inline void mem_cgroup_event(struct mem_cgroup *memcg, + enum mem_cgroup_events_index idx) { } -- cgit v1.2.3 From df0e53d0619e83b465e363c088bf4eeb2848273b Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:55:10 -0700 Subject: mm: memcontrol: re-use global VM event enum The current duplication is a high-maintenance mess, and it's painful to add new items. This increases the size of the event array, but we'll eventually want most of the VM events tracked on a per-cgroup basis anyway. 
Link: http://lkml.kernel.org/r/20170404220148.28338-2-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 45 ++++++++++++++------------------------------- 1 file changed, 14 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index bc0c16e284c0..0bb5f055bd26 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -69,20 +69,6 @@ struct mem_cgroup_reclaim_cookie { unsigned int generation; }; -enum mem_cgroup_events_index { - MEM_CGROUP_EVENTS_PGPGIN, /* # of pages paged in */ - MEM_CGROUP_EVENTS_PGPGOUT, /* # of pages paged out */ - MEM_CGROUP_EVENTS_PGFAULT, /* # of page-faults */ - MEM_CGROUP_EVENTS_PGMAJFAULT, /* # of major page-faults */ - MEM_CGROUP_EVENTS_NSTATS, - /* default hierarchy events */ - MEMCG_LOW = MEM_CGROUP_EVENTS_NSTATS, - MEMCG_HIGH, - MEMCG_MAX, - MEMCG_OOM, - MEMCG_NR_EVENTS, -}; - /* * Per memcg event counter is incremented at every pagein/pageout. With THP, * it will be incremated by the number of pages. 
This counter is used for @@ -106,6 +92,15 @@ struct mem_cgroup_id { atomic_t ref; }; +/* Cgroup-specific events, on top of universal VM events */ +enum memcg_event_item { + MEMCG_LOW = NR_VM_EVENT_ITEMS, + MEMCG_HIGH, + MEMCG_MAX, + MEMCG_OOM, + MEMCG_NR_EVENTS, +}; + struct mem_cgroup_stat_cpu { long count[MEMCG_NR_STAT]; unsigned long events[MEMCG_NR_EVENTS]; @@ -288,9 +283,9 @@ static inline bool mem_cgroup_disabled(void) } static inline void mem_cgroup_event(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx) + enum memcg_event_item event) { - this_cpu_inc(memcg->stat->events[idx]); + this_cpu_inc(memcg->stat->events[event]); cgroup_file_notify(&memcg->events_file); } @@ -575,20 +570,8 @@ static inline void mem_cgroup_count_vm_event(struct mm_struct *mm, rcu_read_lock(); memcg = mem_cgroup_from_task(rcu_dereference(mm->owner)); - if (unlikely(!memcg)) - goto out; - - switch (idx) { - case PGFAULT: - this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGFAULT]); - break; - case PGMAJFAULT: - this_cpu_inc(memcg->stat->events[MEM_CGROUP_EVENTS_PGMAJFAULT]); - break; - default: - BUG(); - } -out: + if (likely(memcg)) + this_cpu_inc(memcg->stat->events[idx]); rcu_read_unlock(); } #ifdef CONFIG_TRANSPARENT_HUGEPAGE @@ -608,7 +591,7 @@ static inline bool mem_cgroup_disabled(void) } static inline void mem_cgroup_event(struct mem_cgroup *memcg, - enum mem_cgroup_events_index idx) + enum memcg_event_item event) { } -- cgit v1.2.3 From 71cd31135d4cf030a057ed7079a75a40c0a4a796 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:55:13 -0700 Subject: mm: memcontrol: re-use node VM page state enum The current duplication is a high-maintenance mess, and it's painful to add new items or query memcg state from the rest of the VM. This increases the size of the stat array marginally, but we should aim to track all these stats on a per-cgroup level anyway. 
Link: http://lkml.kernel.org/r/20170404220148.28338-3-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 100 +++++++++++++++++++-------------------------- 1 file changed, 43 insertions(+), 57 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0bb5f055bd26..0fa1f5de6841 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -35,40 +35,45 @@ struct page; struct mm_struct; struct kmem_cache; -/* - * The corresponding mem_cgroup_stat_names is defined in mm/memcontrol.c, - * These two lists should keep in accord with each other. - */ -enum mem_cgroup_stat_index { - /* - * For MEM_CONTAINER_TYPE_ALL, usage = pagecache + rss. - */ - MEM_CGROUP_STAT_CACHE, /* # of pages charged as cache */ - MEM_CGROUP_STAT_RSS, /* # of pages charged as anon rss */ - MEM_CGROUP_STAT_RSS_HUGE, /* # of pages charged as anon huge */ - MEM_CGROUP_STAT_SHMEM, /* # of pages charged as shmem */ - MEM_CGROUP_STAT_FILE_MAPPED, /* # of pages charged as file rss */ - MEM_CGROUP_STAT_DIRTY, /* # of dirty pages in page cache */ - MEM_CGROUP_STAT_WRITEBACK, /* # of pages under writeback */ - MEM_CGROUP_STAT_SWAP, /* # of pages, swapped out */ - MEM_CGROUP_STAT_NSTATS, - /* default hierarchy stats */ - MEMCG_KERNEL_STACK_KB = MEM_CGROUP_STAT_NSTATS, +/* Cgroup-specific page state, on top of universal node page state */ +enum memcg_stat_item { + MEMCG_CACHE = NR_VM_NODE_STAT_ITEMS, + MEMCG_RSS, + MEMCG_RSS_HUGE, + MEMCG_SWAP, + MEMCG_SOCK, + /* XXX: why are these zone and not node counters? 
*/ + MEMCG_KERNEL_STACK_KB, MEMCG_SLAB_RECLAIMABLE, MEMCG_SLAB_UNRECLAIMABLE, - MEMCG_SOCK, - MEMCG_WORKINGSET_REFAULT, - MEMCG_WORKINGSET_ACTIVATE, - MEMCG_WORKINGSET_NODERECLAIM, MEMCG_NR_STAT, }; +/* Cgroup-specific events, on top of universal VM events */ +enum memcg_event_item { + MEMCG_LOW = NR_VM_EVENT_ITEMS, + MEMCG_HIGH, + MEMCG_MAX, + MEMCG_OOM, + MEMCG_NR_EVENTS, +}; + struct mem_cgroup_reclaim_cookie { pg_data_t *pgdat; int priority; unsigned int generation; }; +#ifdef CONFIG_MEMCG + +#define MEM_CGROUP_ID_SHIFT 16 +#define MEM_CGROUP_ID_MAX USHRT_MAX + +struct mem_cgroup_id { + int id; + atomic_t ref; +}; + /* * Per memcg event counter is incremented at every pagein/pageout. With THP, * it will be incremated by the number of pages. This counter is used for @@ -82,25 +87,6 @@ enum mem_cgroup_events_target { MEM_CGROUP_NTARGETS, }; -#ifdef CONFIG_MEMCG - -#define MEM_CGROUP_ID_SHIFT 16 -#define MEM_CGROUP_ID_MAX USHRT_MAX - -struct mem_cgroup_id { - int id; - atomic_t ref; -}; - -/* Cgroup-specific events, on top of universal VM events */ -enum memcg_event_item { - MEMCG_LOW = NR_VM_EVENT_ITEMS, - MEMCG_HIGH, - MEMCG_MAX, - MEMCG_OOM, - MEMCG_NR_EVENTS, -}; - struct mem_cgroup_stat_cpu { long count[MEMCG_NR_STAT]; unsigned long events[MEMCG_NR_EVENTS]; @@ -487,7 +473,7 @@ void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { long val = 0; int cpu; @@ -502,20 +488,20 @@ static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, } static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { if (!mem_cgroup_disabled()) this_cpu_add(memcg->stat->count[idx], val); } static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) 
{ mem_cgroup_update_stat(memcg, idx, 1); } static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { mem_cgroup_update_stat(memcg, idx, -1); } @@ -538,20 +524,20 @@ static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, * Kernel pages are an exception to this, since they'll never move. */ static inline void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { if (page->mem_cgroup) mem_cgroup_update_stat(page->mem_cgroup, idx, val); } static inline void mem_cgroup_inc_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { mem_cgroup_update_page_stat(page, idx, 1); } static inline void mem_cgroup_dec_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { mem_cgroup_update_page_stat(page, idx, -1); } @@ -760,33 +746,33 @@ static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, } static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { } static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { } static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { } static inline void mem_cgroup_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, + enum memcg_stat_item idx, int nr) { } static inline void mem_cgroup_inc_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { } static inline void mem_cgroup_dec_page_stat(struct page *page, - enum mem_cgroup_stat_index idx) + enum memcg_stat_item idx) { } @@ -906,7 +892,7 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) * @val: number of pages (positive or negative) */ static inline void 
memcg_kmem_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { if (memcg_kmem_enabled() && page->mem_cgroup) this_cpu_add(page->mem_cgroup->stat->count[idx], val); @@ -935,7 +921,7 @@ static inline void memcg_put_cache_ids(void) } static inline void memcg_kmem_update_page_stat(struct page *page, - enum mem_cgroup_stat_index idx, int val) + enum memcg_stat_item idx, int val) { } #endif /* CONFIG_MEMCG && !CONFIG_SLOB */ -- cgit v1.2.3 From ccda7f4360be86b87497c50d1f58aab3fd85a9a5 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Wed, 3 May 2017 14:55:16 -0700 Subject: mm: memcontrol: use node page state naming scheme for memcg The memory controllers stat function names are awkwardly long and arbitrarily different from the zone and node stat functions. The current interface is named: mem_cgroup_read_stat() mem_cgroup_update_stat() mem_cgroup_inc_stat() mem_cgroup_dec_stat() mem_cgroup_update_page_stat() mem_cgroup_inc_page_stat() mem_cgroup_dec_page_stat() This patch renames it to match the corresponding node stat functions: memcg_page_state() [node_page_state()] mod_memcg_state() [mod_node_state()] inc_memcg_state() [inc_node_state()] dec_memcg_state() [dec_node_state()] mod_memcg_page_state() [mod_node_page_state()] inc_memcg_page_state() [inc_node_page_state()] dec_memcg_page_state() [dec_node_page_state()] Link: http://lkml.kernel.org/r/20170404220148.28338-4-hannes@cmpxchg.org Signed-off-by: Johannes Weiner Acked-by: Vladimir Davydov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 73 +++++++++++++++++++++++----------------------- 1 file changed, 37 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0fa1f5de6841..899949bbb2f9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -472,8 +472,8 @@ extern int do_swap_account; 
void lock_page_memcg(struct page *page); void unlock_page_memcg(struct page *page); -static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) { long val = 0; int cpu; @@ -487,27 +487,27 @@ static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, return val; } -static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, int val) { if (!mem_cgroup_disabled()) this_cpu_add(memcg->stat->count[idx], val); } -static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) { - mem_cgroup_update_stat(memcg, idx, 1); + mod_memcg_state(memcg, idx, 1); } -static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) { - mem_cgroup_update_stat(memcg, idx, -1); + mod_memcg_state(memcg, idx, -1); } /** - * mem_cgroup_update_page_stat - update page state statistics + * mod_memcg_page_state - update page state statistics * @page: the page * @idx: page state item to account * @val: number of pages (positive or negative) @@ -518,28 +518,28 @@ static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, * * lock_page(page) or lock_page_memcg(page) * if (TestClearPageState(page)) - * mem_cgroup_update_page_stat(page, state, -1); + * mod_memcg_page_state(page, state, -1); * unlock_page(page) or unlock_page_memcg(page) * * Kernel pages are an exception to this, since they'll never move. 
*/ -static inline void mem_cgroup_update_page_stat(struct page *page, - enum memcg_stat_item idx, int val) +static inline void mod_memcg_page_state(struct page *page, + enum memcg_stat_item idx, int val) { if (page->mem_cgroup) - mem_cgroup_update_stat(page->mem_cgroup, idx, val); + mod_memcg_state(page->mem_cgroup, idx, val); } -static inline void mem_cgroup_inc_page_stat(struct page *page, - enum memcg_stat_item idx) +static inline void inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { - mem_cgroup_update_page_stat(page, idx, 1); + mod_memcg_page_state(page, idx, 1); } -static inline void mem_cgroup_dec_page_stat(struct page *page, - enum memcg_stat_item idx) +static inline void dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { - mem_cgroup_update_page_stat(page, idx, -1); + mod_memcg_page_state(page, idx, -1); } unsigned long mem_cgroup_soft_limit_reclaim(pg_data_t *pgdat, int order, @@ -739,40 +739,41 @@ static inline bool mem_cgroup_oom_synchronize(bool wait) return false; } -static inline unsigned long mem_cgroup_read_stat(struct mem_cgroup *memcg, - enum mem_cgroup_stat_index idx) +static inline unsigned long memcg_page_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) { return 0; } -static inline void mem_cgroup_update_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx, int val) +static inline void mod_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx, + int nr) { } -static inline void mem_cgroup_inc_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void inc_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) { } -static inline void mem_cgroup_dec_stat(struct mem_cgroup *memcg, - enum memcg_stat_item idx) +static inline void dec_memcg_state(struct mem_cgroup *memcg, + enum memcg_stat_item idx) { } -static inline void mem_cgroup_update_page_stat(struct page *page, - enum memcg_stat_item idx, - int nr) +static inline void mod_memcg_page_state(struct 
page *page, + enum memcg_stat_item idx, + int nr) { } -static inline void mem_cgroup_inc_page_stat(struct page *page, - enum memcg_stat_item idx) +static inline void inc_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { } -static inline void mem_cgroup_dec_page_stat(struct page *page, - enum memcg_stat_item idx) +static inline void dec_memcg_page_state(struct page *page, + enum memcg_stat_item idx) { } -- cgit v1.2.3 From df6b7499806bffd233e6dd0465901827b0b385b8 Mon Sep 17 00:00:00 2001 From: Huang Ying Date: Wed, 3 May 2017 14:55:19 -0700 Subject: mm, swap: remove unused function prototype This is a code cleanup patch, no functionality changes. There are 2 unused function prototypes in swap.h; they are removed. Link: http://lkml.kernel.org/r/20170405071017.23677-1-ying.huang@intel.com Signed-off-by: "Huang, Ying" Cc: Tim Chen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 486494e6b2fc..ba5882419a7d 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -411,9 +411,6 @@ struct backing_dev_info; extern int init_swap_address_space(unsigned int type, unsigned long nr_pages); extern void exit_swap_address_space(unsigned int type); -extern int get_swap_slots(int n, swp_entry_t *slots); -extern void swapcache_free_batch(swp_entry_t *entries, int n); - #else /* CONFIG_SWAP */ #define swap_address_space(entry) (NULL) -- cgit v1.2.3 From 74da4a0f574d11ed60dbe50a1e5e942e20476590 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 3 Mar 2017 18:16:07 +0100 Subject: libceph, ceph: always advertise all supported features No reason to hide CephFS-specific features in the rbd case. Recent feature bits mix RADOS and CephFS-specific stuff together anyway.
Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_features.h | 4 ++++ include/linux/ceph/libceph.h | 5 +---- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index ae2f66833762..fd8b2953c78f 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -105,8 +105,10 @@ static inline u64 ceph_sanitize_features(u64 features) */ #define CEPH_FEATURES_SUPPORTED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ + CEPH_FEATURE_FLOCK | \ CEPH_FEATURE_SUBSCRIBE2 | \ CEPH_FEATURE_RECONNECT_SEQ | \ + CEPH_FEATURE_DIRLAYOUTHASH | \ CEPH_FEATURE_PGID64 | \ CEPH_FEATURE_PGPOOL3 | \ CEPH_FEATURE_OSDENC | \ @@ -114,11 +116,13 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_MSG_AUTH | \ CEPH_FEATURE_CRUSH_TUNABLES2 | \ CEPH_FEATURE_REPLY_CREATE_INODE | \ + CEPH_FEATURE_MDSENC | \ CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ CEPH_FEATURE_OSDMAP_ENC | \ + CEPH_FEATURE_MDS_INLINE_DATA | \ CEPH_FEATURE_CRUSH_TUNABLES3 | \ CEPH_FEATURE_OSD_PRIMARY_AFFINITY | \ CEPH_FEATURE_MSGR_KEEPALIVE2 | \ diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 88cd5dc8e238..cecbf5a26e5a 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -262,10 +262,7 @@ int ceph_print_client_options(struct seq_file *m, struct ceph_client *client); extern void ceph_destroy_options(struct ceph_options *opt); extern int ceph_compare_options(struct ceph_options *new_opt, struct ceph_client *client); -extern struct ceph_client *ceph_create_client(struct ceph_options *opt, - void *private, - u64 supported_features, - u64 required_features); +struct ceph_client *ceph_create_client(struct ceph_options *opt, void *private); struct ceph_entity_addr *ceph_client_addr(struct ceph_client *client); u64 ceph_client_gid(struct ceph_client *client); 
extern void ceph_destroy_client(struct ceph_client *client); -- cgit v1.2.3 From 06dfa96399a9a3280dd81e47f8696aa89f1783e7 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 17 Mar 2017 14:10:27 +0200 Subject: libceph: convert ceph_snap_context.nref from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index cecbf5a26e5a..3229ae6c7846 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -14,6 +14,7 @@ #include #include #include +#include #include #include @@ -161,7 +162,7 @@ struct ceph_client { * dirtied. */ struct ceph_snap_context { - atomic_t nref; + refcount_t nref; u64 seq; u32 num_snaps; u64 snaps[]; -- cgit v1.2.3 From 02113a0f14e20bd8e675d7cec16db6eaaf2b2380 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 17 Mar 2017 14:10:28 +0200 Subject: libceph: convert ceph_osd.o_ref from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c125b5d9e13c..d6a625e75040 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -27,7 +28,7 @@ typedef void (*ceph_osdc_callback_t)(struct ceph_osd_request *); /* a given osd we're communicating with */ struct ceph_osd { - atomic_t o_ref; + refcount_t o_ref; struct ceph_osd_client *o_osdc; int o_osd; int o_incarnation; -- cgit v1.2.3 From 0e1a5ee6577e43e5be55369d398107080b360941 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 17 Mar 2017 14:10:29 +0200 Subject: libceph: convert ceph_pagelist.refcnt from atomic_t to refcount_t refcount_t type and corresponding API should be used instead of atomic_t when the variable is used as a reference counter. This allows to avoid accidental refcounter overflows that might lead to use-after-free situations. 
Signed-off-by: Elena Reshetova Signed-off-by: Hans Liljestrand Signed-off-by: Kees Cook Signed-off-by: David Windsor Signed-off-by: Ilya Dryomov --- include/linux/ceph/pagelist.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/pagelist.h b/include/linux/ceph/pagelist.h index 13d71fe18b0c..75a7db21457d 100644 --- a/include/linux/ceph/pagelist.h +++ b/include/linux/ceph/pagelist.h @@ -2,7 +2,7 @@ #define __FS_CEPH_PAGELIST_H #include -#include +#include #include #include @@ -13,7 +13,7 @@ struct ceph_pagelist { size_t room; struct list_head free_list; size_t num_pages_free; - atomic_t refcnt; + refcount_t refcnt; }; struct ceph_pagelist_cursor { @@ -30,7 +30,7 @@ static inline void ceph_pagelist_init(struct ceph_pagelist *pl) pl->room = 0; INIT_LIST_HEAD(&pl->free_list); pl->num_pages_free = 0; - atomic_set(&pl->refcnt, 1); + refcount_set(&pl->refcnt, 1); } extern void ceph_pagelist_release(struct ceph_pagelist *pl); -- cgit v1.2.3 From 76201b6354bb3aa31c7ba2bd42b9cbb8dda71c44 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Tue, 28 Mar 2017 17:04:13 +0800 Subject: ceph: allow connecting to mds whose rank >= mdsmap::m_max_mds mdsmap::m_max_mds is the expected count of active mds. It's not the max rank of active mds. User can decrease mdsmap::m_max_mds, but does not stop mds whose rank >= mdsmap::m_max_mds. 
Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/mdsmap.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/mdsmap.h b/include/linux/ceph/mdsmap.h index 8ed5dc505fbb..d5f783f3226a 100644 --- a/include/linux/ceph/mdsmap.h +++ b/include/linux/ceph/mdsmap.h @@ -25,6 +25,7 @@ struct ceph_mdsmap { u32 m_session_autoclose; /* seconds */ u64 m_max_file_size; u32 m_max_mds; /* size of m_addr, m_state arrays */ + int m_num_mds; struct ceph_mds_info *m_info; /* which object pools file data can be stored in */ @@ -40,7 +41,7 @@ struct ceph_mdsmap { static inline struct ceph_entity_addr * ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) { - if (w >= m->m_max_mds) + if (w >= m->m_num_mds) return NULL; return &m->m_info[w].addr; } @@ -48,14 +49,14 @@ ceph_mdsmap_get_addr(struct ceph_mdsmap *m, int w) static inline int ceph_mdsmap_get_state(struct ceph_mdsmap *m, int w) { BUG_ON(w < 0); - if (w >= m->m_max_mds) + if (w >= m->m_num_mds) return CEPH_MDS_STATE_DNE; return m->m_info[w].state; } static inline bool ceph_mdsmap_is_laggy(struct ceph_mdsmap *m, int w) { - if (w >= 0 && w < m->m_max_mds) + if (w >= 0 && w < m->m_num_mds) return m->m_info[w].laggy; return false; } -- cgit v1.2.3 From 79162547b76e4979b21ef80c9629ada94a51a59b Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Wed, 5 Apr 2017 12:54:05 -0400 Subject: ceph: make seeky readdir more efficient Current cephfs client uses string to indicate start position of readdir. The string is last entry of previous readdir reply. This approach does not work for seeky readdir because we can not easily convert the new position to a string. For seeky readdir, mds needs to return dentries from the beginning. Client keeps retrying if the reply does not contain the dentry it wants. In current version of ceph, mds sorts CDentry in its cache in hash order. Client also uses dentry hash to compose dir position.
For seeky readdir, if client passes the hash part of dir position to mds, mds can avoid replying useless dentries. Signed-off-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index f4b2ee18f38c..1787e4a8e251 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -365,6 +365,7 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_READDIR_FRAG_END (1<<0) #define CEPH_READDIR_FRAG_COMPLETE (1<<8) #define CEPH_READDIR_HASH_ORDER (1<<9) +#define CEPH_READDIR_OFFSET_HASH (1<<10) union ceph_mds_request_args { struct { @@ -384,6 +385,7 @@ union ceph_mds_request_args { __le32 max_entries; /* how many dentries to grab */ __le32 max_bytes; __le16 flags; + __le32 offset_hash; } __attribute__ ((packed)) readdir; struct { __le32 mode; -- cgit v1.2.3 From aa26d662b9d44e7f5b0d109e892e537a23471863 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 4 Apr 2017 08:39:36 -0400 Subject: libceph: remove req->r_replay_version Nothing uses this anymore with the removal of the ack vs. commit code. Remove the field and just encode zeroes into place in the request encoding.
Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index d6a625e75040..3fc9e7754a9b 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -192,7 +192,6 @@ struct ceph_osd_request { unsigned long r_stamp; /* jiffies, send or check time */ unsigned long r_start_stamp; /* jiffies */ int r_attempts; - struct ceph_eversion r_replay_version; /* aka reassert_version */ u32 r_last_force_resend; u32 r_map_dne_bound; -- cgit v1.2.3 From a1f4020aab10a6bddb2d061c960ebe52cdfa30b5 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 4 Apr 2017 08:39:37 -0400 Subject: libceph: allow requests to return immediately on full conditions if caller wishes Usually, when the osd map is flagged as full or the pool is at quota, write requests just hang. This is not what we want for cephfs, where it would be better to simply report -ENOSPC back to userland instead of stalling. If the caller knows that it will want an immediate error return instead of blocking on a full or at-quota error condition then allow it to set a flag to request that behavior. Set that flag in ceph_osdc_new_request (since ceph.ko is the only caller), and on any other write request from ceph.ko. A later patch will deal with requests that were submitted before the new map showing the full condition came in. 
Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 3fc9e7754a9b..8cf644197b1a 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -187,6 +187,7 @@ struct ceph_osd_request { struct timespec r_mtime; /* ditto */ u64 r_data_offset; /* ditto */ bool r_linger; /* don't resend on failure */ + bool r_abort_on_full; /* return ENOSPC when full */ /* internal */ unsigned long r_stamp; /* jiffies, send or check time */ -- cgit v1.2.3 From 58eb7932ae4d671d2a2377a1779eda96a2789b11 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Tue, 18 Apr 2017 09:21:16 -0400 Subject: libceph: add an epoch_barrier field to struct ceph_osd_client Cephfs can get cap update requests that contain a new epoch barrier in them. When that happens we want to pause all OSD traffic until the right map epoch arrives. Add an epoch_barrier field to ceph_osd_client that is protected by the osdc->lock rwsem. When the barrier is set, and the current OSD map epoch is below that, pause the request target when submitting the request or when revisiting it. Add a way for upper layers (cephfs) to update the epoch_barrier as well. If we get a new map, compare the new epoch against the barrier before kicking requests and request another map if the map epoch is still lower than the one we want. If we get a map with a full pool, or at quota condition, then set the barrier to the current epoch value. 
Signed-off-by: Jeff Layton Reviewed-by: Ilya Dryomov Signed-off-by: Ilya Dryomov --- include/linux/ceph/osd_client.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 8cf644197b1a..85650b415e73 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -267,6 +267,7 @@ struct ceph_osd_client { struct rb_root osds; /* osds */ struct list_head osd_lru; /* idle osds */ spinlock_t osd_lru_lock; + u32 epoch_barrier; struct ceph_osd homeless_osd; atomic64_t last_tid; /* tid of last request */ u64 last_linger_id; @@ -305,6 +306,7 @@ extern void ceph_osdc_handle_reply(struct ceph_osd_client *osdc, struct ceph_msg *msg); extern void ceph_osdc_handle_map(struct ceph_osd_client *osdc, struct ceph_msg *msg); +void ceph_osdc_update_epoch_barrier(struct ceph_osd_client *osdc, u32 eb); extern void osd_req_op_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u32 flags); -- cgit v1.2.3 From 14bb211d324d6c8140167bd6b2b8a80757348a2f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 13 Apr 2017 12:17:38 +0200 Subject: rbd: support updating the lock cookie without releasing the lock As we no longer release the lock before potentially raising BLACKLISTED in rbd_reregister_watch(), the "either locked or blacklisted" assert in rbd_queue_workfn() needs to go: we can be both locked and blacklisted at that point now. 
Signed-off-by: Ilya Dryomov Reviewed-by: Jason Dillaman --- include/linux/ceph/cls_lock_client.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/cls_lock_client.h b/include/linux/ceph/cls_lock_client.h index 84884d8d4710..0594d3bba774 100644 --- a/include/linux/ceph/cls_lock_client.h +++ b/include/linux/ceph/cls_lock_client.h @@ -37,6 +37,11 @@ int ceph_cls_break_lock(struct ceph_osd_client *osdc, struct ceph_object_locator *oloc, char *lock_name, char *cookie, struct ceph_entity_name *locker); +int ceph_cls_set_cookie(struct ceph_osd_client *osdc, + struct ceph_object_id *oid, + struct ceph_object_locator *oloc, + char *lock_name, u8 type, char *old_cookie, + char *tag, char *new_cookie); void ceph_free_lockers(struct ceph_locker *lockers, u32 num_lockers); -- cgit v1.2.3 From f775ff7d89f33fc9ba63f6f70df3bcc98c2d9828 Mon Sep 17 00:00:00 2001 From: Alexander Graf Date: Thu, 27 Apr 2017 18:34:00 +0200 Subject: ceph: fix file open flags on ppc64 The file open flags (O_foo) are platform specific and should never go out to an interface that is not local to the system. Unfortunately these flags have leaked out onto the wire in the cephfs implementation. That led to bogus flags getting transmitted on ppc64. This patch converts the kernel view of flags to the ceph view of file open flags.
Fixes: 124e68e74 ("ceph: file operations") Signed-off-by: Alexander Graf Reviewed-by: "Yan, Zheng" Signed-off-by: Ilya Dryomov --- include/linux/ceph/ceph_fs.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 1787e4a8e251..ad078ebe25d6 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -367,6 +367,18 @@ extern const char *ceph_mds_op_name(int op); #define CEPH_READDIR_HASH_ORDER (1<<9) #define CEPH_READDIR_OFFSET_HASH (1<<10) +/* + * open request flags + */ +#define CEPH_O_RDONLY 00000000 +#define CEPH_O_WRONLY 00000001 +#define CEPH_O_RDWR 00000002 +#define CEPH_O_CREAT 00000100 +#define CEPH_O_EXCL 00000200 +#define CEPH_O_TRUNC 00001000 +#define CEPH_O_DIRECTORY 00200000 +#define CEPH_O_NOFOLLOW 00400000 + union ceph_mds_request_args { struct { __le32 mask; /* CEPH_CAP_* */ -- cgit v1.2.3 From 44f0aeec203738bf34f4b7e16b745c8c71fe0f06 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 2 Apr 2017 16:50:33 +0200 Subject: dmaengine: pl080: Cut some unused defines There is no in-kernel code using these indexed register defines, and their offsets are clearly defined right below. Cut them. 
Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- include/linux/amba/pl080.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h index 91b84a7f0539..800429f4b997 100644 --- a/include/linux/amba/pl080.h +++ b/include/linux/amba/pl080.h @@ -46,16 +46,8 @@ /* Per channel configuration registers */ -#define PL080_Cx_STRIDE (0x20) +/* Per channel configuration registers */ #define PL080_Cx_BASE(x) ((0x100 + (x * 0x20))) -#define PL080_Cx_SRC_ADDR(x) ((0x100 + (x * 0x20))) -#define PL080_Cx_DST_ADDR(x) ((0x104 + (x * 0x20))) -#define PL080_Cx_LLI(x) ((0x108 + (x * 0x20))) -#define PL080_Cx_CONTROL(x) ((0x10C + (x * 0x20))) -#define PL080_Cx_CONFIG(x) ((0x110 + (x * 0x20))) -#define PL080S_Cx_CONTROL2(x) ((0x110 + (x * 0x20))) -#define PL080S_Cx_CONFIG(x) ((0x114 + (x * 0x20))) - #define PL080_CH_SRC_ADDR (0x00) #define PL080_CH_DST_ADDR (0x04) #define PL080_CH_LLI (0x08) -- cgit v1.2.3 From ded091fee6806b7120f475d89c151d611758a395 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 2 Apr 2017 16:50:53 +0200 Subject: dmaengine: pl08x: Use the BIT() macro consistently This makes the driver shift bits with BIT() which is used on other places in the driver. 
Signed-off-by: Linus Walleij Signed-off-by: Vinod Koul --- include/linux/amba/pl080.h | 40 ++++++++++++++++++++-------------------- 1 file changed, 20 insertions(+), 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/pl080.h b/include/linux/amba/pl080.h index 800429f4b997..580b5323a717 100644 --- a/include/linux/amba/pl080.h +++ b/include/linux/amba/pl080.h @@ -38,9 +38,9 @@ #define PL080_SOFT_LSREQ (0x2C) #define PL080_CONFIG (0x30) -#define PL080_CONFIG_M2_BE (1 << 2) -#define PL080_CONFIG_M1_BE (1 << 1) -#define PL080_CONFIG_ENABLE (1 << 0) +#define PL080_CONFIG_M2_BE BIT(2) +#define PL080_CONFIG_M1_BE BIT(1) +#define PL080_CONFIG_ENABLE BIT(0) #define PL080_SYNC (0x34) @@ -58,18 +58,18 @@ #define PL080_LLI_ADDR_MASK (0x3fffffff << 2) #define PL080_LLI_ADDR_SHIFT (2) -#define PL080_LLI_LM_AHB2 (1 << 0) +#define PL080_LLI_LM_AHB2 BIT(0) -#define PL080_CONTROL_TC_IRQ_EN (1 << 31) +#define PL080_CONTROL_TC_IRQ_EN BIT(31) #define PL080_CONTROL_PROT_MASK (0x7 << 28) #define PL080_CONTROL_PROT_SHIFT (28) -#define PL080_CONTROL_PROT_CACHE (1 << 30) -#define PL080_CONTROL_PROT_BUFF (1 << 29) -#define PL080_CONTROL_PROT_SYS (1 << 28) -#define PL080_CONTROL_DST_INCR (1 << 27) -#define PL080_CONTROL_SRC_INCR (1 << 26) -#define PL080_CONTROL_DST_AHB2 (1 << 25) -#define PL080_CONTROL_SRC_AHB2 (1 << 24) +#define PL080_CONTROL_PROT_CACHE BIT(30) +#define PL080_CONTROL_PROT_BUFF BIT(29) +#define PL080_CONTROL_PROT_SYS BIT(28) +#define PL080_CONTROL_DST_INCR BIT(27) +#define PL080_CONTROL_SRC_INCR BIT(26) +#define PL080_CONTROL_DST_AHB2 BIT(25) +#define PL080_CONTROL_SRC_AHB2 BIT(24) #define PL080_CONTROL_DWIDTH_MASK (0x7 << 21) #define PL080_CONTROL_DWIDTH_SHIFT (21) #define PL080_CONTROL_SWIDTH_MASK (0x7 << 18) @@ -95,20 +95,20 @@ #define PL080_WIDTH_16BIT (0x1) #define PL080_WIDTH_32BIT (0x2) -#define PL080N_CONFIG_ITPROT (1 << 20) -#define PL080N_CONFIG_SECPROT (1 << 19) -#define PL080_CONFIG_HALT (1 << 18) -#define PL080_CONFIG_ACTIVE (1 << 17) /* 
RO */ -#define PL080_CONFIG_LOCK (1 << 16) -#define PL080_CONFIG_TC_IRQ_MASK (1 << 15) -#define PL080_CONFIG_ERR_IRQ_MASK (1 << 14) +#define PL080N_CONFIG_ITPROT BIT(20) +#define PL080N_CONFIG_SECPROT BIT(19) +#define PL080_CONFIG_HALT BIT(18) +#define PL080_CONFIG_ACTIVE BIT(17) /* RO */ +#define PL080_CONFIG_LOCK BIT(16) +#define PL080_CONFIG_TC_IRQ_MASK BIT(15) +#define PL080_CONFIG_ERR_IRQ_MASK BIT(14) #define PL080_CONFIG_FLOW_CONTROL_MASK (0x7 << 11) #define PL080_CONFIG_FLOW_CONTROL_SHIFT (11) #define PL080_CONFIG_DST_SEL_MASK (0xf << 6) #define PL080_CONFIG_DST_SEL_SHIFT (6) #define PL080_CONFIG_SRC_SEL_MASK (0xf << 1) #define PL080_CONFIG_SRC_SEL_SHIFT (1) -#define PL080_CONFIG_ENABLE (1 << 0) +#define PL080_CONFIG_ENABLE BIT(0) #define PL080_FLOW_MEM2MEM (0x0) #define PL080_FLOW_MEM2PER (0x1) -- cgit v1.2.3 From 9c1051aacde828073dbbab5e8e59c0fc802efa9a Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 08:17:21 -0600 Subject: blk-mq: untangle debugfs and sysfs Originally, I tied debugfs registration/unregistration together with sysfs. There's no reason to do this, and it's getting in the way of letting schedulers define their own debugfs attributes. Instead, tie the debugfs registration to the lifetime of the structures themselves. The saner lifetimes mean we can also get rid of the extra mq directory and move everything one level up. I.e., nvme0n1/mq/hctx0/tags is now just nvme0n1/hctx0/tags. 
Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ include/linux/blkdev.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a104832e7ae5..de8ed9aaa156 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -57,6 +57,10 @@ struct blk_mq_hw_ctx { unsigned long poll_considered; unsigned long poll_invoked; unsigned long poll_success; + +#ifdef CONFIG_BLK_DEBUG_FS + struct dentry *debugfs_dir; +#endif }; struct blk_mq_tag_set { diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 83d28623645f..b49a79a29e58 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -579,7 +579,6 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; - struct dentry *mq_debugfs_dir; #endif bool mq_sysfs_init_done; -- cgit v1.2.3 From d332ce091813d11a46144354baa72b755833392f Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Thu, 4 May 2017 08:24:40 -0600 Subject: blk-mq-debugfs: allow schedulers to register debugfs attributes This provides the infrastructure for schedulers to expose their internal state through debugfs. We add a list of queue attributes and a list of hctx attributes to struct elevator_type and wire them up when switching schedulers. 
Signed-off-by: Omar Sandoval Reviewed-by: Hannes Reinecke Add missing seq_file.h header in blk-mq-debugfs.h Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 1 + include/linux/elevator.h | 7 +++++++ 3 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index de8ed9aaa156..c47aa248c640 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -60,6 +60,7 @@ struct blk_mq_hw_ctx { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + struct dentry *sched_debugfs_dir; #endif }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b49a79a29e58..80ae958717a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -579,6 +579,7 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; + struct dentry *sched_debugfs_dir; #endif bool mq_sysfs_init_done; diff --git a/include/linux/elevator.h b/include/linux/elevator.h index d44840368ee7..9ec5e22846e0 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -8,6 +8,9 @@ struct io_cq; struct elevator_type; +#ifdef CONFIG_BLK_DEBUG_FS +struct blk_mq_debugfs_attr; +#endif /* * Return values from elevator merger @@ -144,6 +147,10 @@ struct elevator_type char elevator_name[ELV_NAME_MAX]; struct module *elevator_owner; bool uses_mq; +#ifdef CONFIG_BLK_DEBUG_FS + const struct blk_mq_debugfs_attr *queue_debugfs_attrs; + const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; +#endif /* managed by elevator core */ char icq_cache_name[ELV_NAME_MAX + 5]; /* elvname + "_io_cq" */ -- cgit v1.2.3 From 17159420a6c18bb3515ff85598b5ccf1a572763d Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 24 Apr 2017 10:00:10 -0700 Subject: fscrypt: introduce helper function for filename matching Introduce a helper function fscrypt_match_name() which tests whether a fscrypt_name matches a directory entry. Also clean up the magic numbers and document things properly. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 9 +++++ include/linux/fscrypt_supp.h | 78 +++++++++++++++++++++++++++++++++++++++++ 2 files changed, 87 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 3511ca798804..ec406aed2f2f 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -147,6 +147,15 @@ static inline int fscrypt_fname_usr_to_disk(struct inode *inode, return -EOPNOTSUPP; } +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + /* Encryption support disabled; use standard comparison */ + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + /* bio.c */ static inline void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *ctx, struct bio *bio) diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index a140f47e9b27..e12c224a0d1e 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -57,6 +57,84 @@ extern int fscrypt_fname_disk_to_usr(struct inode *, u32, u32, extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, struct fscrypt_str *); +#define FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE 32 + +/* Extracts the second-to-last ciphertext block; see explanation below */ +#define FSCRYPT_FNAME_DIGEST(name, len) \ + ((name) + round_down((len) - FS_CRYPTO_BLOCK_SIZE - 1, \ + FS_CRYPTO_BLOCK_SIZE)) + +#define FSCRYPT_FNAME_DIGEST_SIZE FS_CRYPTO_BLOCK_SIZE + +/** + * fscrypt_digested_name - alternate identifier for an on-disk filename + * + * When userspace lists an encrypted directory without access to the key, + * filenames whose ciphertext is longer than FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE + * bytes are shown in this abbreviated form (base64-encoded) rather than as the + * full ciphertext (base64-encoded). 
This is necessary to allow supporting + * filenames up to NAME_MAX bytes, since base64 encoding expands the length. + * + * To make it possible for filesystems to still find the correct directory entry + * despite not knowing the full on-disk name, we encode any filesystem-specific + * 'hash' and/or 'minor_hash' which the filesystem may need for its lookups, + * followed by the second-to-last ciphertext block of the filename. Due to the + * use of the CBC-CTS encryption mode, the second-to-last ciphertext block + * depends on the full plaintext. (Note that ciphertext stealing causes the + * last two blocks to appear "flipped".) This makes collisions very unlikely: + * just a 1 in 2^128 chance for two filenames to collide even if they share the + * same filesystem-specific hashes. + * + * This scheme isn't strictly immune to intentional collisions because it's + * basically like a CBC-MAC, which isn't secure on variable-length inputs. + * However, generating a CBC-MAC collision requires the ability to choose + * arbitrary ciphertext, which won't normally be possible with filename + * encryption since it would require write access to the raw disk. + * + * Taking a real cryptographic hash like SHA-256 over the full ciphertext would + * be better in theory but would be less efficient and more complicated to + * implement, especially since the filesystem would need to calculate it for + * each directory entry examined during a search. + */ +struct fscrypt_digested_name { + u32 hash; + u32 minor_hash; + u8 digest[FSCRYPT_FNAME_DIGEST_SIZE]; +}; + +/** + * fscrypt_match_name() - test whether the given name matches a directory entry + * @fname: the name being searched for + * @de_name: the name from the directory entry + * @de_name_len: the length of @de_name in bytes + * + * Normally @fname->disk_name will be set, and in that case we simply compare + * that to the name stored in the directory entry. 
The only exception is that + * if we don't have the key for an encrypted directory and a filename in it is + * very long, then we won't have the full disk_name and we'll instead need to + * match against the fscrypt_digested_name. + * + * Return: %true if the name matches, otherwise %false. + */ +static inline bool fscrypt_match_name(const struct fscrypt_name *fname, + const u8 *de_name, u32 de_name_len) +{ + if (unlikely(!fname->disk_name.name)) { + const struct fscrypt_digested_name *n = + (const void *)fname->crypto_buf.name; + if (WARN_ON_ONCE(fname->usr_fname->name[0] != '_')) + return false; + if (de_name_len <= FSCRYPT_FNAME_MAX_UNDIGESTED_SIZE) + return false; + return !memcmp(FSCRYPT_FNAME_DIGEST(de_name, de_name_len), + n->digest, FSCRYPT_FNAME_DIGEST_SIZE); + } + + if (de_name_len != fname->disk_name.len) + return false; + return !memcmp(de_name, fname->disk_name.name, fname->disk_name.len); +} + /* bio.c */ extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); extern void fscrypt_pullback_bio_page(struct page **, bool); -- cgit v1.2.3 From 6f9d696f016f5b42f6c6e8c9f723f8d3380e5903 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 1 May 2017 11:43:32 -0700 Subject: fscrypt: correct collision claim for digested names As I noted on the mailing list, it's easier than I originally thought to create intentional collisions in the digested names. Unfortunately it's not too easy to solve this, so for now just fix the comment to not lie. 
Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_supp.h | 22 +++++++++------------- 1 file changed, 9 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index e12c224a0d1e..cd4e82c17304 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -81,20 +81,16 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, * followed by the second-to-last ciphertext block of the filename. Due to the * use of the CBC-CTS encryption mode, the second-to-last ciphertext block * depends on the full plaintext. (Note that ciphertext stealing causes the - * last two blocks to appear "flipped".) This makes collisions very unlikely: - * just a 1 in 2^128 chance for two filenames to collide even if they share the - * same filesystem-specific hashes. + * last two blocks to appear "flipped".) This makes accidental collisions very + * unlikely: just a 1 in 2^128 chance for two filenames to collide even if they + * share the same filesystem-specific hashes. * - * This scheme isn't strictly immune to intentional collisions because it's - * basically like a CBC-MAC, which isn't secure on variable-length inputs. - * However, generating a CBC-MAC collision requires the ability to choose - * arbitrary ciphertext, which won't normally be possible with filename - * encryption since it would require write access to the raw disk. - * - * Taking a real cryptographic hash like SHA-256 over the full ciphertext would - * be better in theory but would be less efficient and more complicated to - * implement, especially since the filesystem would need to calculate it for - * each directory entry examined during a search. + * However, this scheme isn't immune to intentional collisions, which can be + * created by anyone able to create arbitrary plaintext filenames and view them + * without the key. 
Making the "digest" be a real cryptographic hash like + * SHA-256 over the full ciphertext would prevent this, although it would be + * less efficient and harder to implement, especially since the filesystem would + * need to calculate it for each directory entry examined during a search. */ struct fscrypt_digested_name { u32 hash; -- cgit v1.2.3 From f870a3c6727db5fcfeaa42d099f75872e4b17553 Mon Sep 17 00:00:00 2001 From: "sudarsana.kalluru@cavium.com" Date: Thu, 4 May 2017 08:15:03 -0700 Subject: qed*: Fix possible overflow for status block id field. Value for status block id could be more than 256 in 100G mode, need to update its data type from u8 to u16. Signed-off-by: Sudarsana Reddy Kalluru Signed-off-by: Yuval Mintz Signed-off-by: David S. Miller --- include/linux/qed/qed_if.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h index 5544d7b2f2bb..c70ac13a97e6 100644 --- a/include/linux/qed/qed_if.h +++ b/include/linux/qed/qed_if.h @@ -635,7 +635,7 @@ struct qed_common_ops { * @return 0 on success, error otherwise. 
*/ int (*set_coalesce)(struct qed_dev *cdev, u16 rx_coal, u16 tx_coal, - u8 qid, u16 sb_id); + u16 qid, u16 sb_id); /** * @brief set_led - Configure LED mode -- cgit v1.2.3 From 17a70355ea576843a7ac851f1db26872a50b2850 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 4 May 2017 12:56:12 -0500 Subject: of: fix sparse warnings in fdt, irq, reserved mem, and resolver code sparse generates the following warnings in drivers/of/: ../drivers/of/fdt.c:63:36: warning: cast to restricted __be32 ../drivers/of/fdt.c:68:33: warning: cast to restricted __be32 ../drivers/of/irq.c:105:88: warning: incorrect type in initializer (different base types) ../drivers/of/irq.c:105:88: expected restricted __be32 ../drivers/of/irq.c:105:88: got int ../drivers/of/irq.c:526:35: warning: incorrect type in assignment (different modifiers) ../drivers/of/irq.c:526:35: expected int ( *const [usertype] irq_init_cb )( ... ) ../drivers/of/irq.c:526:35: got void const *const data ../drivers/of/of_reserved_mem.c:200:50: warning: incorrect type in initializer (different modifiers) ../drivers/of/of_reserved_mem.c:200:50: expected int ( *[usertype] initfn )( ... ) ../drivers/of/of_reserved_mem.c:200:50: got void const *const data ../drivers/of/resolver.c:95:42: warning: incorrect type in assignment (different base types) ../drivers/of/resolver.c:95:42: expected unsigned int [unsigned] [usertype] ../drivers/of/resolver.c:95:42: got restricted __be32 [usertype] All these are harmless type mismatches fixed by adjusting the types. 
Signed-off-by: Rob Herring --- include/linux/of_irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 1e0deb8e8494..ec6b11deb773 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -8,7 +8,7 @@ #include #include -typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *); +typedef int const (*of_irq_init_cb_t)(struct device_node *, struct device_node *); /* * Workarounds only applied to 32bit powermac machines -- cgit v1.2.3 From 693dfd5a3f19efc44acf3a57217c0480e414f8ee Mon Sep 17 00:00:00 2001 From: Erez Shitrit Date: Thu, 27 Apr 2017 17:01:34 +0300 Subject: IB/mlx5: Enable IPoIB acceleration Enable mlx5 IPoIB acceleration by declaring mlx5_ib_{alloc,free}_rdma_netdev and assigning the mlx5 IPoIB rdma_netdev callbacks. In addition, this patch brings in sync mlx5's IPoIB parts for net and IB trees. As a precaution, we disabled IPoIB acceleration by default (in the mlx5_core Kconfig file). 
Signed-off-by: Saeed Mahameed Signed-off-by: Erez Shitrit Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/driver.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3fece51dcf13..cef2b98d479f 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1102,6 +1102,25 @@ struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); +#ifndef CONFIG_MLX5_CORE_IPOIB +static inline +struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)) +{ + return ERR_PTR(-EOPNOTSUPP); +} + +static inline void mlx5_rdma_netdev_free(struct net_device *netdev) {} +#else +struct net_device *mlx5_rdma_netdev_alloc(struct mlx5_core_dev *mdev, + struct ib_device *ibdev, + const char *name, + void (*setup)(struct net_device *)); +void mlx5_rdma_netdev_free(struct net_device *netdev); +#endif /* CONFIG_MLX5_CORE_IPOIB */ + struct mlx5_profile { u64 mask; u8 log_max_qp; -- cgit v1.2.3 From 8f078b38dd382710884ce7abd31a1935c440e6f8 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 4 May 2017 14:01:24 -0700 Subject: libnvdimm: convert NDD_ flags to use bitops, introduce NDD_LOCKED This is a preparation patch for handling locked nvdimm label regions, a new concept as introduced by the latest DSM document on pmem.io [1]. A future patch will leverage nvdimm_set_locked() at DIMM probe time to flag regions that can not be enabled. There should be no functional difference resulting from this change. 
[1]: http://pmem.io/documents/NVDIMM_DSM_Interface_Example-V1.3.pdf Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index f07b1b14159a..6c807017128d 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -20,9 +20,11 @@ enum { /* when a dimm supports both PMEM and BLK access a label is required */ - NDD_ALIASING = 1 << 0, + NDD_ALIASING = 0, /* unarmed memory devices may not persist writes */ - NDD_UNARMED = 1 << 1, + NDD_UNARMED = 1, + /* locked memory devices should not be accessed */ + NDD_LOCKED = 2, /* need to set a limit somewhere, but yes, this is likely overkill */ ND_IOCTL_MAX_BUFLEN = SZ_4M, -- cgit v1.2.3 From deccf497d804a4c5fca2dbfad2f104675a6f9102 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 4 May 2017 23:30:16 +0100 Subject: Make stat/lstat/fstatat pass AT_NO_AUTOMOUNT to vfs_statx() stat/lstat/fstatat need to pass AT_NO_AUTOMOUNT to vfs_statx() as the pre-statx code didn't set LOOKUP_AUTOMOUNT, even though fstatat() accepted the AT_NO_AUTOMOUNT flag. 
Fixes: a528d35e8bfc ("statx: Add a system call to make enhanced file info available") Reported-by: Ian Kent Signed-off-by: David Howells Tested-by: Ian Kent cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/fs.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index fc1b4faa6272..866c955314db 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2920,17 +2920,19 @@ extern int vfs_statx_fd(unsigned int, struct kstat *, u32, unsigned int); static inline int vfs_stat(const char __user *filename, struct kstat *stat) { - return vfs_statx(AT_FDCWD, filename, 0, stat, STATX_BASIC_STATS); + return vfs_statx(AT_FDCWD, filename, AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_lstat(const char __user *name, struct kstat *stat) { - return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW, + return vfs_statx(AT_FDCWD, name, AT_SYMLINK_NOFOLLOW | AT_NO_AUTOMOUNT, stat, STATX_BASIC_STATS); } static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { -- cgit v1.2.3 From 2be83da85a64773efaa407639de81bd1377f880e Mon Sep 17 00:00:00 2001 From: Lukasz Luba Date: Thu, 4 May 2017 12:34:32 +0100 Subject: thermal: devfreq_cooling: add new interface for direct power read This patch introduces a new interface for device drivers connected to devfreq_cooling in the thermal framework: get_real_power(). Some devices have more sophisticated methods (like power counters) to approximate the actual power that they use. In the previous implementation we had a pre-calculated power table which was then scaled by 'utilization' ('busy_time' and 'total_time' taken from devfreq 'last_status'). 
With this new interface the driver can provide more precise data regarding actual power to the thermal governor every time the power budget is calculated. We then use this value and calculate the real resource utilization scaling factor. Reviewed-by: Chris Diamand Acked-by: Javi Merino Signed-off-by: Lukasz Luba --- include/linux/devfreq_cooling.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index c35d0c0e0ada..4635f95000a4 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -34,6 +34,23 @@ * If get_dynamic_power() is NULL, then the * dynamic power is calculated as * @dyn_power_coeff * frequency * voltage^2 + * @get_real_power: When this is set, the framework uses it to ask the + * device driver for the actual power. + * Some devices have more sophisticated methods + * (like power counters) to approximate the actual power + * that they use. + * This function provides more accurate data to the + * thermal governor. When the driver does not provide + * such function, framework just uses pre-calculated + * table and scale the power by 'utilization' + * (based on 'busy_time' and 'total_time' taken from + * devfreq 'last_status'). + * The value returned by this function must be lower + * or equal than the maximum power value + * for the current state + * (which can be found in power_table[state]). + * When this interface is used, the power_table holds + * max total (static + dynamic) power value for each OPP. 
*/ struct devfreq_cooling_power { unsigned long (*get_static_power)(struct devfreq *devfreq, @@ -41,6 +58,8 @@ struct devfreq_cooling_power { unsigned long (*get_dynamic_power)(struct devfreq *devfreq, unsigned long freq, unsigned long voltage); + int (*get_real_power)(struct devfreq *df, u32 *power, + unsigned long freq, unsigned long voltage); unsigned long dyn_power_coeff; }; -- cgit v1.2.3 From 8a537ece3d946227e4afa81eae0e43fa47439c7d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 26 Apr 2017 23:22:09 +0200 Subject: PM / wakeup: Integrate mechanism to abort transitions in progress The system wakeup framework is not very consistent with respect to the way it handles suspend-to-idle and generally wakeup events occurring during transitions to system low-power states. First off, system transitions in progress are aborted by the event reporting helpers like pm_wakeup_event() only if the wakeup_count sysfs attribute is in use (as documented), but there are cases in which system-wide transitions should be aborted even if that is not the case. For example, a wakeup signal from a designated wakeup device during system-wide PM transition, it should cause the transition to be aborted right away. Moreover, there is a freeze_wake() call in wakeup_source_activate(), but that really is only effective after suspend_freeze_state has been set to FREEZE_STATE_ENTER by freeze_enter(). However, it is very unlikely that wakeup_source_activate() will ever be called at that time, as it could only be triggered by a IRQF_NO_SUSPEND interrupt handler, so wakeups from suspend-to-idle don't really occur in wakeup_source_activate(). 
At the same time there is a way to abort a system suspend in progress (or wake up the system from suspend-to-idle), which is by calling pm_system_wakeup(), but in turn that doesn't cause any wakeup source objects to be activated, so it will not be covered by wakeup source statistics and will not prevent the system from suspending again immediately (in case autosleep is used, for example). Consequently, if anyone wants to abort system transitions in progress and allow the wakeup_count mechanism to work, they need to use both pm_system_wakeup() and pm_wakeup_event(), say, at the same time which is awkward. For the above reasons, make it possible to trigger pm_system_wakeup() from within wakeup_source_activate() and provide a new pm_wakeup_hard_event() helper to do so within the wakeup framework. Signed-off-by: Rafael J. Wysocki --- include/linux/pm_wakeup.h | 25 +++++++++++++++++++++---- 1 file changed, 21 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_wakeup.h b/include/linux/pm_wakeup.h index a3447932df1f..4c2cba7ec1d4 100644 --- a/include/linux/pm_wakeup.h +++ b/include/linux/pm_wakeup.h @@ -106,8 +106,8 @@ extern void __pm_stay_awake(struct wakeup_source *ws); extern void pm_stay_awake(struct device *dev); extern void __pm_relax(struct wakeup_source *ws); extern void pm_relax(struct device *dev); -extern void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec); -extern void pm_wakeup_event(struct device *dev, unsigned int msec); +extern void pm_wakeup_ws_event(struct wakeup_source *ws, unsigned int msec, bool hard); +extern void pm_wakeup_dev_event(struct device *dev, unsigned int msec, bool hard); #else /* !CONFIG_PM_SLEEP */ @@ -182,9 +182,11 @@ static inline void __pm_relax(struct wakeup_source *ws) {} static inline void pm_relax(struct device *dev) {} -static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) {} +static inline void pm_wakeup_ws_event(struct wakeup_source *ws, + 
unsigned int msec, bool hard) {} -static inline void pm_wakeup_event(struct device *dev, unsigned int msec) {} +static inline void pm_wakeup_dev_event(struct device *dev, unsigned int msec, + bool hard) {} #endif /* !CONFIG_PM_SLEEP */ @@ -201,4 +203,19 @@ static inline void wakeup_source_trash(struct wakeup_source *ws) wakeup_source_drop(ws); } +static inline void __pm_wakeup_event(struct wakeup_source *ws, unsigned int msec) +{ + return pm_wakeup_ws_event(ws, msec, false); +} + +static inline void pm_wakeup_event(struct device *dev, unsigned int msec) +{ + return pm_wakeup_dev_event(dev, msec, false); +} + +static inline void pm_wakeup_hard_event(struct device *dev) +{ + return pm_wakeup_dev_event(dev, 0, true); +} + #endif /* _LINUX_PM_WAKEUP_H */ -- cgit v1.2.3 From eed4d47efe9508b855b09754cf6de4325d8a2f0d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 26 Apr 2017 23:23:03 +0200 Subject: ACPI / sleep: Ignore spurious SCI wakeups from suspend-to-idle The ACPI SCI (System Control Interrupt) is set up as a wakeup IRQ during suspend-to-idle transitions and, consequently, any events signaled through it wake up the system from that state. However, on some systems some of the events signaled via the ACPI SCI while suspended to idle should not cause the system to wake up. In fact, quite often they should just be discarded. Arguably, systems should not resume entirely on such events, but in order to decide which events really should cause the system to resume and which are spurious, it is necessary to resume up to the point when ACPI SCIs are actually handled and processed, which is after executing dpm_resume_noirq() in the system resume path. For this reasons, add a loop around freeze_enter() in which the platforms can process events signaled via multiplexed IRQ lines like the ACPI SCI and add suspend-to-idle hooks that can be used for this purpose to struct platform_freeze_ops. 
In the ACPI case, the ->wake hook is used for checking if the SCI has triggered while suspended and deferring the interrupt-induced system wakeup until the events signaled through it are actually processed sufficiently to decide whether or not the system should resume. In turn, the ->sync hook allows all of the relevant event queues to be flushed so as to prevent events from being missed due to race conditions. In addition to that, some ACPI code processing wakeup events needs to be modified to use the "hard" version of wakeup triggers, so that it will cause a system resume to happen on device-induced wakeup events even if the "soft" mechanism to prevent the system from suspending is not enabled (that also helps to catch device-induced wakeup events occurring during suspend transitions in progress). Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index d9718378a8be..0b1cf32edfd7 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -189,6 +189,8 @@ struct platform_suspend_ops { struct platform_freeze_ops { int (*begin)(void); int (*prepare)(void); + void (*wake)(void); + void (*sync)(void); void (*restore)(void); void (*end)(void); }; @@ -428,7 +430,8 @@ extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); -extern void pm_wakeup_clear(void); +extern void pm_system_cancel_wakeup(void); +extern void pm_wakeup_clear(bool reset); extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); @@ -478,7 +481,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} -static inline void pm_wakeup_clear(void) 
{} +static inline void pm_wakeup_clear(bool reset) {} static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} -- cgit v1.2.3 From 71afe470e20db133b30730cfa856e5d6854312e9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 13 Apr 2017 09:06:20 +0200 Subject: KVM: arm64: vgic-its: Introduce migration ABI infrastructure We plan to support different migration ABIs, ie. characterizing the ITS table layout format in guest RAM. For example, a new ABI will be needed if vLPIs get supported for nested use case. So let's introduce an array of supported ABIs (at the moment a single ABI is supported though). The following characteristics are foreseen to vary with the ABI: size of table entries, save/restore operation, the way abi settings are applied. By default the MAX_ABI_REV is applied on its creation. In subsequent patches we will introduce a way for the userspace to change the ABI in use. The entry sizes now are set according to the ABI version and not hardcoded anymore. 
Signed-off-by: Eric Auger Reviewed-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 97cbca19430d..81ebe437ccc3 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -132,6 +132,9 @@ #define GIC_BASER_SHAREABILITY(reg, type) \ (GIC_BASER_##type << reg##_SHAREABILITY_SHIFT) +/* encode a size field of width @w containing @n - 1 units */ +#define GIC_ENCODE_SZ(n, w) (((unsigned long)(n) - 1) & GENMASK_ULL(((w) - 1), 0)) + #define GICR_PROPBASER_SHAREABILITY_SHIFT (10) #define GICR_PROPBASER_INNER_CACHEABILITY_SHIFT (7) #define GICR_PROPBASER_OUTER_CACHEABILITY_SHIFT (56) @@ -232,6 +235,7 @@ #define GITS_CTLR_QUIESCENT (1U << 31) #define GITS_TYPER_PLPIS (1UL << 0) +#define GITS_TYPER_ITT_ENTRY_SIZE_SHIFT 4 #define GITS_TYPER_IDBITS_SHIFT 8 #define GITS_TYPER_DEVBITS_SHIFT 13 #define GITS_TYPER_DEVBITS(r) ((((r) >> GITS_TYPER_DEVBITS_SHIFT) & 0x1f) + 1) @@ -290,6 +294,7 @@ #define GITS_BASER_TYPE(r) (((r) >> GITS_BASER_TYPE_SHIFT) & 7) #define GITS_BASER_ENTRY_SIZE_SHIFT (48) #define GITS_BASER_ENTRY_SIZE(r) ((((r) >> GITS_BASER_ENTRY_SIZE_SHIFT) & 0x1f) + 1) +#define GITS_BASER_ENTRY_SIZE_MASK GENMASK_ULL(52, 48) #define GITS_BASER_SHAREABILITY_SHIFT (10) #define GITS_BASER_InnerShareable \ GIC_BASER_SHAREABILITY(GITS_BASER, InnerShareable) -- cgit v1.2.3 From ab01c6bdacc43c41c6b326889f4358f5afc38bf9 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 23 Mar 2017 15:14:00 +0100 Subject: KVM: arm64: vgic-its: Implement vgic_mmio_uaccess_write_its_iidr The GITS_IIDR revision field is used to encode the migration ABI revision. So we need to restore it to check the table layout is readable by the destination. By writing the IIDR, userspace thus forces the ABI revision to be used and this must be less than or equal to the max revision KVM supports. 
Signed-off-by: Eric Auger Reviewed-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 81ebe437ccc3..2eaea308f003 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -242,6 +242,11 @@ #define GITS_TYPER_PTA (1UL << 19) #define GITS_TYPER_HWCOLLCNT_SHIFT 24 +#define GITS_IIDR_REV_SHIFT 12 +#define GITS_IIDR_REV_MASK (0xf << GITS_IIDR_REV_SHIFT) +#define GITS_IIDR_REV(r) (((r) >> GITS_IIDR_REV_SHIFT) & 0xf) +#define GITS_IIDR_PRODUCTID_SHIFT 24 + #define GITS_CBASER_VALID (1ULL << 63) #define GITS_CBASER_SHAREABILITY_SHIFT (10) #define GITS_CBASER_INNER_CACHEABILITY_SHIFT (59) -- cgit v1.2.3 From 0d44cdb631ef53ea75be056886cf0541311e48df Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 22 Dec 2016 18:14:14 +0100 Subject: KVM: arm64: vgic-its: Interpret MAPD Size field and check related errors Up to now the MAPD's ITT size field has been ignored. It encodes the number of eventid bit minus 1. It should be used to check the eventid when a MAPTI command is issued on a device. Let's store the number of eventid bits in the its_device and do the check on MAPTI. Also make sure the ITT size field does not exceed the GITS_TYPER IDBITS field. 
Signed-off-by: Eric Auger Reviewed-by: Christoffer Dall Reviewed-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 2eaea308f003..be8bad00c419 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -347,9 +347,11 @@ #define E_ITS_INT_UNMAPPED_INTERRUPT 0x010307 #define E_ITS_CLEAR_UNMAPPED_INTERRUPT 0x010507 #define E_ITS_MAPD_DEVICE_OOR 0x010801 +#define E_ITS_MAPD_ITTSIZE_OOR 0x010802 #define E_ITS_MAPC_PROCNUM_OOR 0x010902 #define E_ITS_MAPC_COLLECTION_OOR 0x010903 #define E_ITS_MAPTI_UNMAPPED_DEVICE 0x010a04 +#define E_ITS_MAPTI_ID_OOR 0x010a05 #define E_ITS_MAPTI_PHYSICALID_OOR 0x010a06 #define E_ITS_INV_UNMAPPED_INTERRUPT 0x010c07 #define E_ITS_INVALL_UNMAPPED_COLLECTION 0x010d09 -- cgit v1.2.3 From 44de9d683847ba6dbac290bb8c9f1b773cbda746 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Thu, 4 May 2017 11:19:52 +0200 Subject: KVM: arm64: vgic-v3: vgic_v3_lpi_sync_pending_status this new helper synchronizes the irq pending_latch with the LPI pending bit status found in rdist pending table. As the status is consumed, we reset the bit in pending table. As we need the PENDBASER_ADDRESS() in vgic-v3, let's move its definition in the irqchip header. We restore the full length of the field, ie [51:16]. Same for PROPBASER_ADDRESS with full field length of [51:12]. 
Signed-off-by: Eric Auger Reviewed-by: Marc Zyngier Reviewed-by: Christoffer Dall --- include/linux/irqchip/arm-gic-v3.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index be8bad00c419..fffb91202bc9 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -159,6 +159,8 @@ #define GICR_PROPBASER_RaWaWb GIC_BASER_CACHEABILITY(GICR_PROPBASER, INNER, RaWaWb) #define GICR_PROPBASER_IDBITS_MASK (0x1f) +#define GICR_PROPBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 12)) +#define GICR_PENDBASER_ADDRESS(x) ((x) & GENMASK_ULL(51, 16)) #define GICR_PENDBASER_SHAREABILITY_SHIFT (10) #define GICR_PENDBASER_INNER_CACHEABILITY_SHIFT (7) -- cgit v1.2.3 From ef51042472f55b325fd7f2b26a2e29fd89757234 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 8 May 2017 10:55:27 -0700 Subject: block, dax: move "select DAX" from BLOCK to FS_DAX For configurations that do not enable DAX filesystems or drivers, do not require the DAX core to be built. Given that the 'direct_access' method has been removed from 'block_device_operations', we can also go ahead and move the block-related dax helper functions from fs/block_dev.c to drivers/dax/super.c. This keeps dax details out of the block layer and lets the DAX core be built as a module in the FS_DAX=n case. Filesystems need to include dax.h to call bdev_dax_supported(). Cc: linux-xfs@vger.kernel.org Cc: Jens Axboe Cc: "Theodore Ts'o" Cc: Matthew Wilcox Cc: Alexander Viro Cc: "Darrick J.
Wong" Cc: Ross Zwisler Reviewed-by: Jan Kara Reported-by: Geert Uytterhoeven Signed-off-by: Dan Williams --- include/linux/blkdev.h | 2 -- include/linux/dax.h | 30 ++++++++++++++++++++++++++++-- 2 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 848f87eb1905..e4d9899755a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1940,8 +1940,6 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern int bdev_dax_supported(struct super_block *, int); -int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #else /* CONFIG_BLOCK */ struct block_device; diff --git a/include/linux/dax.h b/include/linux/dax.h index d3158e74a59e..7fdf1d710042 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -18,12 +18,38 @@ struct dax_operations { void **, pfn_t *); }; +int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); +#if IS_ENABLED(CONFIG_FS_DAX) +int __bdev_dax_supported(struct super_block *sb, int blocksize); +static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +{ + return __bdev_dax_supported(sb, blocksize); +} +#else +static inline int bdev_dax_supported(struct super_block *sb, int blocksize) +{ + return -EOPNOTSUPP; +} +#endif + +#if IS_ENABLED(CONFIG_DAX) +struct dax_device *dax_get_by_host(const char *host); +void put_dax(struct dax_device *dax_dev); +#else +static inline struct dax_device *dax_get_by_host(const char *host) +{ + return NULL; +} + +static inline void put_dax(struct dax_device *dax_dev) +{ +} +#endif + int dax_read_lock(void); void dax_read_unlock(int id); -struct dax_device *dax_get_by_host(const char *host); struct dax_device *alloc_dax(void *private, const char 
*host, const struct dax_operations *ops); -void put_dax(struct dax_device *dax_dev); bool dax_alive(struct dax_device *dax_dev); void kill_dax(struct dax_device *dax_dev); void *dax_get_private(struct dax_device *dax_dev); -- cgit v1.2.3 From e092693443b995c8e3a565a73b5fdb05f1260f9b Mon Sep 17 00:00:00 2001 From: Olga Kornievskaia Date: Mon, 8 May 2017 18:02:24 -0400 Subject: NFS append COMMIT after synchronous COPY Instead of messing with the commit path which has been causing issues, add a COMMIT op after the COPY and ask for stable copies in the first place. It saves a round trip, since after the COPY, the client sends a COMMIT anyway. Signed-off-by: Olga Kornievskaia Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 677c6b91dfcd..b28c83475ee8 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1383,6 +1383,7 @@ struct nfs42_copy_res { struct nfs42_write_res write_res; bool consecutive; bool synchronous; + struct nfs_commitres commit_res; }; struct nfs42_seek_args { -- cgit v1.2.3 From 02aa0cdd72483c6dd436ed24d1000f86e0038d28 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 8 May 2017 15:54:40 -0700 Subject: mm, page_alloc: count movable pages when stealing from pageblock When stealing pages from pageblock of a different migratetype, we count how many free pages were stolen, and change the pageblock's migratetype if more than half of the pageblock was free. This might be too conservative, as there might be other pages that are not free, but were allocated with the same migratetype as our allocation requested. While we cannot determine the migratetype of allocated pages precisely (at least without the page_owner functionality enabled), we can count pages that compaction would try to isolate for migration - those are either on LRU or __PageMovable().
The rest can be assumed to be MIGRATE_RECLAIMABLE or MIGRATE_UNMOVABLE, which we cannot easily distinguish. This counting can be done as part of free page stealing with little additional overhead. The page stealing code is changed so that it considers free pages plus pages of the "good" migratetype for the decision whether to change pageblock's migratetype. The result should be more accurate migratetype of pageblocks wrt the actual pages in the pageblocks, when stealing from semi-occupied pageblocks. This should help the efficiency of page grouping by mobility. In testing based on 4.9 kernel with stress-highalloc from mmtests configured for order-4 GFP_KERNEL allocations, this patch has reduced the number of unmovable allocations falling back to movable pageblocks by 47%. The number of movable allocations falling back to other pageblocks are increased by 55%, but these events don't cause permanent fragmentation, so the tradeoff should be positive. Later patches also offset the movable fallback increase to some extent. 
[akpm@linux-foundation.org: merge fix] Link: http://lkml.kernel.org/r/20170307131545.28577-5-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Cc: Johannes Weiner Cc: Joonsoo Kim Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-isolation.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 047d64706f2a..d4cd2014fa6f 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -33,10 +33,7 @@ bool has_unmovable_pages(struct zone *zone, struct page *page, int count, bool skip_hwpoisoned_pages); void set_pageblock_migratetype(struct page *page, int migratetype); int move_freepages_block(struct zone *zone, struct page *page, - int migratetype); -int move_freepages(struct zone *zone, - struct page *start_page, struct page *end_page, - int migratetype); + int migratetype, int *num_movable); /* * Changes migrate type in [start_pfn, end_pfn) to be MIGRATE_ISOLATE. -- cgit v1.2.3 From b682debd97153706ffbe2fe3f8ec30a7ee11f9e1 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 8 May 2017 15:54:43 -0700 Subject: mm, compaction: change migrate_async_suitable() to suitable_migration_source() Preparation for making the decisions more complex and depending on compact_control flags. No functional change. 
Link: http://lkml.kernel.org/r/20170307131545.28577-6-vbabka@suse.cz Signed-off-by: Vlastimil Babka Acked-by: Mel Gorman Acked-by: Johannes Weiner Cc: Joonsoo Kim Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e0c3c5e3d8a0..ebaccd4e7d8c 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -74,6 +74,11 @@ extern char * const migratetype_names[MIGRATE_TYPES]; # define is_migrate_cma_page(_page) false #endif +static inline bool is_migrate_movable(int mt) +{ + return is_migrate_cma(mt) || mt == MIGRATE_MOVABLE; +} + #define for_each_migratetype_order(order, type) \ for (order = 0; order < MAX_ORDER; order++) \ for (type = 0; type < MIGRATE_TYPES; type++) -- cgit v1.2.3 From 7c30f352c852bae2715ad65ac4a38ca9af7d7696 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 8 May 2017 15:55:05 -0700 Subject: jiffies.h: declare jiffies and jiffies_64 with ____cacheline_aligned_in_smp jiffies_64 is defined in kernel/time/timer.c with ____cacheline_aligned_in_smp, however this macro is not part of the declaration of jiffies and jiffies_64 in jiffies.h. As a result clang generates the following warning: kernel/time/timer.c:57:26: error: section does not match previous declaration [-Werror,-Wsection] __visible u64 jiffies_64 __cacheline_aligned_in_smp = INITIAL_JIFFIES; ^ include/linux/cache.h:39:36: note: expanded from macro '__cacheline_aligned_in_smp' ^ include/linux/cache.h:34:4: note: expanded from macro '__cacheline_aligned' __section__(".data..cacheline_aligned"))) ^ include/linux/jiffies.h:77:12: note: previous attribute is here extern u64 __jiffy_data jiffies_64; ^ include/linux/jiffies.h:70:38: note: expanded from macro '__jiffy_data' Link: http://lkml.kernel.org/r/20170403190200.70273-1-mka@chromium.org Signed-off-by: Matthias Kaehlcke Cc: "Jason A . 
Donenfeld" Cc: Grant Grundler Cc: Michael Davidson Cc: Greg Hackmann Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/jiffies.h | 11 +++-------- 1 file changed, 3 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jiffies.h b/include/linux/jiffies.h index 624215cebee5..36872fbb815d 100644 --- a/include/linux/jiffies.h +++ b/include/linux/jiffies.h @@ -1,6 +1,7 @@ #ifndef _LINUX_JIFFIES_H #define _LINUX_JIFFIES_H +#include #include #include #include @@ -63,19 +64,13 @@ extern int register_refined_jiffies(long clock_tick_rate); /* TICK_USEC is the time between ticks in usec assuming fake USER_HZ */ #define TICK_USEC ((1000000UL + USER_HZ/2) / USER_HZ) -/* some arch's have a small-data section that can be accessed register-relative - * but that can only take up to, say, 4-byte variables. jiffies being part of - * an 8-byte variable may not be correctly accessed unless we force the issue - */ -#define __jiffy_data __attribute__((section(".data"))) - /* * The 64-bit value is not atomic - you MUST NOT read it * without sampling the sequence number in jiffies_lock. * get_jiffies_64() will do this for you as appropriate. */ -extern u64 __jiffy_data jiffies_64; -extern unsigned long volatile __jiffy_data jiffies; +extern u64 __cacheline_aligned_in_smp jiffies_64; +extern unsigned long volatile __cacheline_aligned_in_smp jiffies; #if (BITS_PER_LONG < 64) u64 get_jiffies_64(void); -- cgit v1.2.3 From c311c797998c1e70eade463dd60b843da4f1a203 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Mon, 8 May 2017 15:56:15 -0700 Subject: cpumask: make "nr_cpumask_bits" unsigned Bit searching functions accept "unsigned long" indices but "nr_cpumask_bits" is "int" which is signed, so inevitable sign extensions occur on x86_64. Those MOVSX are #1 MOVSX bloat by number of uses across whole kernel. Change "nr_cpumask_bits" to unsigned, this number can't be negative after all. 
It allows to do implicit zero-extension on x86_64 without MOVSX. Change signed comparisons into unsigned comparisons where necessary. Other uses looks fine because it is either argument passed to a function or comparison is already unsigned. Net win on allyesconfig type of kernel: ~2.8 KB (!) add/remove: 0/0 grow/shrink: 8/725 up/down: 93/-2926 (-2833) function old new delta xen_exit_mmap 691 735 +44 qstat_read 426 440 +14 __cpufreq_cooling_register 1678 1687 +9 trace_rb_cpu_prepare 447 455 +8 vermagic 54 60 +6 nfp_driver_version 54 60 +6 rcu_torture_stats_print 1147 1151 +4 find_next_push_cpu 267 269 +2 xen_irq_resume 961 960 -1 ... init_vp_index 946 906 -40 od_set_powersave_bias 328 281 -47 power_cpu_exit 193 139 -54 arch_show_interrupts 3538 3484 -54 select_idle_sibling 1558 1471 -87 Total: Before=158358910, After=158356077, chg -0.00% Same arguments apply to "nr_cpu_ids" but I haven't yet found enough courage to delve into this issue (and proper fix may require new type "cpu_t" which is whole separate story). Link: http://lkml.kernel.org/r/20170309205322.GA1728@avx2 Signed-off-by: Alexey Dobriyan Cc: Rusty Russell Cc: Heiko Carstens Cc: Martin Schwidefsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpumask.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index 1a675604b17d..2404ad238c0b 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -40,9 +40,9 @@ extern int nr_cpu_ids; #ifdef CONFIG_CPUMASK_OFFSTACK /* Assuming NR_CPUS is huge, a runtime limit is more efficient. Also, * not all bits may be allocated. 
*/ -#define nr_cpumask_bits nr_cpu_ids +#define nr_cpumask_bits ((unsigned int)nr_cpu_ids) #else -#define nr_cpumask_bits NR_CPUS +#define nr_cpumask_bits ((unsigned int)NR_CPUS) #endif /* -- cgit v1.2.3 From 692f66f26a4c19d73249736aa973c13a1521b387 Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Mon, 8 May 2017 15:56:18 -0700 Subject: crash: move crashkernel parsing and vmcore related code under CONFIG_CRASH_CORE Patch series "kexec/fadump: remove dependency with CONFIG_KEXEC and reuse crashkernel parameter for fadump", v4. Traditionally, kdump is used to save vmcore in case of a crash. Some architectures like powerpc can save vmcore using architecture specific support instead of kexec/kdump mechanism. Such architecture specific support also needs to reserve memory, to be used by dump capture kernel. crashkernel parameter can be reused, for memory reservation, by such architecture specific infrastructure. This patchset removes dependency with CONFIG_KEXEC for crashkernel parameter and vmcoreinfo related code as it can be reused without kexec support. Also, crashkernel parameter is reused instead of fadump_reserve_mem to reserve memory for fadump. The first patch moves crashkernel parameter parsing and vmcoreinfo related code under CONFIG_CRASH_CORE instead of CONFIG_KEXEC_CORE. The second patch reuses the definitions of append_elf_note() & final_note() functions under CONFIG_CRASH_CORE in IA64 arch code. The third patch removes dependency on CONFIG_KEXEC for firmware-assisted dump (fadump) in powerpc. The next patch reuses crashkernel parameter for reserving memory for fadump, instead of the fadump_reserve_mem parameter. This has the advantage of using all syntaxes crashkernel parameter supports, for fadump as well. The last patch updates fadump kernel documentation about use of crashkernel parameter. This patch (of 5): Traditionally, kdump is used to save vmcore in case of a crash.
Some architectures like powerpc can save vmcore using architecture specific support instead of kexec/kdump mechanism. Such architecture specific support also needs to reserve memory, to be used by dump capture kernel. crashkernel parameter can be a reused, for memory reservation, by such architecture specific infrastructure. But currently, code related to vmcoreinfo and parsing of crashkernel parameter is built under CONFIG_KEXEC_CORE. This patch introduces CONFIG_CRASH_CORE and moves the above mentioned code under this config, allowing code reuse without dependency on CONFIG_KEXEC. There is no functional change with this patch. Link: http://lkml.kernel.org/r/149035338104.6881.4550894432615189948.stgit@hbathini.in.ibm.com Signed-off-by: Hari Bathini Acked-by: Dave Young Cc: Fenghua Yu Cc: Tony Luck Cc: Eric Biederman Cc: Mahesh Salgaonkar Cc: Vivek Goyal Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 65 ++++++++++++++++++++++++++++++++++++++++++++++ include/linux/kexec.h | 57 +++------------------------------------- include/linux/printk.h | 4 +-- 3 files changed, 71 insertions(+), 55 deletions(-) create mode 100644 include/linux/crash_core.h (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h new file mode 100644 index 000000000000..18d0f946fda3 --- /dev/null +++ b/include/linux/crash_core.h @@ -0,0 +1,65 @@ +#ifndef LINUX_CRASH_CORE_H +#define LINUX_CRASH_CORE_H + +#include +#include +#include + +#define CRASH_CORE_NOTE_NAME "CORE" +#define CRASH_CORE_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) +#define CRASH_CORE_NOTE_NAME_BYTES ALIGN(sizeof(CRASH_CORE_NOTE_NAME), 4) +#define CRASH_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) + +#define CRASH_CORE_NOTE_BYTES ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + CRASH_CORE_NOTE_NAME_BYTES + \ + CRASH_CORE_NOTE_DESC_BYTES) + +#define VMCOREINFO_BYTES (4096) +#define VMCOREINFO_NOTE_NAME "VMCOREINFO" 
+#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) +#define VMCOREINFO_NOTE_SIZE ((CRASH_CORE_NOTE_HEAD_BYTES * 2) + \ + VMCOREINFO_NOTE_NAME_BYTES + \ + VMCOREINFO_BYTES) + +typedef u32 note_buf_t[CRASH_CORE_NOTE_BYTES/4]; + +void crash_save_vmcoreinfo(void); +void arch_crash_save_vmcoreinfo(void); +__printf(1, 2) +void vmcoreinfo_append_str(const char *fmt, ...); +phys_addr_t paddr_vmcoreinfo_note(void); + +#define VMCOREINFO_OSRELEASE(value) \ + vmcoreinfo_append_str("OSRELEASE=%s\n", value) +#define VMCOREINFO_PAGESIZE(value) \ + vmcoreinfo_append_str("PAGESIZE=%ld\n", value) +#define VMCOREINFO_SYMBOL(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(name)) +#define VMCOREINFO_STRUCT_SIZE(name) \ + vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ + (unsigned long)sizeof(struct name)) +#define VMCOREINFO_OFFSET(name, field) \ + vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ + (unsigned long)offsetof(struct name, field)) +#define VMCOREINFO_LENGTH(name, value) \ + vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) +#define VMCOREINFO_NUMBER(name) \ + vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) +#define VMCOREINFO_CONFIG(name) \ + vmcoreinfo_append_str("CONFIG_%s=y\n", #name) + +extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; +extern size_t vmcoreinfo_size; +extern size_t vmcoreinfo_max_size; + +int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); +int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, + unsigned long long *crash_size, unsigned long long *crash_base); + +#endif /* LINUX_CRASH_CORE_H */ 
diff --git a/include/linux/kexec.h b/include/linux/kexec.h index d419d0e51fe5..c9481ebcbc0c 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -14,17 +14,15 @@ #if !defined(__ASSEMBLY__) +#include #include #include #ifdef CONFIG_KEXEC_CORE #include -#include #include #include -#include -#include #include #include @@ -62,19 +60,15 @@ #define KEXEC_CRASH_MEM_ALIGN PAGE_SIZE #endif -#define KEXEC_NOTE_HEAD_BYTES ALIGN(sizeof(struct elf_note), 4) -#define KEXEC_CORE_NOTE_NAME "CORE" -#define KEXEC_CORE_NOTE_NAME_BYTES ALIGN(sizeof(KEXEC_CORE_NOTE_NAME), 4) -#define KEXEC_CORE_NOTE_DESC_BYTES ALIGN(sizeof(struct elf_prstatus), 4) +#define KEXEC_CORE_NOTE_NAME CRASH_CORE_NOTE_NAME + /* * The per-cpu notes area is a list of notes terminated by a "NULL" * note header. For kdump, the code in vmcore.c runs in the context * of the second kernel to combine them into one note. */ #ifndef KEXEC_NOTE_BYTES -#define KEXEC_NOTE_BYTES ( (KEXEC_NOTE_HEAD_BYTES * 2) + \ - KEXEC_CORE_NOTE_NAME_BYTES + \ - KEXEC_CORE_NOTE_DESC_BYTES ) +#define KEXEC_NOTE_BYTES CRASH_CORE_NOTE_BYTES #endif /* @@ -256,33 +250,6 @@ extern void crash_kexec(struct pt_regs *); int kexec_should_crash(struct task_struct *); int kexec_crash_loaded(void); void crash_save_cpu(struct pt_regs *regs, int cpu); -void crash_save_vmcoreinfo(void); -void arch_crash_save_vmcoreinfo(void); -__printf(1, 2) -void vmcoreinfo_append_str(const char *fmt, ...); -phys_addr_t paddr_vmcoreinfo_note(void); - -#define VMCOREINFO_OSRELEASE(value) \ - vmcoreinfo_append_str("OSRELEASE=%s\n", value) -#define VMCOREINFO_PAGESIZE(value) \ - vmcoreinfo_append_str("PAGESIZE=%ld\n", value) -#define VMCOREINFO_SYMBOL(name) \ - vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) -#define VMCOREINFO_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(name)) -#define VMCOREINFO_STRUCT_SIZE(name) \ - vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ - (unsigned long)sizeof(struct 
name)) -#define VMCOREINFO_OFFSET(name, field) \ - vmcoreinfo_append_str("OFFSET(%s.%s)=%lu\n", #name, #field, \ - (unsigned long)offsetof(struct name, field)) -#define VMCOREINFO_LENGTH(name, value) \ - vmcoreinfo_append_str("LENGTH(%s)=%lu\n", #name, (unsigned long)value) -#define VMCOREINFO_NUMBER(name) \ - vmcoreinfo_append_str("NUMBER(%s)=%ld\n", #name, (long)name) -#define VMCOREINFO_CONFIG(name) \ - vmcoreinfo_append_str("CONFIG_%s=y\n", #name) extern struct kimage *kexec_image; extern struct kimage *kexec_crash_image; @@ -303,31 +270,15 @@ extern int kexec_load_disabled; #define KEXEC_FILE_FLAGS (KEXEC_FILE_UNLOAD | KEXEC_FILE_ON_CRASH | \ KEXEC_FILE_NO_INITRAMFS) -#define VMCOREINFO_BYTES (4096) -#define VMCOREINFO_NOTE_NAME "VMCOREINFO" -#define VMCOREINFO_NOTE_NAME_BYTES ALIGN(sizeof(VMCOREINFO_NOTE_NAME), 4) -#define VMCOREINFO_NOTE_SIZE (KEXEC_NOTE_HEAD_BYTES*2 + VMCOREINFO_BYTES \ - + VMCOREINFO_NOTE_NAME_BYTES) - /* Location of a reserved region to hold the crash kernel. */ extern struct resource crashk_res; extern struct resource crashk_low_res; -typedef u32 note_buf_t[KEXEC_NOTE_BYTES/4]; extern note_buf_t __percpu *crash_notes; -extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; -extern size_t vmcoreinfo_size; -extern size_t vmcoreinfo_max_size; /* flag to track if kexec reboot is in progress */ extern bool kexec_in_progress; -int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); -int parse_crashkernel_low(char *cmdline, unsigned long long system_ram, - unsigned long long *crash_size, unsigned long long *crash_base); int crash_shrink_memory(unsigned long new_size); size_t crash_get_memory_size(void); void crash_free_reserved_phys_range(unsigned long begin, unsigned long end); diff --git a/include/linux/printk.h 
b/include/linux/printk.h index 571257e0f53d..e10f27468322 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -198,7 +198,7 @@ extern void wake_up_klogd(void); char *log_buf_addr_get(void); u32 log_buf_len_get(void); -void log_buf_kexec_setup(void); +void log_buf_vmcoreinfo_setup(void); void __init setup_log_buf(int early); __printf(1, 2) void dump_stack_set_arch_desc(const char *fmt, ...); void dump_stack_print_info(const char *log_lvl); @@ -246,7 +246,7 @@ static inline u32 log_buf_len_get(void) return 0; } -static inline void log_buf_kexec_setup(void) +static inline void log_buf_vmcoreinfo_setup(void) { } -- cgit v1.2.3 From 51dbd92520d4344fef78481b1bcbc3a7de32b69b Mon Sep 17 00:00:00 2001 From: Hari Bathini Date: Mon, 8 May 2017 15:56:21 -0700 Subject: ia64: reuse append_elf_note() and final_note() functions Get rid of multiple definitions of append_elf_note() & final_note() functions. Reuse these functions compiled under CONFIG_CRASH_CORE Also, define Elf_Word and use it instead of generic u32 or the more specific Elf64_Word. 
Link: http://lkml.kernel.org/r/149035342324.6881.11667840929850361402.stgit@hbathini.in.ibm.com Signed-off-by: Hari Bathini Acked-by: Dave Young Acked-by: Tony Luck Cc: Fenghua Yu Cc: Eric Biederman Cc: Mahesh Salgaonkar Cc: Vivek Goyal Cc: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 4 ++++ include/linux/elf.h | 2 ++ 2 files changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 18d0f946fda3..541a197ba4a2 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -55,6 +55,10 @@ extern u32 vmcoreinfo_note[VMCOREINFO_NOTE_SIZE/4]; extern size_t vmcoreinfo_size; extern size_t vmcoreinfo_max_size; +Elf_Word *append_elf_note(Elf_Word *buf, char *name, unsigned int type, + void *data, size_t data_len); +void final_note(Elf_Word *buf); + int __init parse_crashkernel(char *cmdline, unsigned long long system_ram, unsigned long long *crash_size, unsigned long long *crash_base); int parse_crashkernel_high(char *cmdline, unsigned long long system_ram, diff --git a/include/linux/elf.h b/include/linux/elf.h index 20fa8d8ae313..ba069e8f4f78 100644 --- a/include/linux/elf.h +++ b/include/linux/elf.h @@ -29,6 +29,7 @@ extern Elf32_Dyn _DYNAMIC []; #define elf_note elf32_note #define elf_addr_t Elf32_Off #define Elf_Half Elf32_Half +#define Elf_Word Elf32_Word #else @@ -39,6 +40,7 @@ extern Elf64_Dyn _DYNAMIC []; #define elf_note elf64_note #define elf_addr_t Elf64_Off #define Elf_Half Elf64_Half +#define Elf_Word Elf64_Word #endif -- cgit v1.2.3 From 25b14e92af1a563c7331466ca59188f88050bbf0 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 8 May 2017 15:56:38 -0700 Subject: ns: allow ns_entries to have custom symlink content Patch series "Expose task pid_ns_for_children to userspace". pid_ns_for_children set by a task is known only to the task itself, and it's impossible to identify it from outside. 
It's a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks that do setns(CLONE_NEWPID) in the process of their work. If they have a custom pid_ns_for_children before dump, they must have the same ns after restore. Otherwise, the restored task ends up in an environment it does not expect. This patchset solves the problem. It exposes pid_ns_for_children to the ns directory in the standard way with the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] This patch (of 2): Make it possible to have link content prefix yyy different from the link name xxx: $ readlink /proc/[pid]/ns/xxx yyy:[4026531838] This will be used in the next patch. Link: http://lkml.kernel.org/r/149201120318.6007.7362655181033883000.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai Reviewed-by: Cyrill Gorcunov Acked-by: Andrei Vagin Cc: Andreas Gruenbacher Cc: Kees Cook Cc: Michael Kerrisk Cc: Al Viro Cc: Oleg Nesterov Cc: Paul Moore Cc: Eric Biederman Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_ns.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 12cb8bd81d2d..88dba3b53375 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -14,6 +14,7 @@ struct inode; struct proc_ns_operations { const char *name; + const char *real_ns_name; int type; struct ns_common *(*get)(struct task_struct *task); void (*put)(struct ns_common *ns); -- cgit v1.2.3 From eaa0d190bfe1ed891b814a52712dcd852554cb08 Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Mon, 8 May 2017 15:56:41 -0700 Subject: pidns: expose task pid_ns_for_children to userspace pid_ns_for_children set by a task is known only to the task itself, and it's impossible to identify it from outside.
It's a big problem for checkpoint/restore software like CRIU, because it can't correctly handle tasks that do setns(CLONE_NEWPID) in the process of their work. This patch solves the problem, and it exposes pid_ns_for_children to the ns directory in the standard way with the name "pid_for_children": ~# ls /proc/5531/ns -l | grep pid lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid -> pid:[4026531836] lrwxrwxrwx 1 root root 0 Jan 14 16:38 pid_for_children -> pid:[4026532286] Link: http://lkml.kernel.org/r/149201123914.6007.2187327078064239572.stgit@localhost.localdomain Signed-off-by: Kirill Tkhai Cc: Andrei Vagin Cc: Andreas Gruenbacher Cc: Kees Cook Cc: Michael Kerrisk Cc: Al Viro Cc: Oleg Nesterov Cc: Paul Moore Cc: Eric Biederman Cc: Andy Lutomirski Cc: Ingo Molnar Cc: Serge Hallyn Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/proc_ns.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/proc_ns.h b/include/linux/proc_ns.h index 88dba3b53375..58ab28d81fc2 100644 --- a/include/linux/proc_ns.h +++ b/include/linux/proc_ns.h @@ -27,6 +27,7 @@ extern const struct proc_ns_operations netns_operations; extern const struct proc_ns_operations utsns_operations; extern const struct proc_ns_operations ipcns_operations; extern const struct proc_ns_operations pidns_operations; +extern const struct proc_ns_operations pidns_for_children_operations; extern const struct proc_ns_operations userns_operations; extern const struct proc_ns_operations mntns_operations; extern const struct proc_ns_operations cgroupns_operations; -- cgit v1.2.3 From 60f3e00d25b44e3aa51846590d1e10f408466a83 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 8 May 2017 15:57:06 -0700 Subject: sysv,ipc: cacheline align kern_ipc_perm Assign 'struct kern_ipc_perm' its own cacheline to avoid false sharing with sysv ipc calls. While the structure itself is rather read-mostly throughout the lifespan of ipc, the spinlock causes most of the invalidations.
One example is commit 31a7c4746e9 ("ipc/sem.c: cacheline align the ipc spinlock for semaphores"). Therefore, extend this to all ipc. The effect of cacheline alignment on sems can be seen in sembench, which deals mostly with semtimedop wait/wakes is seen to improve raw throughput (worker loops) between 8 to 12% on a 24-core x86 with over 4 threads. Link: http://lkml.kernel.org/r/1486673582-6979-4-git-send-email-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc.h | 7 +++---- include/linux/sem.h | 3 +-- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipc.h b/include/linux/ipc.h index 9d84942ae2e5..71fd92d81b26 100644 --- a/include/linux/ipc.h +++ b/include/linux/ipc.h @@ -8,8 +8,7 @@ #define IPCMNI 32768 /* <= MAX_INT limit for ipc arrays (including sysctl changes) */ /* used by in-kernel data structures */ -struct kern_ipc_perm -{ +struct kern_ipc_perm { spinlock_t lock; bool deleted; int id; @@ -18,9 +17,9 @@ struct kern_ipc_perm kgid_t gid; kuid_t cuid; kgid_t cgid; - umode_t mode; + umode_t mode; unsigned long seq; void *security; -}; +} ____cacheline_aligned_in_smp; #endif /* _LINUX_IPC_H */ diff --git a/include/linux/sem.h b/include/linux/sem.h index 4fc222f8755d..9edec926e9d9 100644 --- a/include/linux/sem.h +++ b/include/linux/sem.h @@ -10,8 +10,7 @@ struct task_struct; /* One sem_array data structure for each set of semaphores in the system. */ struct sem_array { - struct kern_ipc_perm ____cacheline_aligned_in_smp - sem_perm; /* permissions .. see ipc.h */ + struct kern_ipc_perm sem_perm; /* permissions .. 
see ipc.h */ time_t sem_ctime; /* last change time */ struct sem *sem_base; /* ptr to first semaphore in array */ struct list_head pending_alter; /* pending operations */ -- cgit v1.2.3 From a7c3e901a46ff54c016d040847eda598a9e3e653 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:09 -0700 Subject: mm: introduce kv[mz]alloc helpers Patch series "kvmalloc", v5. There are many open coded kmalloc with vmalloc fallback instances in the tree. Most of them are not careful enough or simply do not care about the underlying semantics of the kmalloc/page allocator which means that a) some vmalloc fallbacks are basically unreachable because the kmalloc part will keep retrying until it succeeds b) the page allocator can invoke really disruptive steps like the OOM killer to move forward which doesn't sound appropriate when we consider that the vmalloc fallback is available. As can be seen, implementing kvmalloc requires quite an intimate knowledge of the page allocator and the memory reclaim internals which strongly suggests that a helper should be implemented in the memory subsystem proper. Most callers I could find have been converted to use the helper instead. This is patch 6. There are some more relying on __GFP_REPEAT in the networking stack which I have converted as well and Eric Dumazet was not opposed [2] to converting them as well. [1] http://lkml.kernel.org/r/20170130094940.13546-1-mhocko@kernel.org [2] http://lkml.kernel.org/r/1485273626.16328.301.camel@edumazet-glaptop3.roam.corp.google.com This patch (of 9): Using kmalloc with the vmalloc fallback for larger allocations is a common pattern in the kernel code. Yet we do not have any common helper for that and so users have invented their own helpers. Some of them are really creative when doing so. Let's just add kv[mz]alloc and make sure it is implemented properly.
This implementation makes sure not to create large memory pressure for > PAGE_SIZE requests (__GFP_NORETRY) and also not to warn about allocation failures. This also rules out the OOM killer as the vmalloc is a more appropriate fallback than a disruptive user visible action. This patch also changes some existing users and removes helpers which are specific for them. In some cases this is not possible (e.g. ext4_kvmalloc, libcfs_kvzalloc) because those seem to be broken and require GFP_NO{FS,IO} context which is not vmalloc compatible in general (note that the page table allocation is GFP_KERNEL). Those need to be fixed separately. While we are at it, document __vmalloc{_node} regarding unsupported gfp masks because there seems to be a lot of confusion out there. kvmalloc_node will warn about GFP_KERNEL incompatible (which are not superset) flags to catch new abusers. Existing ones would have to die slowly. [sfr@canb.auug.org.au: f2fs fixup] Link: http://lkml.kernel.org/r/20170320163735.332e64b7@canb.auug.org.au Link: http://lkml.kernel.org/r/20170306103032.2540-2-mhocko@kernel.org Signed-off-by: Michal Hocko Signed-off-by: Stephen Rothwell Reviewed-by: Andreas Dilger [ext4 part] Acked-by: Vlastimil Babka Cc: John Hubbard Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kvm_host.h | 2 -- include/linux/mm.h | 14 ++++++++++++++ include/linux/vmalloc.h | 1 + 3 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d0250744507a..5d9b2a08e553 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -767,8 +767,6 @@ void kvm_arch_check_processor_compat(void *rtn); int kvm_arch_vcpu_runnable(struct kvm_vcpu *vcpu); int kvm_arch_vcpu_should_kick(struct kvm_vcpu *vcpu); -void *kvm_kvzalloc(unsigned long size); - #ifndef __KVM_HAVE_ARCH_VM_ALLOC static inline struct kvm *kvm_arch_alloc_vm(void) { diff --git
a/include/linux/mm.h b/include/linux/mm.h index 5d22e69f51ea..08e2849d27ca 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -518,6 +518,20 @@ static inline int is_vmalloc_or_module_addr(const void *x) } #endif +extern void *kvmalloc_node(size_t size, gfp_t flags, int node); +static inline void *kvmalloc(size_t size, gfp_t flags) +{ + return kvmalloc_node(size, flags, NUMA_NO_NODE); +} +static inline void *kvzalloc_node(size_t size, gfp_t flags, int node) +{ + return kvmalloc_node(size, flags | __GFP_ZERO, node); +} +static inline void *kvzalloc(size_t size, gfp_t flags) +{ + return kvmalloc(size, flags | __GFP_ZERO); +} + extern void kvfree(const void *addr); static inline atomic_t *compound_mapcount_ptr(struct page *page) diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index d68edffbf142..46991ad3ddd5 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -80,6 +80,7 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From 1f5307b1e094bfffa83c65c40ac6e3415c108780 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:12 -0700 Subject: mm, vmalloc: properly track vmalloc users __vmalloc_node_flags used to be static inline but this has changed by "mm: introduce kv[mz]alloc helpers" because kvmalloc_node needs to use it as well and the code is outside of the vmalloc proper. I haven't realized that changing this will lead to a subtle bug though. The function is responsible to track the caller as well. This caller is then printed by /proc/vmallocinfo. If __vmalloc_node_flags is not inline then we would get only direct users of __vmalloc_node_flags as callers (e.g. 
v[mz]alloc) which reduces usefulness of this debugging feature considerably. It simply doesn't help to see that the given range belongs to vmalloc as a caller: 0xffffc90002c79000-0xffffc90002c7d000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N0=3 0xffffc90002c81000-0xffffc90002c85000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N1=3 0xffffc90002c8d000-0xffffc90002c91000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N1=3 0xffffc90002c95000-0xffffc90002c99000 16384 vmalloc+0x16/0x18 pages=3 vmalloc N1=3 We really want to catch the _caller_ of the vmalloc function. Fix this issue by making __vmalloc_node_flags static inline again. Link: http://lkml.kernel.org/r/20170502134657.12381-1-mhocko@kernel.org Signed-off-by: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 19 +++++++++++++++++++ 1 file changed, 19 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 46991ad3ddd5..0328ce003992 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -6,6 +6,7 @@ #include #include #include /* pgprot_t */ +#include /* PAGE_KERNEL */ #include struct vm_area_struct; /* vma defining user mapping in mm_types.h */ @@ -80,7 +81,25 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, unsigned long start, unsigned long end, gfp_t gfp_mask, pgprot_t prot, unsigned long vm_flags, int node, const void *caller); +#ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); +#else +extern void *__vmalloc_node(unsigned long size, unsigned long align, + gfp_t gfp_mask, pgprot_t prot, + int node, const void *caller); + +/* + * We really want to have this inlined due to caller tracking. This + * function is used by the highlevel vmalloc apis and so we want to track + * their callers and inlining will achieve that. 
+ */ +static inline void *__vmalloc_node_flags(unsigned long size, + int node, gfp_t flags) +{ + return __vmalloc_node(size, 1, flags, PAGE_KERNEL, + node, __builtin_return_address(0)); +} +#endif extern void vfree(const void *addr); extern void vfree_atomic(const void *addr); -- cgit v1.2.3 From 752ade68cbd81d0321dfecc188f655a945551b25 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 8 May 2017 15:57:27 -0700 Subject: treewide: use kv[mz]alloc* rather than opencoded variants There are many code paths opencoding kvmalloc. Let's use the helper instead. The main difference to kvmalloc is that those users are usually not considering all the aspects of the memory allocator. E.g. allocation requests <= 32kB (with 4kB pages) are basically never failing and invoke OOM killer to satisfy the allocation. This sounds too disruptive for something that has a reasonable fallback - the vmalloc. On the other hand those requests might fallback to vmalloc even when the memory allocator would succeed after several more reclaim/compaction attempts previously. There is no guarantee something like that happens though. This patch converts many of those places to kv[mz]alloc* helpers because they are more conservative. Link: http://lkml.kernel.org/r/20170306103327.2766-2-mhocko@kernel.org Signed-off-by: Michal Hocko Reviewed-by: Boris Ostrovsky # Xen bits Acked-by: Kees Cook Acked-by: Vlastimil Babka Acked-by: Andreas Dilger # Lustre Acked-by: Christian Borntraeger # KVM/s390 Acked-by: Dan Williams # nvdim Acked-by: David Sterba # btrfs Acked-by: Ilya Dryomov # Ceph Acked-by: Tariq Toukan # mlx4 Acked-by: Leon Romanovsky # mlx5 Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Herbert Xu Cc: Anton Vorontsov Cc: Colin Cross Cc: Tony Luck Cc: "Rafael J. 
Wysocki" Cc: Ben Skeggs Cc: Kent Overstreet Cc: Santosh Raspatur Cc: Hariprasad S Cc: Yishai Hadas Cc: Oleg Drokin Cc: "Yan, Zheng" Cc: Alexander Viro Cc: Alexei Starovoitov Cc: Eric Dumazet Cc: David Miller Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mlx5/driver.h | 7 +------ include/linux/mm.h | 8 ++++++++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 3fece51dcf13..18fc65b84b79 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -892,12 +892,7 @@ static inline u16 cmdif_rev(struct mlx5_core_dev *dev) static inline void *mlx5_vzalloc(unsigned long size) { - void *rtn; - - rtn = kzalloc(size, GFP_KERNEL | __GFP_NOWARN); - if (!rtn) - rtn = vzalloc(size); - return rtn; + return kvzalloc(size, GFP_KERNEL); } static inline u32 mlx5_base_mkey(const u32 key) diff --git a/include/linux/mm.h b/include/linux/mm.h index 08e2849d27ca..7cb17c6b97de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -532,6 +532,14 @@ static inline void *kvzalloc(size_t size, gfp_t flags) return kvmalloc(size, flags | __GFP_ZERO); } +static inline void *kvmalloc_array(size_t n, size_t size, gfp_t flags) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + + return kvmalloc(n * size, flags); +} + extern void kvfree(const void *addr); static inline atomic_t *compound_mapcount_ptr(struct page *page) -- cgit v1.2.3 From ad61dd303a0f2439bb104349e2d2ec91a3010ce0 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 8 May 2017 15:57:50 -0700 Subject: scripts/spelling.txt: add regsiter -> register spelling mistake This typo is quite common. Fix it and add it to the spelling file so that checkpatch catches it earlier. 
Link: http://lkml.kernel.org/r/20170317011131.6881-2-sboyd@codeaurora.org Signed-off-by: Stephen Boyd Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bcma/bcma_driver_pci.h | 2 +- include/linux/ftrace.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma_driver_pci.h b/include/linux/bcma/bcma_driver_pci.h index 9657f11d48a7..bca6a5e4ca3d 100644 --- a/include/linux/bcma/bcma_driver_pci.h +++ b/include/linux/bcma/bcma_driver_pci.h @@ -80,7 +80,7 @@ struct pci_dev; #define BCMA_CORE_PCI_MDIODATA_DEV_TX 0x1e /* SERDES TX Dev */ #define BCMA_CORE_PCI_MDIODATA_DEV_RX 0x1f /* SERDES RX Dev */ #define BCMA_CORE_PCI_PCIEIND_ADDR 0x0130 /* indirect access to the internal register */ -#define BCMA_CORE_PCI_PCIEIND_DATA 0x0134 /* Data to/from the internal regsiter */ +#define BCMA_CORE_PCI_PCIEIND_DATA 0x0134 /* Data to/from the internal register */ #define BCMA_CORE_PCI_CLKREQENCTRL 0x0138 /* >= rev 6, Clkreq rdma control */ #define BCMA_CORE_PCI_PCICFG0 0x0400 /* PCI config space 0 (rev >= 8) */ #define BCMA_CORE_PCI_PCICFG1 0x0500 /* PCI config space 1 (rev >= 8) */ diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6d2a63e4ea52..473f088aabea 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -72,7 +72,7 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * CONTROL, SAVE_REGS, SAVE_REGS_IF_SUPPORTED, RECURSION_SAFE, STUB and * IPMODIFY are a kind of attribute flags which can be set only before * registering the ftrace_ops, and can not be modified while registered. - * Changing those attribute flags after regsitering ftrace_ops will + * Changing those attribute flags after registering ftrace_ops will * cause unexpected results. 
* * ENABLED - set/unset when ftrace_ops is registered/unregistered -- cgit v1.2.3 From 8ac1ed791401790968fd00ca63ca4fa814677199 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Mon, 8 May 2017 15:57:56 -0700 Subject: treewide: spelling: correct diffrent[iate] and banlance typos Add these misspellings to scripts/spelling.txt too Link: http://lkml.kernel.org/r/962aace119675e5fe87be2a88ddac1a5486f8e60.1490931810.git.joe@perches.com Signed-off-by: Joe Perches Acked-by: Mauro Carvalho Chehab Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mlx4/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 74b765ce48ab..d5bed0875d30 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -108,7 +108,7 @@ enum { MLX4_MFUNC_EQE_MASK = (MLX4_MFUNC_MAX_EQES - 1) }; -/* Driver supports 3 diffrent device methods to manage traffic steering: +/* Driver supports 3 different device methods to manage traffic steering: * -device managed - High level API for ib and eth flow steering. FW is * managing flow steering tables. * - B0 steering mode - Common low level API for ib and (if supported) eth. -- cgit v1.2.3 From 2d0bde57f3527ffac9279b4c8ba61060ba395b1a Mon Sep 17 00:00:00 2001 From: Laura Abbott Date: Mon, 8 May 2017 15:58:26 -0700 Subject: include/linux/filter.h: use set_memory.h header set_memory_* functions have moved to set_memory.h. Switch to this explicitly. 
Link: http://lkml.kernel.org/r/1488920133-27229-11-git-send-email-labbott@redhat.com Signed-off-by: Laura Abbott Acked-by: Daniel Borkmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/filter.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 9a7786db14fa..56197f82af45 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -19,7 +19,9 @@ #include -#include +#ifdef CONFIG_ARCH_HAS_SET_MEMORY +#include +#endif #include #include -- cgit v1.2.3 From ec48c940da6cb96c4be6638d0f2efade24d5242a Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 8 May 2017 15:58:50 -0700 Subject: kref: remove WARN_ON for NULL release functions The kref functions check for NULL release functions. This WARN_ON seems rather pointless. We will eventually release and then just crash nicely. It is also somewhat expensive because these functions are inlined in a lot of places. 
Removing the WARN_ONs saves around 2.3k in this kernel (likely more in others with more drivers) text data bss dec hex filename 9083992 5367600 11116544 25568136 1862388 vmlinux-before-load-avg 9070166 5367600 11116544 25554310 185ed86 vmlinux-load-avg Link: http://lkml.kernel.org/r/20170315021431.13107-5-andi@firstfloor.org Signed-off-by: Andi Kleen Acked-by: Greg Kroah-Hartman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kref.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kref.h b/include/linux/kref.h index f4156f88f557..29220724bf1c 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -66,8 +66,6 @@ static inline void kref_get(struct kref *kref) */ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref)) { - WARN_ON(release == NULL); - if (refcount_dec_and_test(&kref->refcount)) { release(kref); return 1; @@ -79,8 +77,6 @@ static inline int kref_put_mutex(struct kref *kref, void (*release)(struct kref *kref), struct mutex *lock) { - WARN_ON(release == NULL); - if (refcount_dec_and_mutex_lock(&kref->refcount, lock)) { release(kref); return 1; @@ -92,8 +88,6 @@ static inline int kref_put_lock(struct kref *kref, void (*release)(struct kref *kref), spinlock_t *lock) { - WARN_ON(release == NULL); - if (refcount_dec_and_lock(&kref->refcount, lock)) { release(kref); return 1; -- cgit v1.2.3 From f44a2920c84af809883ecbbd08d47fb5fe47c8ad Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 8 May 2017 15:58:56 -0700 Subject: include/linux/uaccess.h: remove expensive WARN_ON in pagefault_disabled_dec pagefault_disabled_dec is frequently used inline, and it has a WARN_ON for underflow that expands to about 6.5k of extra code. The warning doesn't seem to be that useful and worth so much code so remove it. If it was needed could make it depending on some debug kernel option. 
Saves ~6.5k in my kernel text data bss dec hex filename 9039417 5367568 11116544 25523529 1857549 vmlinux-before-pf 9032805 5367568 11116544 25516917 1855b75 vmlinux-pf Link: http://lkml.kernel.org/r/20170315021431.13107-8-andi@firstfloor.org Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index e0cbfb09e60f..201418d5e15c 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -203,7 +203,6 @@ static __always_inline void pagefault_disabled_inc(void) static __always_inline void pagefault_disabled_dec(void) { current->pagefault_disabled--; - WARN_ON(current->pagefault_disabled < 0); } /* -- cgit v1.2.3 From c718a97514e4d77c97a35734b728aaf541a0621b Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Mon, 8 May 2017 15:58:59 -0700 Subject: fs: remove set but not checked AOP_FLAG_UNINTERRUPTIBLE flag Commit afddba49d18f ("fs: introduce write_begin, write_end, and perform_write aops") introduced AOP_FLAG_UNINTERRUPTIBLE flag which was checked in pagecache_write_begin(), but that check was removed by 4e02ed4b4a2f ("fs: remove prepare_write/commit_write"). Between these two commits, commit d9414774dc0c ("cifs: Convert cifs to new aops.") added a check in cifs_write_begin(), but that check was soon removed by commit a98ee8c1c707 ("[CIFS] fix regression in cifs_write_begin/cifs_write_end"). Therefore, AOP_FLAG_UNINTERRUPTIBLE flag is checked nowhere. Let's remove this flag. This patch has no functionality changes.
Link: http://lkml.kernel.org/r/1489294781-53494-1-git-send-email-penguin-kernel@I-love.SAKURA.ne.jp Signed-off-by: Tetsuo Handa Reviewed-by: Jeff Layton Reviewed-by: Christoph Hellwig Cc: Nick Piggin Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 5d62d2c47939..249dad4e8d26 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -250,9 +250,8 @@ enum positive_aop_returns { AOP_TRUNCATED_PAGE = 0x80001, }; -#define AOP_FLAG_UNINTERRUPTIBLE 0x0001 /* will not do a short write */ -#define AOP_FLAG_CONT_EXPAND 0x0002 /* called from cont_expand */ -#define AOP_FLAG_NOFS 0x0004 /* used by filesystem to direct +#define AOP_FLAG_CONT_EXPAND 0x0001 /* called from cont_expand */ +#define AOP_FLAG_NOFS 0x0002 /* used by filesystem to direct * helper code (eg buffer layer) * to clear GFP_FS from alloc */ -- cgit v1.2.3 From bfe1c566453a0979c0b3cd3728d0de962272f034 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Mon, 8 May 2017 15:59:37 -0700 Subject: time: delete CURRENT_TIME_SEC and CURRENT_TIME All uses of CURRENT_TIME_SEC and CURRENT_TIME macros have been replaced by other time functions. These macros are also not y2038 safe. And, all their use cases can be fulfilled by y2038 safe ktime_get_* variants. 
Link: http://lkml.kernel.org/r/1491613030-11599-12-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Acked-by: John Stultz Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/time.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 23f0f5ce3090..c0543f5f25de 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -151,9 +151,6 @@ static inline bool timespec_inject_offset_valid(const struct timespec *ts) return true; } -#define CURRENT_TIME (current_kernel_time()) -#define CURRENT_TIME_SEC ((struct timespec) { get_seconds(), 0 }) - /* Some architectures do not supply their own clocksource. * This is mainly the case in architectures that get their * inter-tick times by reading the counter on their interval -- cgit v1.2.3 From 499118e966f1d2150bd66647c8932343c4e9a0b8 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Mon, 8 May 2017 15:59:50 -0700 Subject: mm: introduce memalloc_noreclaim_{save,restore} The previous patch ("mm: prevent potential recursive reclaim due to clearing PF_MEMALLOC") has shown that simply setting and clearing PF_MEMALLOC in current->flags can result in wrongly clearing a pre-existing PF_MEMALLOC flag and potentially lead to recursive reclaim. Let's introduce helpers that support proper nesting by saving the previous state of the flag, similar to the existing memalloc_noio_* and memalloc_nofs_* helpers. Convert existing setting/clearing of PF_MEMALLOC within mm to the new helpers. There are no known issues with the converted code, but the change makes it more robust. Link: http://lkml.kernel.org/r/20170405074700.29871-3-vbabka@suse.cz Signed-off-by: Vlastimil Babka Suggested-by: Michal Hocko Acked-by: Michal Hocko Acked-by: Hillf Danton Cc: Mel Gorman Cc: Johannes Weiner Cc: Andrey Ryabinin Cc: Boris Brezillon Cc: Chris Leech Cc: "David S.
Miller" Cc: Eric Dumazet Cc: Josef Bacik Cc: Lee Duncan Cc: Michal Hocko Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched/mm.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/mm.h b/include/linux/sched/mm.h index 9daabe138c99..2b24a6974847 100644 --- a/include/linux/sched/mm.h +++ b/include/linux/sched/mm.h @@ -191,4 +191,16 @@ static inline void memalloc_nofs_restore(unsigned int flags) current->flags = (current->flags & ~PF_MEMALLOC_NOFS) | flags; } +static inline unsigned int memalloc_noreclaim_save(void) +{ + unsigned int flags = current->flags & PF_MEMALLOC; + current->flags |= PF_MEMALLOC; + return flags; +} + +static inline void memalloc_noreclaim_restore(unsigned int flags) +{ + current->flags = (current->flags & ~PF_MEMALLOC) | flags; +} + #endif /* _LINUX_SCHED_MM_H */ -- cgit v1.2.3 From 497d72d80a789501501cccabdad6b145f9e31371 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Mon, 8 May 2017 20:38:40 +0200 Subject: KVM: Add kvm_vcpu_get_idx to get vcpu index in kvm->vcpus There are occasional needs to use the index of vcpu in the kvm->vcpus array to map something related to a VCPU. For example, unlike the vcpu->vcpu_id, the vcpu index is guaranteed to not be sparse across all vcpus which is useful when allocating a memory area for each vcpu. 
Signed-off-by: Christoffer Dall Reviewed-by: Eric Auger --- include/linux/kvm_host.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2c14ad9809da..12eb26d665e8 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -490,6 +490,17 @@ static inline struct kvm_vcpu *kvm_get_vcpu_by_id(struct kvm *kvm, int id) return NULL; } +static inline int kvm_vcpu_get_idx(struct kvm_vcpu *vcpu) +{ + struct kvm_vcpu *tmp; + int idx; + + kvm_for_each_vcpu(idx, tmp, vcpu->kvm) + if (tmp == vcpu) + return idx; + BUG(); +} + #define kvm_for_each_memslot(memslot, slots) \ for (memslot = &slots->memslots[0]; \ memslot < slots->memslots + KVM_MEM_SLOTS_NUM && memslot->npages;\ -- cgit v1.2.3 From ea47dd191d543f81e0912b5dc0471b48346b016e Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Wed, 19 Apr 2017 05:12:18 +1000 Subject: of/fdt: introduce of_scan_flat_dt_subnodes and of_get_flat_dt_phandle Introduce primitives for FDT parsing. These will be used for powerpc cpufeatures node scanning, which has quite complex structure but should be processed early. 
Cc: devicetree@vger.kernel.org Acked-by: Rob Herring Signed-off-by: Nicholas Piggin Signed-off-by: Michael Ellerman --- include/linux/of_fdt.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_fdt.h b/include/linux/of_fdt.h index 271b3fdf0070..1dfbfd0d8040 100644 --- a/include/linux/of_fdt.h +++ b/include/linux/of_fdt.h @@ -54,6 +54,11 @@ extern char __dtb_end[]; extern int of_scan_flat_dt(int (*it)(unsigned long node, const char *uname, int depth, void *data), void *data); +extern int of_scan_flat_dt_subnodes(unsigned long node, + int (*it)(unsigned long node, + const char *uname, + void *data), + void *data); extern int of_get_flat_dt_subnode_by_name(unsigned long node, const char *uname); extern const void *of_get_flat_dt_prop(unsigned long node, const char *name, @@ -62,6 +67,7 @@ extern int of_flat_dt_is_compatible(unsigned long node, const char *name); extern int of_flat_dt_match(unsigned long node, const char *const *matches); extern unsigned long of_get_flat_dt_root(void); extern int of_get_flat_dt_size(void); +extern uint32_t of_get_flat_dt_phandle(unsigned long node); extern int early_init_dt_scan_chosen(unsigned long node, const char *uname, int depth, void *data); -- cgit v1.2.3 From 9ea762a5ae45235eb3e5ec9c05c33cf37db78d70 Mon Sep 17 00:00:00 2001 From: Cornelia Huck Date: Thu, 30 Mar 2017 13:13:33 +0200 Subject: virtio: virtio_driver doc Add comments for the virtio_driver members that were not documented. Signed-off-by: Cornelia Huck Signed-off-by: Michael S. Tsirkin --- include/linux/virtio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/virtio.h b/include/linux/virtio.h index ed04753278d4..28b0e965360f 100644 --- a/include/linux/virtio.h +++ b/include/linux/virtio.h @@ -165,9 +165,13 @@ int virtio_device_restore(struct virtio_device *dev); * @feature_table_legacy: same as feature_table but when working in legacy mode. 
* @feature_table_size_legacy: number of entries in feature table legacy array. * @probe: the function to call when a device is found. Returns 0 or -errno. + * @scan: optional function to call after successful probe; intended + * for virtio-scsi to invoke a scan. * @remove: the function to call when a device is removed. * @config_changed: optional function to call when the device configuration * changes; may be called in interrupt context. + * @freeze: optional function to call during suspend/hibernation. + * @restore: optional function to call on resume. */ struct virtio_driver { struct device_driver driver; -- cgit v1.2.3 From fb9de9704775d6190c204f4ddf8da4cfdac26be1 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 7 Apr 2017 08:25:09 +0300 Subject: ptr_ring: batch ring zeroing A known weakness in ptr_ring design is that it does not handle well the situation when ring is almost full: as entries are consumed they are immediately used again by the producer, so consumer and producer are writing to a shared cache line. To fix this, add batching to consume calls: as entries are consumed do not write NULL into the ring until we get a multiple (in current implementation 2x) of cache lines away from the producer. At that point, write them all out. We do the write out in the reverse order to keep producer from sharing cache with consumer for as long as possible. Writeout also triggers when ring wraps around - there's no special reason to do this but it helps keep the code a bit simpler. What should we do if getting away from producer by 2 cache lines would mean we are keeping the ring more than half empty? Maybe we should reduce the batching in this case, current patch simply reduces the batching. Notes: - it is no longer true that a call to consume guarantees that the following call to produce will succeed. No users seem to assume that.
- batching can also in theory reduce the signalling rate: users that would previously send interrups to the producer to wake it up after consuming each entry would now only need to do this once in a batch. Doing this would be easy by returning a flag to the caller. No users seem to do signalling on consume yet so this was not implemented yet. Signed-off-by: Michael S. Tsirkin Reviewed-by: Jesper Dangaard Brouer Acked-by: Jason Wang --- include/linux/ptr_ring.h | 63 +++++++++++++++++++++++++++++++++++++++++------- 1 file changed, 54 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6c70444da3b9..6b2e0dd88569 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -34,11 +34,13 @@ struct ptr_ring { int producer ____cacheline_aligned_in_smp; spinlock_t producer_lock; - int consumer ____cacheline_aligned_in_smp; + int consumer_head ____cacheline_aligned_in_smp; /* next valid entry */ + int consumer_tail; /* next entry to invalidate */ spinlock_t consumer_lock; /* Shared consumer/producer data */ /* Read-only by both the producer and the consumer */ int size ____cacheline_aligned_in_smp; /* max entries in queue */ + int batch; /* number of entries to consume in a batch */ void **queue; }; @@ -170,7 +172,7 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) static inline void *__ptr_ring_peek(struct ptr_ring *r) { if (likely(r->size)) - return r->queue[r->consumer]; + return r->queue[r->consumer_head]; return NULL; } @@ -231,9 +233,38 @@ static inline bool ptr_ring_empty_bh(struct ptr_ring *r) /* Must only be called after __ptr_ring_peek returned !NULL */ static inline void __ptr_ring_discard_one(struct ptr_ring *r) { - r->queue[r->consumer++] = NULL; - if (unlikely(r->consumer >= r->size)) - r->consumer = 0; + /* Fundamentally, what we want to do is update consumer + * index and zero out the entry so producer can reuse it. 
+ * Doing it naively at each consume would be as simple as: + * r->queue[r->consumer++] = NULL; + * if (unlikely(r->consumer >= r->size)) + * r->consumer = 0; + * but that is suboptimal when the ring is full as producer is writing + * out new entries in the same cache line. Defer these updates until a + * batch of entries has been consumed. + */ + int head = r->consumer_head++; + + /* Once we have processed enough entries invalidate them in + * the ring all at once so producer can reuse their space in the ring. + * We also do this when we reach end of the ring - not mandatory + * but helps keep the implementation simple. + */ + if (unlikely(r->consumer_head - r->consumer_tail >= r->batch || + r->consumer_head >= r->size)) { + /* Zero out entries in the reverse order: this way we touch the + * cache line that producer might currently be reading the last; + * producer won't make progress and touch other cache lines + * besides the first one until we write out all entries. + */ + while (likely(head >= r->consumer_tail)) + r->queue[head--] = NULL; + r->consumer_tail = r->consumer_head; + } + if (unlikely(r->consumer_head >= r->size)) { + r->consumer_head = 0; + r->consumer_tail = 0; + } } static inline void *__ptr_ring_consume(struct ptr_ring *r) @@ -345,14 +376,27 @@ static inline void **__ptr_ring_init_queue_alloc(int size, gfp_t gfp) return kzalloc(ALIGN(size * sizeof(void *), SMP_CACHE_BYTES), gfp); } +static inline void __ptr_ring_set_size(struct ptr_ring *r, int size) +{ + r->size = size; + r->batch = SMP_CACHE_BYTES * 2 / sizeof(*(r->queue)); + /* We need to set batch at least to 1 to make logic + * in __ptr_ring_discard_one work correctly. + * Batching too much (because ring is small) would cause a lot of + * burstiness. Needs tuning, for now disable batching. 
+ */ + if (r->batch > r->size / 2 || !r->batch) + r->batch = 1; +} + static inline int ptr_ring_init(struct ptr_ring *r, int size, gfp_t gfp) { r->queue = __ptr_ring_init_queue_alloc(size, gfp); if (!r->queue) return -ENOMEM; - r->size = size; - r->producer = r->consumer = 0; + __ptr_ring_set_size(r, size); + r->producer = r->consumer_head = r->consumer_tail = 0; spin_lock_init(&r->producer_lock); spin_lock_init(&r->consumer_lock); @@ -373,9 +417,10 @@ static inline void **__ptr_ring_swap_queue(struct ptr_ring *r, void **queue, else if (destroy) destroy(ptr); - r->size = size; + __ptr_ring_set_size(r, size); r->producer = producer; - r->consumer = 0; + r->consumer_head = 0; + r->consumer_tail = 0; old = r->queue; r->queue = queue; -- cgit v1.2.3 From 3ae3d67ba705c754a3c91ac009f9ce73a0e7286a Mon Sep 17 00:00:00 2001 From: Vishal Verma Date: Wed, 10 May 2017 15:01:30 -0600 Subject: libnvdimm: add an atomic vs process context flag to rw_bytes nsio_rw_bytes can clear media errors, but this cannot be done while we are in an atomic context due to locking within ACPI. From the BTT, ->rw_bytes may be called either from atomic or process context depending on whether the calls happen during initialization or during IO. During init, we want to ensure error clearing happens, and the flag marking process context allows nsio_rw_bytes to do that. When called during IO, we're in atomic context, and error clearing can be skipped. 
Cc: Dan Williams Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/nd.h | 12 +++++++----- 1 file changed, 7 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nd.h b/include/linux/nd.h index fa66aeed441a..194b8e002ea7 100644 --- a/include/linux/nd.h +++ b/include/linux/nd.h @@ -48,7 +48,7 @@ struct nd_namespace_common { struct device dev; struct device *claim; int (*rw_bytes)(struct nd_namespace_common *, resource_size_t offset, - void *buf, size_t size, int rw); + void *buf, size_t size, int rw, unsigned long flags); }; static inline struct nd_namespace_common *to_ndns(struct device *dev) @@ -134,9 +134,10 @@ static inline struct nd_namespace_blk *to_nd_namespace_blk(const struct device * * @buf is up-to-date upon return from this routine. */ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size) + resource_size_t offset, void *buf, size_t size, + unsigned long flags) { - return ndns->rw_bytes(ndns, offset, buf, size, READ); + return ndns->rw_bytes(ndns, offset, buf, size, READ, flags); } /** @@ -152,9 +153,10 @@ static inline int nvdimm_read_bytes(struct nd_namespace_common *ndns, * to media is handled internal to the @ndns driver, if at all. 
*/ static inline int nvdimm_write_bytes(struct nd_namespace_common *ndns, - resource_size_t offset, void *buf, size_t size) + resource_size_t offset, void *buf, size_t size, + unsigned long flags) { - return ndns->rw_bytes(ndns, offset, buf, size, WRITE); + return ndns->rw_bytes(ndns, offset, buf, size, WRITE, flags); } #define MODULE_ALIAS_ND_DEVICE(type) \ -- cgit v1.2.3 From df3ed932394488e57e72dd0e73c224d1804fdc8f Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Thu, 11 May 2017 10:15:10 -0500 Subject: Partially Revert "of: fix sparse warnings in fdt, irq, reserved mem, and resolver code" A change to function pointers that was meant to address a sparse warning turned out to cause hundreds of new gcc-7 warnings: include/linux/of_irq.h:11:13: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] drivers/of/of_reserved_mem.c: In function '__reserved_mem_init_node': drivers/of/of_reserved_mem.c:200:7: error: type qualifiers ignored on function return type [-Werror=ignored-qualifiers] int const (*initfn)(struct reserved_mem *rmem) = i->data; Turns out the sparse warnings were spurious and have been fixed in upstream sparse since 0.5.0 in commit "sparse: treat function pointers as pointers to const data". This partially reverts commit 17a70355ea576843a7ac851f1db26872a50b2850. 
Fixes: 17a70355ea57 ("of: fix sparse warnings in fdt, irq, reserved mem, and resolver code") Reported-by: Arnd Bergmann Signed-off-by: Rob Herring --- include/linux/of_irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index ec6b11deb773..1e0deb8e8494 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -8,7 +8,7 @@ #include #include -typedef int const (*of_irq_init_cb_t)(struct device_node *, struct device_node *); +typedef int (*of_irq_init_cb_t)(struct device_node *, struct device_node *); /* * Workarounds only applied to 32bit powermac machines -- cgit v1.2.3 From d1174416747d790d750742d0514915deeed93acf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:22:52 -0700 Subject: bpf: Track alignment of register values in the verifier. Currently if we add only constant values to pointers we can fully validate the alignment, and properly check if we need to reject the program on !CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS architectures. However, once an unknown value is introduced we only allow byte sized memory accesses which is too restrictive. Add logic to track the known minimum alignment of register values, and propagate this state into registers containing pointers. The most common paradigm that makes use of this new logic is computing the transport header using the IP header length field. For example: struct ethhdr *ep = skb->data; struct iphdr *iph = (struct iphdr *) (ep + 1); struct tcphdr *th; ... n = iph->ihl; th = ((void *)iph + (n * 4)); port = th->dest; The existing code will reject the load of th->dest because it cannot validate that the alignment is at least 2 once "n * 4" is added to the packet pointer. In the new code, the register holding "n * 4" will have a reg->min_align value of 4, because any value multiplied by 4 will be at least 4 byte aligned.
(actually, the eBPF code emitted by the compiler in this case is most likely to use a shift left by 2, but the end result is identical) At the critical addition: th = ((void *)iph + (n * 4)); The register holding 'th' will start with reg->off value of 14. The pointer addition will transform that reg into something that looks like: reg->aux_off = 14 reg->aux_off_align = 4 Next, the verifier will look at the th->dest load, and it will see a load offset of 2, and first check: if (reg->aux_off_align % size) which will pass because aux_off_align is 4. reg_off will be computed: reg_off = reg->off; ... reg_off += reg->aux_off; plus we have off==2, and it will thus check: if ((NET_IP_ALIGN + reg_off + off) % size != 0) which evaluates to: if ((NET_IP_ALIGN + 14 + 2) % size != 0) On strict alignment architectures, NET_IP_ALIGN is 2, thus: if ((2 + 14 + 2) % size != 0) which passes. These pointer transformations and checks work regardless of whether the constant offset or the variable with known alignment is added first to the pointer register. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5efb4db44e1e..7c6a51924afc 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -40,6 +40,9 @@ struct bpf_reg_state { */ s64 min_value; u64 max_value; + u32 min_align; + u32 aux_off; + u32 aux_off_align; }; enum bpf_stack_slot_type { -- cgit v1.2.3 From e07b98d9bffe410019dfcf62c3428d4a96c56a2c Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 10 May 2017 11:38:07 -0700 Subject: bpf: Add strict alignment flag for BPF_PROG_LOAD. Add a new field, "prog_flags", and an initial flag value BPF_F_STRICT_ALIGNMENT. When set, the verifier will enforce strict pointer alignment regardless of the setting of CONFIG_EFFICIENT_UNALIGNED_ACCESS. 
The verifier, in this mode, will also use a fixed value of "2" in place of NET_IP_ALIGN. This facilitates test cases that will exercise and validate this part of the verifier even when run on architectures where alignment doesn't matter. Signed-off-by: David S. Miller Acked-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7c6a51924afc..d5093b52b485 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -90,6 +90,7 @@ struct bpf_verifier_env { struct bpf_prog *prog; /* eBPF program being verified */ struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ + bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ -- cgit v1.2.3 From d67b9cd28c1d7f82c2e5e727731ea7c89b23a0a8 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 12 May 2017 01:04:46 +0200 Subject: xdp: refine xdp api with regards to generic xdp While working on the iproute2 generic XDP frontend, I noticed that as of right now it's possible to have native *and* generic XDP programs loaded both at the same time for the case when a driver supports native XDP. The intended model for generic XDP from b5cdae3291f7 ("net: Generic XDP") is, however, that only one out of the two can be present at once which is also indicated as such in the XDP netlink dump part. The main rationale for generic XDP is to ease accessibility (in case a driver does not yet have XDP support) and to generically provide a semantical model as an example for driver developers wanting to add XDP support. 
The generic XDP option for an XDP aware driver can still be useful for comparing and testing both implementations. However, it is not intended to have a second XDP processing stage or layer with exactly the same functionality of the first native stage. Only reason could be to have a partial fallback for future XDP features that are not supported yet in the native implementation and we probably also shouldn't strive for such fallback and instead encourage native feature support in the first place. Given there's currently no such fallback issue or use case, lets not go there yet if we don't need to. Therefore, change semantics for loading XDP and bail out if the user tries to load a generic XDP program when a native one is present and vice versa. Another alternative to bailing out would be to handle the transition from one flavor to another gracefully, but that would require to bring the device down, exchange both types of programs, and bring it up again in order to avoid a tiny window where a packet could hit both hooks. Given this complicates the logic for just a debugging feature in the native case, I went with the simpler variant. For the dump, remove IFLA_XDP_FLAGS that was added with b5cdae3291f7 and reuse IFLA_XDP_ATTACHED for indicating the mode. Dumping all or just a subset of flags that were used for loading the XDP prog is suboptimal in the long run since not all flags are useful for dumping and if we start to reuse the same flag definitions for load and dump, then we'll waste bit space. What we really just want is to dump the mode for now. Current IFLA_XDP_ATTACHED semantics are: nothing was installed (0), a program is running at the native driver layer (1). Thus, add a mode that says that a program is running at generic XDP layer (2). Applications will handle this fine in that older binaries will just indicate that something is attached at XDP layer, effectively this is similar to IFLA_XDP_FLAGS attr that we would have had modulo the redundancy. 
Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9c23bd2efb56..3f39d27decf4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3296,11 +3296,15 @@ int dev_get_phys_port_id(struct net_device *dev, int dev_get_phys_port_name(struct net_device *dev, char *name, size_t len); int dev_change_proto_down(struct net_device *dev, bool proto_down); -int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, - int fd, u32 flags); struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *dev); struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); + +typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, + int fd, u32 flags); +bool __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op); + int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); bool is_skb_forwardable(const struct net_device *dev, -- cgit v1.2.3 From 572e0ca9b909339fbe017aaff1a225efb6db3b61 Mon Sep 17 00:00:00 2001 From: Deepa Dinamani Date: Fri, 12 May 2017 15:46:29 -0700 Subject: time: delete current_fs_time() All uses of the current_fs_time() function have been replaced by other time interfaces. And, its use cases can be fulfilled by current_time() or ktime_get_* variants. 
Link: http://lkml.kernel.org/r/1491613030-11599-13-git-send-email-deepa.kernel@gmail.com Signed-off-by: Deepa Dinamani Reviewed-by: Arnd Bergmann Cc: John Stultz Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 0ad325ed71e8..803e5a9b2654 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1431,7 +1431,6 @@ static inline void i_gid_write(struct inode *inode, gid_t gid) inode->i_gid = make_kgid(inode->i_sb->s_user_ns, gid); } -extern struct timespec current_fs_time(struct super_block *sb); extern struct timespec current_time(struct inode *inode); /* -- cgit v1.2.3 From 8594a21cf7a8318baedbedc3fcd2967a17ddeec0 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 12 May 2017 15:46:41 -0700 Subject: mm, vmalloc: fix vmalloc users tracking properly Commit 1f5307b1e094 ("mm, vmalloc: properly track vmalloc users") has pulled asm/pgtable.h include dependency to linux/vmalloc.h and that turned out to be a bad idea for some architectures. E.g. m68k fails with In file included from arch/m68k/include/asm/pgtable_mm.h:145:0, from arch/m68k/include/asm/pgtable.h:4, from include/linux/vmalloc.h:9, from arch/m68k/kernel/module.c:9: arch/m68k/include/asm/mcf_pgtable.h: In function 'nocache_page': >> arch/m68k/include/asm/mcf_pgtable.h:339:43: error: 'init_mm' undeclared (first use in this function) #define pgd_offset_k(address) pgd_offset(&init_mm, address) as spotted by kernel build bot. 
nios2 fails for another reason In file included from include/asm-generic/io.h:767:0, from arch/nios2/include/asm/io.h:61, from include/linux/io.h:25, from arch/nios2/include/asm/pgtable.h:18, from include/linux/mm.h:70, from include/linux/pid_namespace.h:6, from include/linux/ptrace.h:9, from arch/nios2/include/uapi/asm/elf.h:23, from arch/nios2/include/asm/elf.h:22, from include/linux/elf.h:4, from include/linux/module.h:15, from init/main.c:16: include/linux/vmalloc.h: In function '__vmalloc_node_flags': include/linux/vmalloc.h:99:40: error: 'PAGE_KERNEL' undeclared (first use in this function); did you mean 'GFP_KERNEL'? which is due to the newly added #include <asm/pgtable.h>, which on nios2 includes <linux/io.h> and thus <asm/io.h> and <asm-generic/io.h> which again includes <linux/vmalloc.h>. Tweaking that around just turns out a bigger headache than necessary. This patch reverts 1f5307b1e094 and reimplements the original fix in a different way. __vmalloc_node_flags can stay static inline which will cover vmalloc* functions. We only have one external user (kvmalloc_node) and we can export __vmalloc_node_flags_caller and provide the caller directly. This is much simpler and it doesn't really need any games with header files.
[akpm@linux-foundation.org: coding-style fixes] [mhocko@kernel.org: revert old comment] Link: http://lkml.kernel.org/r/20170509211054.GB16325@dhcp22.suse.cz Fixes: 1f5307b1e094 ("mm, vmalloc: properly track vmalloc users") Link: http://lkml.kernel.org/r/20170509153702.GR6481@dhcp22.suse.cz Signed-off-by: Michal Hocko Cc: Tobias Klauser Cc: Geert Uytterhoeven Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 21 ++++++--------------- 1 file changed, 6 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0328ce003992..2d92dd002abd 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -6,7 +6,6 @@ #include #include #include /* pgprot_t */ -#include /* PAGE_KERNEL */ #include struct vm_area_struct; /* vma defining user mapping in mm_types.h */ @@ -83,22 +82,14 @@ extern void *__vmalloc_node_range(unsigned long size, unsigned long align, const void *caller); #ifndef CONFIG_MMU extern void *__vmalloc_node_flags(unsigned long size, int node, gfp_t flags); -#else -extern void *__vmalloc_node(unsigned long size, unsigned long align, - gfp_t gfp_mask, pgprot_t prot, - int node, const void *caller); - -/* - * We really want to have this inlined due to caller tracking. This - * function is used by the highlevel vmalloc apis and so we want to track - * their callers and inlining will achieve that. 
- */ -static inline void *__vmalloc_node_flags(unsigned long size, - int node, gfp_t flags) +static inline void *__vmalloc_node_flags_caller(unsigned long size, int node, + gfp_t flags, void *caller) { - return __vmalloc_node(size, 1, flags, PAGE_KERNEL, - node, __builtin_return_address(0)); + return __vmalloc_node_flags(size, node, flags); } +#else +extern void *__vmalloc_node_flags_caller(unsigned long size, + int node, gfp_t flags, void *caller); #endif extern void vfree(const void *addr); -- cgit v1.2.3 From 4636e70bb0a8b871998b6841a2e4b205cf2bc863 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Fri, 12 May 2017 15:46:47 -0700 Subject: dax: prevent invalidation of mapped DAX entries Patch series "mm,dax: Fix data corruption due to mmap inconsistency", v4. This series fixes data corruption that can happen for DAX mounts when page faults race with write(2) and as a result page tables get out of sync with block mappings in the filesystem and thus data seen through mmap is different from data seen through read(2). The series passes testing with t_mmap_stale test program from Ross and also other mmap related tests on DAX filesystem. This patch (of 4): dax_invalidate_mapping_entry() currently removes DAX exceptional entries only if they are clean and unlocked. This is done via: invalidate_mapping_pages() invalidate_exceptional_entry() dax_invalidate_mapping_entry() However, for page cache pages removed in invalidate_mapping_pages() there is an additional criterion which is that the page must not be mapped. This is noted in the comments above invalidate_mapping_pages() and is checked in invalidate_inode_page(). For DAX entries this means that we can end up in a situation where a DAX exceptional entry, either a huge zero page or a regular DAX entry, could end up mapped but without an associated radix tree entry. This is inconsistent with the rest of the DAX code and with what happens in the page cache case.
We aren't able to unmap the DAX exceptional entry because according to its comments invalidate_mapping_pages() isn't allowed to block, and unmap_mapping_range() takes a write lock on the mapping->i_mmap_rwsem. Since we essentially never have unmapped DAX entries to evict from the radix tree, just remove dax_invalidate_mapping_entry(). Fixes: c6dcf52c23d2 ("mm: Invalidate DAX radix tree entries only if appropriate") Link: http://lkml.kernel.org/r/20170510085419.27601-2-jack@suse.cz Signed-off-by: Ross Zwisler Signed-off-by: Jan Kara Reported-by: Jan Kara Cc: Dan Williams Cc: [4.10+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dax.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index d3158e74a59e..d1236d16ef00 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -63,7 +63,6 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); -int dax_invalidate_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); void dax_wake_mapping_entry_waiter(struct address_space *mapping, -- cgit v1.2.3 From f5705aa8cfed142d980ecac12bee0d81b756479e Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 13 May 2017 16:31:05 -0700 Subject: dax, xfs, ext4: compile out iomap-dax paths in the FS_DAX=n case Tetsuo reports: fs/built-in.o: In function `xfs_file_iomap_end': xfs_iomap.c:(.text+0xe0ef9): undefined reference to `put_dax' fs/built-in.o: In function `xfs_file_iomap_begin': xfs_iomap.c:(.text+0xe1a7f): undefined reference to `dax_get_by_host' make: *** [vmlinux] Error 1 $ grep DAX .config CONFIG_DAX=m # CONFIG_DEV_DAX is not set # CONFIG_FS_DAX is not set When FS_DAX=n we can/must throw away the 
dax code in filesystems. Implement 'fs_' versions of dax_get_by_host() and put_dax() that are nops in the FS_DAX=n case. Cc: Cc: Cc: Jan Kara Cc: "Theodore Ts'o" Cc: "Darrick J. Wong" Cc: Ross Zwisler Tested-by: Tony Luck Fixes: ef51042472f5 ("block, dax: move 'select DAX' from BLOCK to FS_DAX") Reported-by: Tetsuo Handa Signed-off-by: Dan Williams --- include/linux/dax.h | 34 +++++++++++++++++++++++++++------- 1 file changed, 27 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 00ebac854bb7..5ec1f6c47716 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -18,6 +18,20 @@ struct dax_operations { void **, pfn_t *); }; +#if IS_ENABLED(CONFIG_DAX) +struct dax_device *dax_get_by_host(const char *host); +void put_dax(struct dax_device *dax_dev); +#else +static inline struct dax_device *dax_get_by_host(const char *host) +{ + return NULL; +} + +static inline void put_dax(struct dax_device *dax_dev) +{ +} +#endif + int bdev_dax_pgoff(struct block_device *, sector_t, size_t, pgoff_t *pgoff); #if IS_ENABLED(CONFIG_FS_DAX) int __bdev_dax_supported(struct super_block *sb, int blocksize); @@ -25,23 +39,29 @@ static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { return __bdev_dax_supported(sb, blocksize); } + +static inline struct dax_device *fs_dax_get_by_host(const char *host) +{ + return dax_get_by_host(host); +} + +static inline void fs_put_dax(struct dax_device *dax_dev) +{ + put_dax(dax_dev); +} + #else static inline int bdev_dax_supported(struct super_block *sb, int blocksize) { return -EOPNOTSUPP; } -#endif -#if IS_ENABLED(CONFIG_DAX) -struct dax_device *dax_get_by_host(const char *host); -void put_dax(struct dax_device *dax_dev); -#else -static inline struct dax_device *dax_get_by_host(const char *host) +static inline struct dax_device *fs_dax_get_by_host(const char *host) { return NULL; } -static inline void put_dax(struct dax_device *dax_dev) +static inline void 
fs_put_dax(struct dax_device *dax_dev) { } #endif -- cgit v1.2.3 From 508541146af18e43072e41a31aa62fac2b01aac1 Mon Sep 17 00:00:00 2001 From: Yishai Hadas Date: Tue, 25 Apr 2017 10:39:57 +0300 Subject: net/mlx5: Use underlay QPN from the root name space Root flow table is dynamically changed by the underlying flow steering layer, and IPoIB/ULPs have no idea what will be the root flow table in the future, hence we need a dynamic infrastructure to move Underlay QPs with the root flow table. Fixes: b3ba51498bdd ("net/mlx5: Refactor create flow table method to accept underlay QP") Signed-off-by: Erez Shitrit Signed-off-by: Maor Gottlieb Signed-off-by: Yishai Hadas Signed-off-by: Saeed Mahameed --- include/linux/mlx5/fs.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/fs.h b/include/linux/mlx5/fs.h index 1b166d2e19c5..b25e7baa273e 100644 --- a/include/linux/mlx5/fs.h +++ b/include/linux/mlx5/fs.h @@ -109,7 +109,6 @@ struct mlx5_flow_table_attr { int max_fte; u32 level; u32 flags; - u32 underlay_qpn; }; struct mlx5_flow_table * @@ -167,4 +166,7 @@ struct mlx5_fc *mlx5_fc_create(struct mlx5_core_dev *dev, bool aging); void mlx5_fc_destroy(struct mlx5_core_dev *dev, struct mlx5_fc *counter); void mlx5_fc_query_cached(struct mlx5_fc *counter, u64 *bytes, u64 *packets, u64 *lastuse); +int mlx5_fs_add_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); +int mlx5_fs_remove_rx_underlay_qpn(struct mlx5_core_dev *dev, u32 underlay_qpn); + #endif -- cgit v1.2.3 From 324318f0248c31be8a08984146e7e4dd7cdd091d Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 9 May 2017 16:17:37 -0400 Subject: netfilter: xtables: zero padding in data_to_user When looking up an iptables rule, the iptables binary compares the aligned match and target data (XT_ALIGN). In some cases this can exceed the actual data size to include padding bytes. 
Before commit f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers") the malloc()ed bytes were overwritten by the kernel with kzalloced contents, zeroing the padding and making the comparison succeed. After this patch, the kernel copies and clears only data, leaving the padding bytes undefined. Extend the clear operation from data size to aligned data size to include the padding bytes, if any. Padding bytes can be observed in both match and target, and the bug triggered, by issuing a rule with match icmp and target ACCEPT: iptables -t mangle -A INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT iptables -t mangle -D INPUT -i lo -p icmp --icmp-type 1 -j ACCEPT Fixes: f77bc5b23fb1 ("iptables: use match, target and data copy_to_user helpers") Reported-by: Paul Moore Reported-by: Richard Guy Briggs Signed-off-by: Willem de Bruijn Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index be378cf47fcc..b3044c2c62cb 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -294,7 +294,7 @@ int xt_match_to_user(const struct xt_entry_match *m, int xt_target_to_user(const struct xt_entry_target *t, struct xt_entry_target __user *u); int xt_data_to_user(void __user *dst, const void *src, - int usersize, int size); + int usersize, int size, int aligned_size); void *xt_copy_counters_from_user(const void __user *user, unsigned int len, struct xt_counters_info *info, bool compat); -- cgit v1.2.3 From c953d63548207a085abcb12a15fefc8a11ffdf0a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Tue, 16 May 2017 09:30:18 +0800 Subject: ebtables: arpreply: Add the standard target sanity check The info->target comes from userspace and it would be used directly. 
So we need to add the sanity check to make sure it is a valid standard target, although the ebtables tool has already checked it. Kernel needs to validate anything coming from userspace. If the target is set as an evil value, it would break the ebtables and cause a panic. Because the non-standard target is treated as one offset. Now add one helper function ebt_invalid_target, and we would replace the macro INVALID_TARGET later. Signed-off-by: Gao Feng Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index a30efb437e6d..e0cbf17af780 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -125,4 +125,9 @@ extern unsigned int ebt_do_table(struct sk_buff *skb, /* True if the target is not a standard target */ #define INVALID_TARGET (info->target < -NUM_STANDARD_TARGETS || info->target >= 0) +static inline bool ebt_invalid_target(int target) +{ + return (target < -NUM_STANDARD_TARGETS || target >= 0); +} + #endif -- cgit v1.2.3 From 30e7d894c1478c88d50ce94ddcdbd7f9763d9cdd Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 17 May 2017 10:19:49 +0200 Subject: tracing/kprobes: Enforce kprobes teardown after testing Enabling the tracer selftest triggers occasionally the warning in text_poke(), which warns when the to be modified page is not marked reserved. The reason is that the tracer selftest installs kprobes on functions marked __init for testing. These probes are removed after the tests, but that removal schedules the delayed kprobes_optimizer work, which will do the actual text poke. If the work is executed after the init text is freed, then the warning triggers. The bug can be reproduced reliably when the work delay is increased. 
Flush the optimizer work and wait for the optimizing/unoptimizing lists to become empty before returning from the kprobes tracer selftest. That ensures that all operations which were queued due to the probes removal have completed. Link: http://lkml.kernel.org/r/20170516094802.76a468bb@gandalf.local.home Signed-off-by: Thomas Gleixner Acked-by: Masami Hiramatsu Cc: stable@vger.kernel.org Fixes: 6274de498 ("kprobes: Support delayed unoptimizing") Signed-off-by: Steven Rostedt (VMware) --- include/linux/kprobes.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 30f90c1a0aaf..541df0b5b815 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -349,6 +349,9 @@ extern int proc_kprobes_optimization_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, loff_t *ppos); #endif +extern void wait_for_kprobe_optimizer(void); +#else +static inline void wait_for_kprobe_optimizer(void) { } #endif /* CONFIG_OPTPROBES */ #ifdef CONFIG_KPROBES_ON_FTRACE extern void kprobe_ftrace_handler(unsigned long ip, unsigned long parent_ip, -- cgit v1.2.3 From 6a29beef9d1b16c762e469d77e28c3de3f5c3dbb Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Wed, 17 May 2017 18:32:02 +0300 Subject: usb: host: xhci-ring: don't need to clear interrupt pending for MSI enabled hcd According to xHCI spec Figure 30: Interrupt Throttle Flow Diagram If PCI Message Signaled Interrupts (MSI or MSI-X) are enabled, then the assertion of the Interrupt Pending (IP) flag in Figure 30 generates a PCI Dword write. The IP flag is automatically cleared by the completion of the PCI write. the MSI enabled HCs don't need to clear interrupt pending bit, but hcd->irq = 0 doesn't equal to MSI enabled HCD. At some Dual-role controller software designs, it sets hcd->irq as 0 to avoid HCD requesting interrupt, and they want to decide when to call usb_hcd_irq by software. 
Signed-off-by: Peter Chen Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index a469999a106d..50398b69ca44 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -148,6 +148,7 @@ struct usb_hcd { unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ unsigned msix_enabled:1; /* driver has MSI-X enabled? */ + unsigned msi_enabled:1; /* driver has MSI enabled? */ unsigned remove_phy:1; /* auto-remove USB phy */ /* The next flag is a stopgap, to be removed when all the HCDs -- cgit v1.2.3 From 6bdc00d01e202ae11fa1cae0dacbef895434483d Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Fri, 28 Apr 2017 13:47:21 +0200 Subject: serdev: Restore serdev_device_write_buf for atomic context Starting with commit 6fe729c4bdae ("serdev: Add serdev_device_write subroutine") the function serdev_device_write_buf cannot be used in atomic context anymore (mutex_lock is sleeping). So restore the old behavior. 
Signed-off-by: Stefan Wahren Fixes: 6fe729c4bdae ("serdev: Add serdev_device_write subroutine") Acked-by: Rob Herring Reviewed-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index cda76c6506ca..e2a225bf716d 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -195,6 +195,7 @@ int serdev_device_open(struct serdev_device *); void serdev_device_close(struct serdev_device *); unsigned int serdev_device_set_baudrate(struct serdev_device *, unsigned int); void serdev_device_set_flow_control(struct serdev_device *, bool); +int serdev_device_write_buf(struct serdev_device *, const unsigned char *, size_t); void serdev_device_wait_until_sent(struct serdev_device *, long); int serdev_device_get_tiocm(struct serdev_device *); int serdev_device_set_tiocm(struct serdev_device *, int, int); @@ -236,6 +237,12 @@ static inline unsigned int serdev_device_set_baudrate(struct serdev_device *sdev return 0; } static inline void serdev_device_set_flow_control(struct serdev_device *sdev, bool enable) {} +static inline int serdev_device_write_buf(struct serdev_device *serdev, + const unsigned char *buf, + size_t count) +{ + return -ENODEV; +} static inline void serdev_device_wait_until_sent(struct serdev_device *sdev, long timeout) {} static inline int serdev_device_get_tiocm(struct serdev_device *serdev) { @@ -312,11 +319,4 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port, static inline void serdev_tty_port_unregister(struct tty_port *port) {} #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ -static inline int serdev_device_write_buf(struct serdev_device *serdev, - const unsigned char *data, - size_t count) -{ - return serdev_device_write(serdev, data, count, 0); -} - #endif /*_LINUX_SERDEV_H */ -- cgit v1.2.3 From 8cde11b2baa1d02eb2eb955dfd47d9f2a12f12cf Mon Sep 
17 00:00:00 2001 From: Johan Hovold Date: Thu, 18 May 2017 17:33:00 +0200 Subject: tty/serdev: add serdev registration interface Add a new interface for registering a serdev controller and clients, and a helper function to deregister serdev devices (or a tty device) that were previously registered using the new interface. Once every driver currently using the tty_port_register_device() helpers have been vetted and converted to use the new serdev registration interface (at least for deregistration), we can move serdev registration to the current helpers and get rid of the serdev-specific functions. Reviewed-by: Rob Herring Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 7 +++++-- include/linux/tty.h | 9 +++++++++ 2 files changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index e2a225bf716d..e69402d4a8ae 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -308,7 +308,7 @@ struct tty_driver; struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, struct tty_driver *drv, int idx); -void serdev_tty_port_unregister(struct tty_port *port); +int serdev_tty_port_unregister(struct tty_port *port); #else static inline struct device *serdev_tty_port_register(struct tty_port *port, struct device *parent, @@ -316,7 +316,10 @@ static inline struct device *serdev_tty_port_register(struct tty_port *port, { return ERR_PTR(-ENODEV); } -static inline void serdev_tty_port_unregister(struct tty_port *port) {} +static inline int serdev_tty_port_unregister(struct tty_port *port) +{ + return -ENODEV; +} #endif /* CONFIG_SERIAL_DEV_CTRL_TTYPORT */ #endif /*_LINUX_SERDEV_H */ diff --git a/include/linux/tty.h b/include/linux/tty.h index d07cd2105a6c..eccb4ec30a8a 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -558,6 +558,15 @@ extern struct device *tty_port_register_device_attr(struct tty_port *port, struct 
tty_driver *driver, unsigned index, struct device *device, void *drvdata, const struct attribute_group **attr_grp); +extern struct device *tty_port_register_device_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device); +extern struct device *tty_port_register_device_attr_serdev(struct tty_port *port, + struct tty_driver *driver, unsigned index, + struct device *device, void *drvdata, + const struct attribute_group **attr_grp); +extern void tty_port_unregister_device(struct tty_port *port, + struct tty_driver *driver, unsigned index); extern int tty_port_alloc_xmit_buf(struct tty_port *port); extern void tty_port_free_xmit_buf(struct tty_port *port); extern void tty_port_destroy(struct tty_port *port); -- cgit v1.2.3 From 4123109050a869a8871e58a50f28f383d41e49ad Mon Sep 17 00:00:00 2001 From: James Smart Date: Fri, 5 May 2017 16:13:02 -0700 Subject: nvme-fc: correct port role bits FC Port roles is a bit mask, not individual values. Correct nvme definitions to unique bits. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 0db37158a61d..12e344b5b77f 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -27,8 +27,8 @@ /* FC Port role bitmask - can merge with FC Port Roles in fc transport */ #define FC_PORT_ROLE_NVME_INITIATOR 0x10 -#define FC_PORT_ROLE_NVME_TARGET 0x11 -#define FC_PORT_ROLE_NVME_DISCOVERY 0x12 +#define FC_PORT_ROLE_NVME_TARGET 0x20 +#define FC_PORT_ROLE_NVME_DISCOVERY 0x40 /** -- cgit v1.2.3 From 4b8ba5fa525bc8bdaaed2a5c5433f0f2008d7bc5 Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 25 Apr 2017 16:23:09 -0700 Subject: nvmet-fc: remove target cpu scheduling flag Remove NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED. 
It's unnecessary. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 12e344b5b77f..6c8c5d8041b7 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -642,15 +642,7 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_NEEDS_CMD_CPUSCHED = (1 << 1), - /* Bit 1: When 0, the LLDD will deliver FCP CMD - * on the CPU it should be affinitized to. Thus work will - * be scheduled on the cpu received on. When 1, the LLDD - * may not deliver the CMD on the CPU it should be worked - * on. The transport should pick a cpu to schedule the work - * on. - */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 2), + NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), /* Bit 2: When 0, the LLDD is calling the cmd rcv handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD @@ -658,7 +650,7 @@ enum { * requiring the transport to transition to a workqueue * for op completion. */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 3), + NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), /* Bit 3: When 0, the LLDD is calling the op done handler * in a non-isr context, allowing the transport to finish * op completion in the calling context. When 1, the LLDD -- cgit v1.2.3 From c2372c20425bd75a5527b3e2204059762190f6ca Mon Sep 17 00:00:00 2001 From: Jan Glauber Date: Mon, 22 May 2017 13:09:20 +0200 Subject: of/platform: Make of_platform_device_destroy globally visible of_platform_device_destroy is the counterpart to of_platform_device_create which is a non-static function. 
After creating a platform device it might be necessary to destroy it to deal with -EPROBE_DEFER where a repeated of_platform_device_create call would fail otherwise. Therefore also make of_platform_device_destroy globally visible. Signed-off-by: Jan Glauber Acked-by: Rob Herring Signed-off-by: Ulf Hansson --- include/linux/of_platform.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_platform.h b/include/linux/of_platform.h index dc8224ae28d5..e0d1946270f3 100644 --- a/include/linux/of_platform.h +++ b/include/linux/of_platform.h @@ -64,6 +64,7 @@ extern struct platform_device *of_platform_device_create(struct device_node *np, const char *bus_id, struct device *parent); +extern int of_platform_device_destroy(struct device *dev, void *data); extern int of_platform_bus_probe(struct device_node *root, const struct of_device_id *matches, struct device *parent); -- cgit v1.2.3 From 7254a50a5db40ca6739ddf37e0a45e6912532b2c Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 22 May 2017 23:05:05 +0800 Subject: blk-mq: remove blk_mq_abort_requeue_list() No one uses it any more, so remove it.
Reviewed-by: Keith Busch Reviewed-by: Johannes Thumshirn Signed-off-by: Ming Lei Signed-off-by: Christoph Hellwig --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c47aa248c640..fcd641032f8d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -238,7 +238,6 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From c70d9d809fdeecedb96972457ee45c49a232d97f Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Mon, 22 May 2017 15:40:12 -0500 Subject: ptrace: Properly initialize ptracer_cred on fork When I introduced ptracer_cred I failed to consider the weirdness of fork where the task_struct copies the old value by default. This winds up leaving ptracer_cred set even when a process forks and the child process does not wind up being ptraced. Because ptracer_cred is not set on non-ptraced processes whose parents were ptraced this has broken the ability of the enlightenment window manager to start setuid children. Fix this by properly initializing ptracer_cred in ptrace_init_task This must be done with a little bit of care to preserve the current value of ptracer_cred when ptrace carries through fork. Re-reading the ptracer_cred from the ptracing process at this point is inconsistent with how PT_PTRACE_CAP has been maintained all of these years. Tested-by: Takashi Iwai Fixes: 64b875f7ac8a ("ptrace: Capture the ptracer's creds not PT_PTRACE_CAP") Signed-off-by: "Eric W. 
Biederman" --- include/linux/ptrace.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptrace.h b/include/linux/ptrace.h index 422bc2e4cb6a..ef3eb8bbfee4 100644 --- a/include/linux/ptrace.h +++ b/include/linux/ptrace.h @@ -54,7 +54,8 @@ extern int ptrace_request(struct task_struct *child, long request, unsigned long addr, unsigned long data); extern void ptrace_notify(int exit_code); extern void __ptrace_link(struct task_struct *child, - struct task_struct *new_parent); + struct task_struct *new_parent, + const struct cred *ptracer_cred); extern void __ptrace_unlink(struct task_struct *child); extern void exit_ptrace(struct task_struct *tracer, struct list_head *dead); #define PTRACE_MODE_READ 0x01 @@ -206,7 +207,7 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) if (unlikely(ptrace) && current->ptrace) { child->ptrace = current->ptrace; - __ptrace_link(child, current->parent); + __ptrace_link(child, current->parent, current->ptracer_cred); if (child->ptrace & PT_SEIZED) task_set_jobctl_pending(child, JOBCTL_TRAP_STOP); @@ -215,6 +216,8 @@ static inline void ptrace_init_task(struct task_struct *child, bool ptrace) set_tsk_thread_flag(child, TIF_SIGPENDING); } + else + child->ptracer_cred = NULL; } /** -- cgit v1.2.3 From 73dd3a4839c1d27c36d4dcc92e1ff44225ecbeb7 Mon Sep 17 00:00:00 2001 From: Mohamad Haj Yahia Date: Thu, 23 Feb 2017 11:19:36 +0200 Subject: net/mlx5: Avoid using pending command interface slots Currently when firmware command gets stuck or it takes long time to complete, the driver command will get timeout and the command slot is freed and can be used for new commands, and if the firmware receive new command on the old busy slot its behavior is unexpected and this could be harmful. To fix this when the driver command gets timeout we return failure, but we don't free the command slot and we wait for the firmware to explicitly respond to that command. 
Once all the entries are busy we will stop processing new firmware commands. Fixes: 9cba4ebcf374 ('net/mlx5: Fix potential deadlock in command mode change') Signed-off-by: Mohamad Haj Yahia Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index bcdf739ee41a..93273d9ea4d1 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -787,7 +787,12 @@ enum { typedef void (*mlx5_cmd_cbk_t)(int status, void *context); +enum { + MLX5_CMD_ENT_STATE_PENDING_COMP, +}; + struct mlx5_cmd_work_ent { + unsigned long state; struct mlx5_cmd_msg *in; struct mlx5_cmd_msg *out; void *uout; @@ -976,7 +981,7 @@ void mlx5_cq_completion(struct mlx5_core_dev *dev, u32 cqn); void mlx5_rsc_event(struct mlx5_core_dev *dev, u32 rsn, int event_type); void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec, bool forced); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, -- cgit v1.2.3 From 7f65b1f5adc5f8496ca8bec4947de66fefe36220 Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 22 May 2017 14:50:30 +0200 Subject: cdc-ether: divorce initialisation with a filter reset and a generic method Some devices need their multicast filter reset but others are crashed by that. So the methods need to be separated. Signed-off-by: Oliver Neukum Reported-by: "Ridgway, Keith" Signed-off-by: David S. 
Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 7dffa5624ea6..97116379db5f 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -206,6 +206,7 @@ struct cdc_state { }; extern int usbnet_generic_cdc_bind(struct usbnet *, struct usb_interface *); +extern int usbnet_ether_cdc_bind(struct usbnet *dev, struct usb_interface *intf); extern int usbnet_cdc_bind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_unbind(struct usbnet *, struct usb_interface *); extern void usbnet_cdc_status(struct usbnet *, struct urb *); -- cgit v1.2.3 From 12e8b570e732eaa5eae3a2895ba3fbcf91bde2b4 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 22 May 2017 20:13:07 +0200 Subject: mlx5: fix bug reading rss_hash_type from CQE Masks for extracting part of the Completion Queue Entry (CQE) field rss_hash_type were swapped, namely CQE_RSS_HTYPE_IP and CQE_RSS_HTYPE_L4. The bug resulted in setting skb->l4_hash, even though the rss_hash_type indicated that the hash was NOT computed over the L4 (UDP or TCP) part of the packet. Added comments from the datasheet, to make it more clear what these masks are selecting. Signed-off-by: Jesper Dangaard Brouer Acked-by: Saeed Mahameed Signed-off-by: David S.
Miller --- include/linux/mlx5/device.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index dd9a263ed368..a940ec6a046c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -787,8 +787,14 @@ enum { }; enum { - CQE_RSS_HTYPE_IP = 0x3 << 6, - CQE_RSS_HTYPE_L4 = 0x3 << 2, + CQE_RSS_HTYPE_IP = 0x3 << 2, + /* cqe->rss_hash_type[3:2] - IP destination selected for hash + * (00 = none, 01 = IPv4, 10 = IPv6, 11 = Reserved) + */ + CQE_RSS_HTYPE_L4 = 0x3 << 6, + /* cqe->rss_hash_type[7:6] - L4 destination selected for hash + * (00 = none, 01 = TCP. 10 = UDP, 11 = IPSEC.SPI + */ }; enum { -- cgit v1.2.3 From 6f4dbd149d2a151b89d1a5bbf7530ee5546c7908 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 19 May 2017 11:33:16 +0200 Subject: libceph: use kbasename() and kill ceph_file_part() Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_debug.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_debug.h b/include/linux/ceph/ceph_debug.h index aa2e19182d99..51c5bd64bd00 100644 --- a/include/linux/ceph/ceph_debug.h +++ b/include/linux/ceph/ceph_debug.h @@ -3,6 +3,8 @@ #define pr_fmt(fmt) KBUILD_MODNAME ": " fmt +#include + #ifdef CONFIG_CEPH_LIB_PRETTYDEBUG /* @@ -12,12 +14,10 @@ */ # if defined(DEBUG) || defined(CONFIG_DYNAMIC_DEBUG) -extern const char *ceph_file_part(const char *s, int len); # define dout(fmt, ...) \ pr_debug("%.*s %12.12s:%-4d : " fmt, \ 8 - (int)sizeof(KBUILD_MODNAME), " ", \ - ceph_file_part(__FILE__, sizeof(__FILE__)), \ - __LINE__, ##__VA_ARGS__) + kbasename(__FILE__), __LINE__, ##__VA_ARGS__) # else /* faux printk call just to see any compiler warnings. */ # define dout(fmt, ...) 
do { \ -- cgit v1.2.3 From 4d071c3238987325b9e50e33051a40d1cce311cc Mon Sep 17 00:00:00 2001 From: Imre Deak Date: Tue, 23 May 2017 14:18:17 -0500 Subject: PCI/PM: Add needs_resume flag to avoid suspend complete optimization Some drivers - like i915 - may not support the system suspend direct complete optimization due to differences in their runtime and system suspend sequence. Add a flag that when set resumes the device before calling the driver's system suspend handlers which effectively disables the optimization. Needed by a future patch fixing suspend/resume on i915. Suggested by Rafael. Signed-off-by: Imre Deak Signed-off-by: Bjorn Helgaas Acked-by: Rafael J. Wysocki Cc: stable@vger.kernel.org --- include/linux/pci.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..df7dd9021646 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -183,6 +183,11 @@ enum pci_dev_flags { PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), + /* + * Resume before calling the driver's system suspend hooks, disabling + * the direct_complete optimization. + */ + PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From 35d2f80b07bbe03fb358afb0bdeff7437a7d67ff Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Tue, 23 May 2017 13:38:41 -0400 Subject: vlan: Fix tcp checksum offloads in Q-in-Q vlans It appears that TCP checksum offloading has been broken for Q-in-Q vlans. The behavior was exacerbated by the series commit afb0bc972b52 ("Merge branch 'stacked_vlan_tso'") that enabled acceleration features on stacked vlans. However, even without that series, it is possible to trigger this issue. It just requires a lot more specialized configuration.
The root cause is the interaction between how netdev_intersect_features() works, the features actually set on the vlan devices and HW having the ability to run checksum with longer headers. The issue starts when netdev_intersect_features() replaces NETIF_F_HW_CSUM with a combination of NETIF_F_IP_CSUM | NETIF_F_IPV6_CSUM, if the HW advertises IP|IPV6 specific checksums. This happens for tagged and multi-tagged packets. However, HW that enables IP|IPV6 checksum offloading doesn't guarantee that packets with arbitrarily long headers can be checksummed. This patch disables IP|IPV6 checksums on the packet for multi-tagged packets. CC: Toshiaki Makita CC: Michal Kubecek Signed-off-by: Vladislav Yasevich Acked-by: Toshiaki Makita Signed-off-by: David S. Miller --- include/linux/if_vlan.h | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_vlan.h b/include/linux/if_vlan.h index 8d5fcd6284ce..283dc2f5364d 100644 --- a/include/linux/if_vlan.h +++ b/include/linux/if_vlan.h @@ -614,14 +614,16 @@ static inline bool skb_vlan_tagged_multi(const struct sk_buff *skb) static inline netdev_features_t vlan_features_check(const struct sk_buff *skb, netdev_features_t features) { - if (skb_vlan_tagged_multi(skb)) - features = netdev_intersect_features(features, - NETIF_F_SG | - NETIF_F_HIGHDMA | - NETIF_F_FRAGLIST | - NETIF_F_HW_CSUM | - NETIF_F_HW_VLAN_CTAG_TX | - NETIF_F_HW_VLAN_STAG_TX); + if (skb_vlan_tagged_multi(skb)) { + /* In the case of multi-tagged packets, use a direct mask + * instead of using netdev_interesect_features(), to make + * sure that only devices supporting NETIF_F_HW_CSUM will + * have checksum offloading support.
+ */ + features &= NETIF_F_SG | NETIF_F_HIGHDMA | NETIF_F_HW_CSUM | + NETIF_F_FRAGLIST | NETIF_F_HW_VLAN_CTAG_TX | + NETIF_F_HW_VLAN_STAG_TX; + } return features; } -- cgit v1.2.3 From 614d0d77b49a9b131e58b77473698ab5b2c525b7 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 25 May 2017 01:05:09 +0200 Subject: bpf: add various verifier test cases This patch adds various verifier test cases: 1) A test case for the pruning issue when tracking alignment is used. 2) Various PTR_TO_MAP_VALUE_OR_NULL tests to make sure pointer arithmetic turns such a register into UNKNOWN_VALUE type. 3) Test cases for the special treatment of LD_ABS/LD_IND to make sure verifier doesn't break calling convention here. The latter is needed, since e.g. arm64 JIT uses r1 - r5 for storing temporary data, so they really must be marked as NOT_INIT. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 56197f82af45..62d948f80730 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -272,6 +272,16 @@ struct bpf_prog_aux; .off = OFF, \ .imm = IMM }) +/* Unconditional jumps, goto pc + off16 */ + +#define BPF_JMP_A(OFF) \ + ((struct bpf_insn) { \ + .code = BPF_JMP | BPF_JA, \ + .dst_reg = 0, \ + .src_reg = 0, \ + .off = OFF, \ + .imm = 0 }) + /* Function call */ #define BPF_EMIT_CALL(FUNC) \ -- cgit v1.2.3 From 83b4605b0c16cde5b00c8cf192408d51eab75402 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 20 May 2017 18:59:54 +0200 Subject: PCI/msi: fix the pci_alloc_irq_vectors_affinity stub We need to return an error for any call that asks for MSI / MSI-X vectors only, so that non-trivial fallback logic can work properly. Also validate dev->irq and use the "correct" errno value based on feedback from Linus.
Signed-off-by: Christoph Hellwig Reported-by: Steven Rostedt Fixes: aff17164 ("PCI: Provide sensible IRQ vector alloc/free routines") Signed-off-by: Linus Torvalds --- include/linux/pci.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 33c2b0b77429..fc2e832d7b9c 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1342,9 +1342,9 @@ pci_alloc_irq_vectors_affinity(struct pci_dev *dev, unsigned int min_vecs, unsigned int max_vecs, unsigned int flags, const struct irq_affinity *aff_desc) { - if (min_vecs > 1) - return -EINVAL; - return 1; + if ((flags & PCI_IRQ_LEGACY) && min_vecs == 1 && dev->irq) + return 1; + return -ENOSPC; } static inline void pci_free_irq_vectors(struct pci_dev *dev) -- cgit v1.2.3