From 780b1350d316fda28d85fcae17854c778d89cbbe Mon Sep 17 00:00:00 2001 From: Ramesh Shanmugasundaram Date: Mon, 3 Jul 2017 12:04:21 +0100 Subject: regmap: Avoid namespace collision within macro & tidy up Renamed variable "timeout" to "__timeout" & "pollret" to "__ret" to avoid namespace collision. Tidy up macro arguments with parentheses. Signed-off-by: Ramesh Shanmugasundaram Signed-off-by: Mark Brown --- include/linux/regmap.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index e88649225a60..6e1df5e721a9 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -120,23 +120,24 @@ struct reg_sequence { */ #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \ ({ \ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ - int pollret; \ + ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \ + int __ret; \ might_sleep_if(sleep_us); \ for (;;) { \ - pollret = regmap_read((map), (addr), &(val)); \ - if (pollret) \ + __ret = regmap_read((map), (addr), &(val)); \ + if (__ret) \ break; \ if (cond) \ break; \ - if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ - pollret = regmap_read((map), (addr), &(val)); \ + if ((timeout_us) && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ + __ret = regmap_read((map), (addr), &(val)); \ break; \ } \ if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + usleep_range(((sleep_us) >> 2) + 1, sleep_us); \ } \ - pollret ?: ((cond) ? 0 : -ETIMEDOUT); \ + __ret ?: ((cond) ? 0 : -ETIMEDOUT); \ }) #ifdef CONFIG_REGMAP -- cgit v1.2.3 From 300238cecede382fbd9084499e57f3b30acc75fa Mon Sep 17 00:00:00 2001 From: Gustavo Padovan Date: Mon, 31 Jul 2017 16:36:55 -0300 Subject: dma-buf/sync_file: document flags field Documentation for it was missing. Signed-off-by: Gustavo Padovan Acked-by: Sumit Semwal Link: https://patchwork.freedesktop.org/patch/msgid/20170731193655.6176-1-gustavo@padovan.org --- include/linux/sync_file.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sync_file.h b/include/linux/sync_file.h index 0ad87c434ae6..790ca021203a 100644 --- a/include/linux/sync_file.h +++ b/include/linux/sync_file.h @@ -25,8 +25,12 @@ * @file: file representing this fence * @sync_file_list: membership in global file list * @wq: wait queue for fence signaling + * @flags: flags for the sync_file * @fence: fence with the fences in the sync_file * @cb: fence callback information + * + * flags: + * POLL_ENABLED: whether userspace is currently poll()'ing or not */ struct sync_file { struct file *file; -- cgit v1.2.3 From 63b19547cc3d96041d7bc7ab8de6292b0ebaf2c9 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 23 Jul 2017 17:25:43 +0100 Subject: iio: Use macro magic to avoid manual assign of driver_module Starting point in boiler plate reduction similar to that done for many similar cases elsewhere in the kernel. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/iio.h | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index d68bec297a45..97a014300947 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -518,6 +518,7 @@ struct iio_buffer_setup_ops { /** * struct iio_dev - industrial I/O device * @id: [INTERN] used to identify device internally + * @driver_module: [INTERN] used to make it harder to undercut users * @modes: [DRIVER] operating modes supported by device * @currentmode: [DRIVER] current operating mode * @dev: [DRIVER] device structure, should be assigned a parent @@ -558,6 +559,7 @@ struct iio_buffer_setup_ops { */ struct iio_dev { int id; + struct module *driver_module; int modes; int currentmode; @@ -604,9 +606,34 @@ struct iio_dev { const struct iio_chan_spec *iio_find_channel_from_si(struct iio_dev *indio_dev, int si); -int iio_device_register(struct iio_dev *indio_dev); +/** + * iio_device_register() - register a device with the IIO subsystem + * @indio_dev: Device structure filled by the device driver + **/ +#define iio_device_register(iio_dev) \ + __iio_device_register((iio_dev), THIS_MODULE) +int __iio_device_register(struct iio_dev *indio_dev, struct module *this_mod); void iio_device_unregister(struct iio_dev *indio_dev); -int devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev); +/** + * devm_iio_device_register - Resource-managed iio_device_register() + * @dev: Device to allocate iio_dev for + * @indio_dev: Device structure filled by the device driver + * + * Managed iio_device_register. The IIO device registered with this + * function is automatically unregistered on driver detach. This function + * calls iio_device_register() internally. Refer to that function for more + * information. + * + * If an iio_dev registered with this function needs to be unregistered + * separately, devm_iio_device_unregister() must be used. + * + * RETURNS: + * 0 on success, negative error number on failure. + */ +#define devm_iio_device_register(dev, indio_dev) \ + __devm_iio_device_register((dev), (indio_dev), THIS_MODULE); +int __devm_iio_device_register(struct device *dev, struct iio_dev *indio_dev, + struct module *this_mod); void devm_iio_device_unregister(struct device *dev, struct iio_dev *indio_dev); int iio_push_event(struct iio_dev *indio_dev, u64 ev_code, s64 timestamp); int iio_device_claim_direct_mode(struct iio_dev *indio_dev); -- cgit v1.2.3 From 035c70aeb64b861aa88a666c61fda8b2ae49aeae Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 23 Jul 2017 17:25:44 +0100 Subject: iio: triggers: Use macros to avoid boilerplate assignment of owner. This trig_ops.owner assignment occurs in all trigger drivers and can be simply automated using a macro as has been done in many other places in the kernel. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/trigger.h | 19 +++++++++++++------ 1 file changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index ea08302f2d7b..999793212b40 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -62,6 +62,7 @@ struct iio_trigger_ops { **/ struct iio_trigger { const struct iio_trigger_ops *ops; + struct module *owner; int id; const char *name; struct device dev; @@ -87,14 +88,14 @@ static inline struct iio_trigger *to_iio_trigger(struct device *d) static inline void iio_trigger_put(struct iio_trigger *trig) { - module_put(trig->ops->owner); + module_put(trig->owner); put_device(&trig->dev); } static inline struct iio_trigger *iio_trigger_get(struct iio_trigger *trig) { get_device(&trig->dev); - __module_get(trig->ops->owner); + __module_get(trig->owner); return trig; } @@ -127,10 +128,16 @@ static inline void *iio_trigger_get_drvdata(struct iio_trigger *trig) * iio_trigger_register() - register a trigger with the IIO core * @trig_info: trigger to be registered **/ -int iio_trigger_register(struct iio_trigger *trig_info); - -int devm_iio_trigger_register(struct device *dev, - struct iio_trigger *trig_info); +#define iio_trigger_register(trig_info) \ + __iio_trigger_register((trig_info), THIS_MODULE) +int __iio_trigger_register(struct iio_trigger *trig_info, + struct module *this_mod); + +#define devm_iio_trigger_register(dev, trig_info) \ + __devm_iio_trigger_register((dev), (trig_info), THIS_MODULE) +int __devm_iio_trigger_register(struct device *dev, + struct iio_trigger *trig_info, + struct module *this_mod); /** * iio_trigger_unregister() - unregister a trigger from the core -- cgit v1.2.3 From 97623c0a80a605ef3fae337081ed008796bf8cc2 Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Sun, 23 Jul 2017 17:26:21 +0100 Subject: iio: drop iio_info.driver_module and iio_trigger_ops.owner. The equivalents are now assigned automatically in the relevant registration calls and so are not needed in these operations structures. Signed-off-by: Jonathan Cameron Reviewed-by: Lars-Peter Clausen --- include/linux/iio/iio.h | 3 --- include/linux/iio/trigger.h | 2 -- 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 97a014300947..486ffbb1a926 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -369,8 +369,6 @@ struct iio_dev; /** * struct iio_info - constant information about device - * @driver_module: module structure used to ensure correct - * ownership of chrdevs etc * @event_attrs: event control attributes * @attrs: general purpose device attributes * @read_raw: function to request a value from the device. @@ -425,7 +423,6 @@ struct iio_dev; * were flushed and there was an error. **/ struct iio_info { - struct module *driver_module; const struct attribute_group *event_attrs; const struct attribute_group *attrs; diff --git a/include/linux/iio/trigger.h b/include/linux/iio/trigger.h index 999793212b40..1afe349af1fa 100644 --- a/include/linux/iio/trigger.h +++ b/include/linux/iio/trigger.h @@ -23,7 +23,6 @@ struct iio_trigger; /** * struct iio_trigger_ops - operations structure for an iio_trigger. - * @owner: used to monitor usage count of the trigger. * @set_trigger_state: switch on/off the trigger on demand * @try_reenable: function to reenable the trigger when the * use count is zero (may be NULL) @@ -34,7 +33,6 @@ struct iio_trigger; * instances of a given device. **/ struct iio_trigger_ops { - struct module *owner; int (*set_trigger_state)(struct iio_trigger *trig, bool state); int (*try_reenable)(struct iio_trigger *trig); int (*validate_device)(struct iio_trigger *trig, -- cgit v1.2.3 From c4860ad60564838994b74e7ee7dd12ceeda0f520 Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Mon, 31 Jul 2017 19:55:08 +0100 Subject: lib/scatterlist: Fix offset type in sg_alloc_table_from_pages Scatterlist entries have an unsigned int for the offset so correct the sg_alloc_table_from_pages function accordingly. Since these are offsets withing a page, unsigned int is wide enough. Also converts callers which were using unsigned long locally with the lower_32_bits annotation to make it explicitly clear what is happening. v2: Use offset_in_page. (Chris Wilson) Signed-off-by: Tvrtko Ursulin Cc: Masahiro Yamada Cc: Pawel Osciak Cc: Marek Szyprowski Cc: Kyungmin Park Cc: Tomasz Stanislawski Cc: Matt Porter Cc: Alexandre Bounine Cc: linux-media@vger.kernel.org Cc: linux-kernel@vger.kernel.org Acked-by: Marek Szyprowski (v1) Reviewed-by: Chris Wilson Reviewed-by: Mauro Carvalho Chehab Link: https://patchwork.freedesktop.org/patch/msgid/20170731185512.20010-1-tvrtko.ursulin@linux.intel.com --- include/linux/scatterlist.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 4b3286ac60c8..205aefb4ed93 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -263,7 +263,7 @@ int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, unsigned int n_pages, - unsigned long offset, unsigned long size, + unsigned int offset, unsigned long size, gfp_t gfp_mask); size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, -- cgit v1.2.3 From c125906b839b794c580a5de911de65bd2c63aaee Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 3 Aug 2017 10:13:12 +0100 Subject: lib/scatterlist: Avoid potential scatterlist entry overflow Since the scatterlist length field is an unsigned int, make sure that sg_alloc_table_from_pages does not overflow it while coalescing pages to a single entry. v2: Drop reference to future use. Use UINT_MAX. v3: max_segment must be page aligned. v4: Do not rely on compiler to optimise out the rounddown. (Joonas Lahtinen) v5: Simplified loops and use post-increments rather than pre-increments. Use PAGE_MASK and fix comment typo. (Andy Shevchenko) v6: Commit spelling fix. Signed-off-by: Tvrtko Ursulin Cc: Masahiro Yamada Cc: linux-kernel@vger.kernel.org Reviewed-by: Chris Wilson Cc: Joonas Lahtinen Cc: Andy Shevchenko Link: https://patchwork.freedesktop.org/patch/msgid/20170803091312.22875-1-tvrtko.ursulin@linux.intel.com --- include/linux/scatterlist.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 205aefb4ed93..6dd2ddbc6230 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -20,6 +20,12 @@ struct scatterlist { #endif }; +/* + * Since the above length field is an unsigned int, below we define the maximum + * length in bytes that can be stored in one scatterlist entry. + */ +#define SCATTERLIST_MAX_SEGMENT (UINT_MAX & PAGE_MASK) + /* * These macros should be used after a dma_map_sg call has been done * to get bus addresses of each of the SG entries and their lengths. -- cgit v1.2.3 From 89d8589cd72c6f48b19c370517d16f3ee23909df Mon Sep 17 00:00:00 2001 From: Tvrtko Ursulin Date: Thu, 3 Aug 2017 10:13:51 +0100 Subject: lib/scatterlist: Introduce and export __sg_alloc_table_from_pages Drivers like i915 benefit from being able to control the maxium size of the sg coalesced segment while building the scatter- gather list. Introduce and export the __sg_alloc_table_from_pages function which will allow it that control. v2: Reorder parameters. (Chris Wilson) v3: Fix incomplete reordering in v2. v4: max_segment needs to be page aligned. v5: Rebase. v6: Rebase. v7: Fix spelling in commit and mention max segment size in __sg_alloc_table_from_pages kerneldoc. (Andrew Morton) Signed-off-by: Tvrtko Ursulin Cc: Masahiro Yamada Cc: linux-kernel@vger.kernel.org Cc: Chris Wilson Reviewed-by: Chris Wilson Cc: Joonas Lahtinen Cc: Andrew Morton Link: https://patchwork.freedesktop.org/patch/msgid/20170803091351.23594-1-tvrtko.ursulin@linux.intel.com --- include/linux/scatterlist.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scatterlist.h b/include/linux/scatterlist.h index 6dd2ddbc6230..874b50c232de 100644 --- a/include/linux/scatterlist.h +++ b/include/linux/scatterlist.h @@ -267,10 +267,13 @@ void sg_free_table(struct sg_table *); int __sg_alloc_table(struct sg_table *, unsigned int, unsigned int, struct scatterlist *, gfp_t, sg_alloc_fn *); int sg_alloc_table(struct sg_table *, unsigned int, gfp_t); -int sg_alloc_table_from_pages(struct sg_table *sgt, - struct page **pages, unsigned int n_pages, - unsigned int offset, unsigned long size, - gfp_t gfp_mask); +int __sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, + unsigned int n_pages, unsigned int offset, + unsigned long size, unsigned int max_segment, + gfp_t gfp_mask); +int sg_alloc_table_from_pages(struct sg_table *sgt, struct page **pages, + unsigned int n_pages, unsigned int offset, + unsigned long size, gfp_t gfp_mask); size_t sg_copy_buffer(struct scatterlist *sgl, unsigned int nents, void *buf, size_t buflen, off_t skip, bool to_buffer); -- cgit v1.2.3 From 439e7271dc2b63de379e37971dc2f64d71e24f8a Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Thu, 31 Aug 2017 16:37:41 -0400 Subject: livepatch: introduce shadow variable API Add exported API for livepatch modules: klp_shadow_get() klp_shadow_alloc() klp_shadow_get_or_alloc() klp_shadow_free() klp_shadow_free_all() that implement "shadow" variables, which allow callers to associate new shadow fields to existing data structures. This is intended to be used by livepatch modules seeking to emulate additions to data structure definitions. See Documentation/livepatch/shadow-vars.txt for a summary of the new shadow variable API, including a few common use cases. See samples/livepatch/livepatch-shadow-* for example modules that demonstrate shadow variables. [jkosina@suse.cz: fix __klp_shadow_get_or_alloc() comment as spotted by Josh] Signed-off-by: Joe Lawrence Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index 194991ef9347..d08eddc00497 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -164,6 +164,14 @@ static inline bool klp_have_reliable_stack(void) IS_ENABLED(CONFIG_HAVE_RELIABLE_STACKTRACE); } +void *klp_shadow_get(void *obj, unsigned long id); +void *klp_shadow_alloc(void *obj, unsigned long id, void *data, + size_t size, gfp_t gfp_flags); +void *klp_shadow_get_or_alloc(void *obj, unsigned long id, void *data, + size_t size, gfp_t gfp_flags); +void klp_shadow_free(void *obj, unsigned long id); +void klp_shadow_free_all(unsigned long id); + #else /* !CONFIG_LIVEPATCH */ static inline int klp_module_coming(struct module *mod) { return 0; } -- cgit v1.2.3 From f3ae7d9155c79bb8f97ca3ff61ea979dec402952 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 5 Sep 2017 16:43:49 +0200 Subject: dmaengine: xilinx_dma: Move enum xdma_ip_type to driver file The enum xdma_ip_type is only used inside the Xilinx DMA driver and not exported to any consumers (nor should it be). So move it from the global header to driver file itself. Signed-off-by: Lars-Peter Clausen Acked-by: Michal Simek Signed-off-by: Vinod Koul --- include/linux/dma/xilinx_dma.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/xilinx_dma.h b/include/linux/dma/xilinx_dma.h index 3ae300052553..34b98f276ed0 100644 --- a/include/linux/dma/xilinx_dma.h +++ b/include/linux/dma/xilinx_dma.h @@ -41,20 +41,6 @@ struct xilinx_vdma_config { int ext_fsync; }; -/** - * enum xdma_ip_type: DMA IP type. - * - * XDMA_TYPE_AXIDMA: Axi dma ip. - * XDMA_TYPE_CDMA: Axi cdma ip. - * XDMA_TYPE_VDMA: Axi vdma ip. - * - */ -enum xdma_ip_type { - XDMA_TYPE_AXIDMA = 0, - XDMA_TYPE_CDMA, - XDMA_TYPE_VDMA, -}; - int xilinx_vdma_channel_set_config(struct dma_chan *dchan, struct xilinx_vdma_config *cfg); -- cgit v1.2.3 From 4b4e02c83167dca260e6bf974809979d44694e19 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Mon, 11 Sep 2017 20:32:07 -0700 Subject: typec: tcpm: Move out of staging Move tcpm (USB Type-C Port Manager) out of staging. Signed-off-by: Guenter Roeck Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/pd.h | 298 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/usb/pd_bdo.h | 31 +++++ include/linux/usb/pd_vdo.h | 251 ++++++++++++++++++++++++++++++++++++++ include/linux/usb/tcpm.h | 204 +++++++++++++++++++++++++++++++ 4 files changed, 784 insertions(+) create mode 100644 include/linux/usb/pd.h create mode 100644 include/linux/usb/pd_bdo.h create mode 100644 include/linux/usb/pd_vdo.h create mode 100644 include/linux/usb/tcpm.h (limited to 'include/linux') diff --git a/include/linux/usb/pd.h b/include/linux/usb/pd.h new file mode 100644 index 000000000000..e00051ced806 --- /dev/null +++ b/include/linux/usb/pd.h @@ -0,0 +1,298 @@ +/* + * Copyright 2015-2017 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_USB_PD_H +#define __LINUX_USB_PD_H + +#include +#include + +/* USB PD Messages */ +enum pd_ctrl_msg_type { + /* 0 Reserved */ + PD_CTRL_GOOD_CRC = 1, + PD_CTRL_GOTO_MIN = 2, + PD_CTRL_ACCEPT = 3, + PD_CTRL_REJECT = 4, + PD_CTRL_PING = 5, + PD_CTRL_PS_RDY = 6, + PD_CTRL_GET_SOURCE_CAP = 7, + PD_CTRL_GET_SINK_CAP = 8, + PD_CTRL_DR_SWAP = 9, + PD_CTRL_PR_SWAP = 10, + PD_CTRL_VCONN_SWAP = 11, + PD_CTRL_WAIT = 12, + PD_CTRL_SOFT_RESET = 13, + /* 14-15 Reserved */ +}; + +enum pd_data_msg_type { + /* 0 Reserved */ + PD_DATA_SOURCE_CAP = 1, + PD_DATA_REQUEST = 2, + PD_DATA_BIST = 3, + PD_DATA_SINK_CAP = 4, + /* 5-14 Reserved */ + PD_DATA_VENDOR_DEF = 15, +}; + +#define PD_REV10 0x0 +#define PD_REV20 0x1 + +#define PD_HEADER_CNT_SHIFT 12 +#define PD_HEADER_CNT_MASK 0x7 +#define PD_HEADER_ID_SHIFT 9 +#define PD_HEADER_ID_MASK 0x7 +#define PD_HEADER_PWR_ROLE BIT(8) +#define PD_HEADER_REV_SHIFT 6 +#define PD_HEADER_REV_MASK 0x3 +#define PD_HEADER_DATA_ROLE BIT(5) +#define PD_HEADER_TYPE_SHIFT 0 +#define PD_HEADER_TYPE_MASK 0xf + +#define PD_HEADER(type, pwr, data, id, cnt) \ + ((((type) & PD_HEADER_TYPE_MASK) << PD_HEADER_TYPE_SHIFT) | \ + ((pwr) == TYPEC_SOURCE ? PD_HEADER_PWR_ROLE : 0) | \ + ((data) == TYPEC_HOST ? PD_HEADER_DATA_ROLE : 0) | \ + (PD_REV20 << PD_HEADER_REV_SHIFT) | \ + (((id) & PD_HEADER_ID_MASK) << PD_HEADER_ID_SHIFT) | \ + (((cnt) & PD_HEADER_CNT_MASK) << PD_HEADER_CNT_SHIFT)) + +#define PD_HEADER_LE(type, pwr, data, id, cnt) \ + cpu_to_le16(PD_HEADER((type), (pwr), (data), (id), (cnt))) + +static inline unsigned int pd_header_cnt(u16 header) +{ + return (header >> PD_HEADER_CNT_SHIFT) & PD_HEADER_CNT_MASK; +} + +static inline unsigned int pd_header_cnt_le(__le16 header) +{ + return pd_header_cnt(le16_to_cpu(header)); +} + +static inline unsigned int pd_header_type(u16 header) +{ + return (header >> PD_HEADER_TYPE_SHIFT) & PD_HEADER_TYPE_MASK; +} + +static inline unsigned int pd_header_type_le(__le16 header) +{ + return pd_header_type(le16_to_cpu(header)); +} + +static inline unsigned int pd_header_msgid(u16 header) +{ + return (header >> PD_HEADER_ID_SHIFT) & PD_HEADER_ID_MASK; +} + +static inline unsigned int pd_header_msgid_le(__le16 header) +{ + return pd_header_msgid(le16_to_cpu(header)); +} + +#define PD_MAX_PAYLOAD 7 + +/** + * struct pd_message - PD message as seen on wire + * @header: PD message header + * @payload: PD message payload + */ +struct pd_message { + __le16 header; + __le32 payload[PD_MAX_PAYLOAD]; +} __packed; + +/* PDO: Power Data Object */ +#define PDO_MAX_OBJECTS 7 + +enum pd_pdo_type { + PDO_TYPE_FIXED = 0, + PDO_TYPE_BATT = 1, + PDO_TYPE_VAR = 2, +}; + +#define PDO_TYPE_SHIFT 30 +#define PDO_TYPE_MASK 0x3 + +#define PDO_TYPE(t) ((t) << PDO_TYPE_SHIFT) + +#define PDO_VOLT_MASK 0x3ff +#define PDO_CURR_MASK 0x3ff +#define PDO_PWR_MASK 0x3ff + +#define PDO_FIXED_DUAL_ROLE BIT(29) /* Power role swap supported */ +#define PDO_FIXED_SUSPEND BIT(28) /* USB Suspend supported (Source) */ +#define PDO_FIXED_HIGHER_CAP BIT(28) /* Requires more than vSafe5V (Sink) */ +#define PDO_FIXED_EXTPOWER BIT(27) /* Externally powered */ +#define PDO_FIXED_USB_COMM BIT(26) /* USB communications capable */ +#define PDO_FIXED_DATA_SWAP BIT(25) /* Data role swap supported */ +#define PDO_FIXED_VOLT_SHIFT 10 /* 50mV units */ +#define PDO_FIXED_CURR_SHIFT 0 /* 10mA units */ + +#define PDO_FIXED_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_FIXED_VOLT_SHIFT) +#define PDO_FIXED_CURR(ma) ((((ma) / 10) & PDO_CURR_MASK) << PDO_FIXED_CURR_SHIFT) + +#define PDO_FIXED(mv, ma, flags) \ + (PDO_TYPE(PDO_TYPE_FIXED) | (flags) | \ + PDO_FIXED_VOLT(mv) | PDO_FIXED_CURR(ma)) + +#define PDO_BATT_MAX_VOLT_SHIFT 20 /* 50mV units */ +#define PDO_BATT_MIN_VOLT_SHIFT 10 /* 50mV units */ +#define PDO_BATT_MAX_PWR_SHIFT 0 /* 250mW units */ + +#define PDO_BATT_MIN_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_BATT_MIN_VOLT_SHIFT) +#define PDO_BATT_MAX_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_BATT_MAX_VOLT_SHIFT) +#define PDO_BATT_MAX_POWER(mw) ((((mw) / 250) & PDO_PWR_MASK) << PDO_BATT_MAX_PWR_SHIFT) + +#define PDO_BATT(min_mv, max_mv, max_mw) \ + (PDO_TYPE(PDO_TYPE_BATT) | PDO_BATT_MIN_VOLT(min_mv) | \ + PDO_BATT_MAX_VOLT(max_mv) | PDO_BATT_MAX_POWER(max_mw)) + +#define PDO_VAR_MAX_VOLT_SHIFT 20 /* 50mV units */ +#define PDO_VAR_MIN_VOLT_SHIFT 10 /* 50mV units */ +#define PDO_VAR_MAX_CURR_SHIFT 0 /* 10mA units */ + +#define PDO_VAR_MIN_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_VAR_MIN_VOLT_SHIFT) +#define PDO_VAR_MAX_VOLT(mv) ((((mv) / 50) & PDO_VOLT_MASK) << PDO_VAR_MAX_VOLT_SHIFT) +#define PDO_VAR_MAX_CURR(ma) ((((ma) / 10) & PDO_CURR_MASK) << PDO_VAR_MAX_CURR_SHIFT) + +#define PDO_VAR(min_mv, max_mv, max_ma) \ + (PDO_TYPE(PDO_TYPE_VAR) | PDO_VAR_MIN_VOLT(min_mv) | \ + PDO_VAR_MAX_VOLT(max_mv) | PDO_VAR_MAX_CURR(max_ma)) + +static inline enum pd_pdo_type pdo_type(u32 pdo) +{ + return (pdo >> PDO_TYPE_SHIFT) & PDO_TYPE_MASK; +} + +static inline unsigned int pdo_fixed_voltage(u32 pdo) +{ + return ((pdo >> PDO_FIXED_VOLT_SHIFT) & PDO_VOLT_MASK) * 50; +} + +static inline unsigned int pdo_min_voltage(u32 pdo) +{ + return ((pdo >> PDO_VAR_MIN_VOLT_SHIFT) & PDO_VOLT_MASK) * 50; +} + +static inline unsigned int pdo_max_voltage(u32 pdo) +{ + return ((pdo >> PDO_VAR_MAX_VOLT_SHIFT) & PDO_VOLT_MASK) * 50; +} + +static inline unsigned int pdo_max_current(u32 pdo) +{ + return ((pdo >> PDO_VAR_MAX_CURR_SHIFT) & PDO_CURR_MASK) * 10; +} + +static inline unsigned int pdo_max_power(u32 pdo) +{ + return ((pdo >> PDO_BATT_MAX_PWR_SHIFT) & PDO_PWR_MASK) * 250; +} + +/* RDO: Request Data Object */ +#define RDO_OBJ_POS_SHIFT 28 +#define RDO_OBJ_POS_MASK 0x7 +#define RDO_GIVE_BACK BIT(27) /* Supports reduced operating current */ +#define RDO_CAP_MISMATCH BIT(26) /* Not satisfied by source caps */ +#define RDO_USB_COMM BIT(25) /* USB communications capable */ +#define RDO_NO_SUSPEND BIT(24) /* USB Suspend not supported */ + +#define RDO_PWR_MASK 0x3ff +#define RDO_CURR_MASK 0x3ff + +#define RDO_FIXED_OP_CURR_SHIFT 10 +#define RDO_FIXED_MAX_CURR_SHIFT 0 + +#define RDO_OBJ(idx) (((idx) & RDO_OBJ_POS_MASK) << RDO_OBJ_POS_SHIFT) + +#define PDO_FIXED_OP_CURR(ma) ((((ma) / 10) & RDO_CURR_MASK) << RDO_FIXED_OP_CURR_SHIFT) +#define PDO_FIXED_MAX_CURR(ma) ((((ma) / 10) & RDO_CURR_MASK) << RDO_FIXED_MAX_CURR_SHIFT) + +#define RDO_FIXED(idx, op_ma, max_ma, flags) \ + (RDO_OBJ(idx) | (flags) | \ + PDO_FIXED_OP_CURR(op_ma) | PDO_FIXED_MAX_CURR(max_ma)) + +#define RDO_BATT_OP_PWR_SHIFT 10 /* 250mW units */ +#define RDO_BATT_MAX_PWR_SHIFT 0 /* 250mW units */ + +#define RDO_BATT_OP_PWR(mw) ((((mw) / 250) & RDO_PWR_MASK) << RDO_BATT_OP_PWR_SHIFT) +#define RDO_BATT_MAX_PWR(mw) ((((mw) / 250) & RDO_PWR_MASK) << RDO_BATT_MAX_PWR_SHIFT) + +#define RDO_BATT(idx, op_mw, max_mw, flags) \ + (RDO_OBJ(idx) | (flags) | \ + RDO_BATT_OP_PWR(op_mw) | RDO_BATT_MAX_PWR(max_mw)) + +static inline unsigned int rdo_index(u32 rdo) +{ + return (rdo >> RDO_OBJ_POS_SHIFT) & RDO_OBJ_POS_MASK; +} + +static inline unsigned int rdo_op_current(u32 rdo) +{ + return ((rdo >> RDO_FIXED_OP_CURR_SHIFT) & RDO_CURR_MASK) * 10; +} + +static inline unsigned int rdo_max_current(u32 rdo) +{ + return ((rdo >> RDO_FIXED_MAX_CURR_SHIFT) & + RDO_CURR_MASK) * 10; +} + +static inline unsigned int rdo_op_power(u32 rdo) +{ + return ((rdo >> RDO_BATT_OP_PWR_SHIFT) & RDO_PWR_MASK) * 250; +} + +static inline unsigned int rdo_max_power(u32 rdo) +{ + return ((rdo >> RDO_BATT_MAX_PWR_SHIFT) & RDO_PWR_MASK) * 250; +} + +/* USB PD timers and counters */ +#define PD_T_NO_RESPONSE 5000 /* 4.5 - 5.5 seconds */ +#define PD_T_DB_DETECT 10000 /* 10 - 15 seconds */ +#define PD_T_SEND_SOURCE_CAP 150 /* 100 - 200 ms */ +#define PD_T_SENDER_RESPONSE 60 /* 24 - 30 ms, relaxed */ +#define PD_T_SOURCE_ACTIVITY 45 +#define PD_T_SINK_ACTIVITY 135 +#define PD_T_SINK_WAIT_CAP 240 +#define PD_T_PS_TRANSITION 500 +#define PD_T_SRC_TRANSITION 35 +#define PD_T_DRP_SNK 40 +#define PD_T_DRP_SRC 30 +#define PD_T_PS_SOURCE_OFF 920 +#define PD_T_PS_SOURCE_ON 480 +#define PD_T_PS_HARD_RESET 30 +#define PD_T_SRC_RECOVER 760 +#define PD_T_SRC_RECOVER_MAX 1000 +#define PD_T_SRC_TURN_ON 275 +#define PD_T_SAFE_0V 650 +#define PD_T_VCONN_SOURCE_ON 100 +#define PD_T_SINK_REQUEST 100 /* 100 ms minimum */ +#define PD_T_ERROR_RECOVERY 100 /* minimum 25 is insufficient */ +#define PD_T_SRCSWAPSTDBY 625 /* Maximum of 650ms */ +#define PD_T_NEWSRC 250 /* Maximum of 275ms */ + +#define PD_T_DRP_TRY 100 /* 75 - 150 ms */ +#define PD_T_DRP_TRYWAIT 600 /* 400 - 800 ms */ + +#define PD_T_CC_DEBOUNCE 200 /* 100 - 200 ms */ +#define PD_T_PD_DEBOUNCE 20 /* 10 - 20 ms */ + +#define PD_N_CAPS_COUNT (PD_T_NO_RESPONSE / PD_T_SEND_SOURCE_CAP) +#define PD_N_HARD_RESET_COUNT 2 + +#endif /* __LINUX_USB_PD_H */ diff --git a/include/linux/usb/pd_bdo.h b/include/linux/usb/pd_bdo.h new file mode 100644 index 000000000000..90b94d9fea5d --- /dev/null +++ b/include/linux/usb/pd_bdo.h @@ -0,0 +1,31 @@ +/* + * Copyright 2015-2017 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_USB_PD_BDO_H +#define __LINUX_USB_PD_BDO_H + +/* BDO : BIST Data Object */ +#define BDO_MODE_RECV (0 << 28) +#define BDO_MODE_TRANSMIT (1 << 28) +#define BDO_MODE_COUNTERS (2 << 28) +#define BDO_MODE_CARRIER0 (3 << 28) +#define BDO_MODE_CARRIER1 (4 << 28) +#define BDO_MODE_CARRIER2 (5 << 28) +#define BDO_MODE_CARRIER3 (6 << 28) +#define BDO_MODE_EYE (7 << 28) +#define BDO_MODE_TESTDATA (8 << 28) + +#define BDO_MODE_MASK(mode) ((mode) & 0xf0000000) + +#endif diff --git a/include/linux/usb/pd_vdo.h b/include/linux/usb/pd_vdo.h new file mode 100644 index 000000000000..d92259f8de0a --- /dev/null +++ b/include/linux/usb/pd_vdo.h @@ -0,0 +1,251 @@ +/* + * Copyright 2015-2017 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_USB_PD_VDO_H +#define __LINUX_USB_PD_VDO_H + +#include "pd.h" + +/* + * VDO : Vendor Defined Message Object + * VDM object is minimum of VDM header + 6 additional data objects. + */ + +#define VDO_MAX_OBJECTS 6 +#define VDO_MAX_SIZE (VDO_MAX_OBJECTS + 1) + +/* + * VDM header + * ---------- + * <31:16> :: SVID + * <15> :: VDM type ( 1b == structured, 0b == unstructured ) + * <14:13> :: Structured VDM version (can only be 00 == 1.0 currently) + * <12:11> :: reserved + * <10:8> :: object position (1-7 valid ... used for enter/exit mode only) + * <7:6> :: command type (SVDM only?) + * <5> :: reserved (SVDM), command type (UVDM) + * <4:0> :: command + */ +#define VDO(vid, type, custom) \ + (((vid) << 16) | \ + ((type) << 15) | \ + ((custom) & 0x7FFF)) + +#define VDO_SVDM_TYPE (1 << 15) +#define VDO_SVDM_VERS(x) ((x) << 13) +#define VDO_OPOS(x) ((x) << 8) +#define VDO_CMDT(x) ((x) << 6) +#define VDO_OPOS_MASK VDO_OPOS(0x7) +#define VDO_CMDT_MASK VDO_CMDT(0x3) + +#define CMDT_INIT 0 +#define CMDT_RSP_ACK 1 +#define CMDT_RSP_NAK 2 +#define CMDT_RSP_BUSY 3 + +/* reserved for SVDM ... for Google UVDM */ +#define VDO_SRC_INITIATOR (0 << 5) +#define VDO_SRC_RESPONDER (1 << 5) + +#define CMD_DISCOVER_IDENT 1 +#define CMD_DISCOVER_SVID 2 +#define CMD_DISCOVER_MODES 3 +#define CMD_ENTER_MODE 4 +#define CMD_EXIT_MODE 5 +#define CMD_ATTENTION 6 + +#define VDO_CMD_VENDOR(x) (((10 + (x)) & 0x1f)) + +/* ChromeOS specific commands */ +#define VDO_CMD_VERSION VDO_CMD_VENDOR(0) +#define VDO_CMD_SEND_INFO VDO_CMD_VENDOR(1) +#define VDO_CMD_READ_INFO VDO_CMD_VENDOR(2) +#define VDO_CMD_REBOOT VDO_CMD_VENDOR(5) +#define VDO_CMD_FLASH_ERASE VDO_CMD_VENDOR(6) +#define VDO_CMD_FLASH_WRITE VDO_CMD_VENDOR(7) +#define VDO_CMD_ERASE_SIG VDO_CMD_VENDOR(8) +#define VDO_CMD_PING_ENABLE VDO_CMD_VENDOR(10) +#define VDO_CMD_CURRENT VDO_CMD_VENDOR(11) +#define VDO_CMD_FLIP VDO_CMD_VENDOR(12) +#define VDO_CMD_GET_LOG VDO_CMD_VENDOR(13) +#define VDO_CMD_CCD_EN VDO_CMD_VENDOR(14) + +#define PD_VDO_VID(vdo) ((vdo) >> 16) +#define PD_VDO_SVDM(vdo) (((vdo) >> 15) & 1) +#define PD_VDO_OPOS(vdo) (((vdo) >> 8) & 0x7) +#define PD_VDO_CMD(vdo) ((vdo) & 0x1f) +#define PD_VDO_CMDT(vdo) (((vdo) >> 6) & 0x3) + +/* + * SVDM Identity request -> response + * + * Request is simply properly formatted SVDM header + * + * Response is 4 data objects: + * [0] :: SVDM header + * [1] :: Identitiy header + * [2] :: Cert Stat VDO + * [3] :: (Product | Cable) VDO + * [4] :: AMA VDO + * + */ +#define VDO_INDEX_HDR 0 +#define VDO_INDEX_IDH 1 +#define VDO_INDEX_CSTAT 2 +#define VDO_INDEX_CABLE 3 +#define VDO_INDEX_PRODUCT 3 +#define VDO_INDEX_AMA 4 + +/* + * SVDM Identity Header + * -------------------- + * <31> :: data capable as a USB host + * <30> :: data capable as a USB device + * <29:27> :: product type + * <26> :: modal operation supported (1b == yes) + * <25:16> :: Reserved, Shall be set to zero + * <15:0> :: USB-IF assigned VID for this cable vendor + */ +#define IDH_PTYPE_UNDEF 0 +#define IDH_PTYPE_HUB 1 +#define IDH_PTYPE_PERIPH 2 +#define IDH_PTYPE_PCABLE 3 +#define IDH_PTYPE_ACABLE 4 +#define IDH_PTYPE_AMA 5 + +#define VDO_IDH(usbh, usbd, ptype, is_modal, vid) \ + ((usbh) << 31 | (usbd) << 30 | ((ptype) & 0x7) << 27 \ + | (is_modal) << 26 | ((vid) & 0xffff)) + +#define PD_IDH_PTYPE(vdo) (((vdo) >> 27) & 0x7) +#define PD_IDH_VID(vdo) ((vdo) & 0xffff) +#define PD_IDH_MODAL_SUPP(vdo) ((vdo) & (1 << 26)) + +/* + * Cert Stat VDO + * ------------- + * <31:0> : USB-IF assigned XID for this cable + */ +#define PD_CSTAT_XID(vdo) (vdo) + +/* + * Product VDO + * ----------- + * <31:16> : USB Product ID + * <15:0> : USB bcdDevice + */ +#define VDO_PRODUCT(pid, bcd) (((pid) & 0xffff) << 16 | ((bcd) & 0xffff)) +#define PD_PRODUCT_PID(vdo) (((vdo) >> 16) & 0xffff) + +/* + * Cable VDO + * --------- + * <31:28> :: Cable HW version + * <27:24> :: Cable FW version + * <23:20> :: Reserved, Shall be set to zero + * <19:18> :: type-C to Type-A/B/C (00b == A, 01 == B, 10 == C) + * <17> :: Type-C to Plug/Receptacle (0b == plug, 1b == receptacle) + * <16:13> :: cable latency (0001 == <10ns(~1m length)) + * <12:11> :: cable termination type (11b == both ends active VCONN req) + * <10> :: SSTX1 Directionality support (0b == fixed, 1b == cfgable) + * <9> :: SSTX2 Directionality support + * <8> :: SSRX1 Directionality support + * <7> :: SSRX2 Directionality support + * <6:5> :: Vbus current handling capability + * <4> :: Vbus through cable (0b == no, 1b == yes) + * <3> :: SOP" controller present? (0b == no, 1b == yes) + * <2:0> :: USB SS Signaling support + */ +#define CABLE_ATYPE 0 +#define CABLE_BTYPE 1 +#define CABLE_CTYPE 2 +#define CABLE_PLUG 0 +#define CABLE_RECEPTACLE 1 +#define CABLE_CURR_1A5 0 +#define CABLE_CURR_3A 1 +#define CABLE_CURR_5A 2 +#define CABLE_USBSS_U2_ONLY 0 +#define CABLE_USBSS_U31_GEN1 1 +#define CABLE_USBSS_U31_GEN2 2 +#define VDO_CABLE(hw, fw, cbl, gdr, lat, term, tx1d, tx2d, rx1d, rx2d, cur,\ + vps, sopp, usbss) \ + (((hw) & 0x7) << 28 | ((fw) & 0x7) << 24 | ((cbl) & 0x3) << 18 \ + | (gdr) << 17 | ((lat) & 0x7) << 13 | ((term) & 0x3) << 11 \ + | (tx1d) << 10 | (tx2d) << 9 | (rx1d) << 8 | (rx2d) << 7 \ + | ((cur) & 0x3) << 5 | (vps) << 4 | (sopp) << 3 \ + | ((usbss) & 0x7)) + +/* + * AMA VDO + * --------- + * <31:28> :: Cable HW version + * <27:24> :: Cable FW version + * <23:12> :: Reserved, Shall be set to zero + * <11> :: SSTX1 Directionality support (0b == fixed, 1b == cfgable) + * <10> :: SSTX2 Directionality support + * <9> :: SSRX1 Directionality support + * <8> :: SSRX2 Directionality support + * <7:5> :: Vconn power + * <4> :: Vconn power required + * <3> :: Vbus power required + * <2:0> :: USB SS Signaling support + */ +#define VDO_AMA(hw, fw, tx1d, tx2d, rx1d, rx2d, vcpwr, vcr, vbr, usbss) \ + (((hw) & 0x7) << 28 | ((fw) & 0x7) << 24 \ + | (tx1d) << 11 | (tx2d) << 10 | (rx1d) << 9 | (rx2d) << 8 \ + | ((vcpwr) & 0x7) << 5 | (vcr) << 4 | (vbr) << 3 \ + | ((usbss) & 0x7)) + +#define PD_VDO_AMA_VCONN_REQ(vdo) (((vdo) >> 4) & 1) +#define PD_VDO_AMA_VBUS_REQ(vdo) (((vdo) >> 3) & 1) + +#define AMA_VCONN_PWR_1W 0 +#define AMA_VCONN_PWR_1W5 1 +#define AMA_VCONN_PWR_2W 2 +#define AMA_VCONN_PWR_3W 3 +#define AMA_VCONN_PWR_4W 4 +#define AMA_VCONN_PWR_5W 5 +#define AMA_VCONN_PWR_6W 6 +#define AMA_USBSS_U2_ONLY 0 +#define AMA_USBSS_U31_GEN1 1 +#define AMA_USBSS_U31_GEN2 2 +#define AMA_USBSS_BBONLY 3 + +/* + * SVDM Discover SVIDs request -> response + * + * Request is properly formatted VDM Header with discover SVIDs command. + * Response is a set of SVIDs of all all supported SVIDs with all zero's to + * mark the end of SVIDs. If more than 12 SVIDs are supported command SHOULD be + * repeated. + */ +#define VDO_SVID(svid0, svid1) (((svid0) & 0xffff) << 16 | ((svid1) & 0xffff)) +#define PD_VDO_SVID_SVID0(vdo) ((vdo) >> 16) +#define PD_VDO_SVID_SVID1(vdo) ((vdo) & 0xffff) + +/* USB-IF SIDs */ +#define USB_SID_PD 0xff00 /* power delivery */ +#define USB_SID_DISPLAYPORT 0xff01 +#define USB_SID_MHL 0xff02 /* Mobile High-Definition Link */ + +/* VDM command timeouts (in ms) */ + +#define PD_T_VDM_UNSTRUCTURED 500 +#define PD_T_VDM_BUSY 100 +#define PD_T_VDM_WAIT_MODE_E 100 +#define PD_T_VDM_SNDR_RSP 30 +#define PD_T_VDM_E_MODE 25 +#define PD_T_VDM_RCVR_RSP 15 + +#endif /* __LINUX_USB_PD_VDO_H */ diff --git a/include/linux/usb/tcpm.h b/include/linux/usb/tcpm.h new file mode 100644 index 000000000000..073197f0d2bb --- /dev/null +++ b/include/linux/usb/tcpm.h @@ -0,0 +1,204 @@ +/* + * Copyright 2015-2017 Google, Inc + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_USB_TCPM_H +#define __LINUX_USB_TCPM_H + +#include +#include +#include "pd.h" + +enum typec_cc_status { + TYPEC_CC_OPEN, + TYPEC_CC_RA, + TYPEC_CC_RD, + TYPEC_CC_RP_DEF, + TYPEC_CC_RP_1_5, + TYPEC_CC_RP_3_0, +}; + +enum typec_cc_polarity { + TYPEC_POLARITY_CC1, + TYPEC_POLARITY_CC2, +}; + +/* Time to wait for TCPC to complete transmit */ +#define PD_T_TCPC_TX_TIMEOUT 100 /* in ms */ +#define PD_ROLE_SWAP_TIMEOUT (MSEC_PER_SEC * 10) + +enum tcpm_transmit_status { + TCPC_TX_SUCCESS = 0, + TCPC_TX_DISCARDED = 1, + TCPC_TX_FAILED = 2, +}; + +enum tcpm_transmit_type { + TCPC_TX_SOP = 0, + TCPC_TX_SOP_PRIME = 1, + TCPC_TX_SOP_PRIME_PRIME = 2, + TCPC_TX_SOP_DEBUG_PRIME = 3, + TCPC_TX_SOP_DEBUG_PRIME_PRIME = 4, + TCPC_TX_HARD_RESET = 5, + TCPC_TX_CABLE_RESET = 6, + TCPC_TX_BIST_MODE_2 = 7 +}; + +/** + * struct tcpc_config - Port configuration + * @src_pdo: PDO parameters sent to port partner as response to + * PD_CTRL_GET_SOURCE_CAP message + * @nr_src_pdo: Number of entries in @src_pdo + * @snk_pdo: PDO parameters sent to partner as response to + * PD_CTRL_GET_SINK_CAP message + * @nr_snk_pdo: Number of entries in @snk_pdo + * @max_snk_mv: Maximum acceptable sink voltage in mV + * @max_snk_ma: Maximum sink current in mA + * @max_snk_mw: Maximum required sink power in mW + * @operating_snk_mw: + * Required operating sink power in mW + * @type: Port type (TYPEC_PORT_DFP, TYPEC_PORT_UFP, or + * TYPEC_PORT_DRP) + * @default_role: + * Default port role (TYPEC_SINK or TYPEC_SOURCE). + * Set to TYPEC_NO_PREFERRED_ROLE if no default role. + * @try_role_hw:True if try.{Src,Snk} is implemented in hardware + * @alt_modes: List of supported alternate modes + */ +struct tcpc_config { + const u32 *src_pdo; + unsigned int nr_src_pdo; + + const u32 *snk_pdo; + unsigned int nr_snk_pdo; + + const u32 *snk_vdo; + unsigned int nr_snk_vdo; + + unsigned int max_snk_mv; + unsigned int max_snk_ma; + unsigned int max_snk_mw; + unsigned int operating_snk_mw; + + enum typec_port_type type; + enum typec_role default_role; + bool try_role_hw; /* try.{src,snk} implemented in hardware */ + + const struct typec_altmode_desc *alt_modes; +}; + +enum tcpc_usb_switch { + TCPC_USB_SWITCH_CONNECT, + TCPC_USB_SWITCH_DISCONNECT, +}; + +/* Mux state attributes */ +#define TCPC_MUX_USB_ENABLED BIT(0) /* USB enabled */ +#define TCPC_MUX_DP_ENABLED BIT(1) /* DP enabled */ +#define TCPC_MUX_POLARITY_INVERTED BIT(2) /* Polarity inverted */ + +/* Mux modes, decoded to attributes */ +enum tcpc_mux_mode { + TYPEC_MUX_NONE = 0, /* Open switch */ + TYPEC_MUX_USB = TCPC_MUX_USB_ENABLED, /* USB only */ + TYPEC_MUX_DP = TCPC_MUX_DP_ENABLED, /* DP only */ + TYPEC_MUX_DOCK = TCPC_MUX_USB_ENABLED | /* Both USB and DP */ + TCPC_MUX_DP_ENABLED, +}; + +struct tcpc_mux_dev { + int (*set)(struct tcpc_mux_dev *dev, enum tcpc_mux_mode mux_mode, + enum tcpc_usb_switch usb_config, + enum typec_cc_polarity polarity); + bool dfp_only; + void *priv_data; +}; + +/** + * struct tcpc_dev - Port configuration and callback functions + * @config: Pointer to port configuration + * @get_vbus: Called to read current VBUS state + * @get_current_limit: + * Optional; called by the tcpm core when configured as a snk + * and cc=Rp-def. This allows the tcpm to provide a fallback + * current-limit detection method for the cc=Rp-def case. + * For example, some tcpcs may include BC1.2 charger detection + * and use that in this case. + * @set_cc: Called to set value of CC pins + * @get_cc: Called to read current CC pin values + * @set_polarity: + * Called to set polarity + * @set_vconn: Called to enable or disable VCONN + * @set_vbus: Called to enable or disable VBUS + * @set_current_limit: + * Optional; called to set current limit as negotiated + * with partner. + * @set_pd_rx: Called to enable or disable reception of PD messages + * @set_roles: Called to set power and data roles + * @start_drp_toggling: + * Optional; if supported by hardware, called to start DRP + * toggling. DRP toggling is stopped automatically if + * a connection is established. + * @try_role: Optional; called to set a preferred role + * @pd_transmit:Called to transmit PD message + * @mux: Pointer to multiplexer data + */ +struct tcpc_dev { + const struct tcpc_config *config; + + int (*init)(struct tcpc_dev *dev); + int (*get_vbus)(struct tcpc_dev *dev); + int (*get_current_limit)(struct tcpc_dev *dev); + int (*set_cc)(struct tcpc_dev *dev, enum typec_cc_status cc); + int (*get_cc)(struct tcpc_dev *dev, enum typec_cc_status *cc1, + enum typec_cc_status *cc2); + int (*set_polarity)(struct tcpc_dev *dev, + enum typec_cc_polarity polarity); + int (*set_vconn)(struct tcpc_dev *dev, bool on); + int (*set_vbus)(struct tcpc_dev *dev, bool on, bool charge); + int (*set_current_limit)(struct tcpc_dev *dev, u32 max_ma, u32 mv); + int (*set_pd_rx)(struct tcpc_dev *dev, bool on); + int (*set_roles)(struct tcpc_dev *dev, bool attached, + enum typec_role role, enum typec_data_role data); + int (*start_drp_toggling)(struct tcpc_dev *dev, + enum typec_cc_status cc); + int (*try_role)(struct tcpc_dev *dev, int role); + int (*pd_transmit)(struct tcpc_dev *dev, enum tcpm_transmit_type type, + const struct pd_message *msg); + struct tcpc_mux_dev *mux; +}; + +struct tcpm_port; + +struct tcpm_port *tcpm_register_port(struct device *dev, struct tcpc_dev *tcpc); +void tcpm_unregister_port(struct tcpm_port *port); + +void tcpm_update_source_capabilities(struct tcpm_port *port, const u32 *pdo, + unsigned int nr_pdo); +void tcpm_update_sink_capabilities(struct tcpm_port *port, const u32 *pdo, + unsigned int nr_pdo, + unsigned int max_snk_mv, + unsigned int max_snk_ma, + unsigned int max_snk_mw, + unsigned int operating_snk_mw); + +void tcpm_vbus_change(struct tcpm_port *port); +void tcpm_cc_change(struct tcpm_port *port); +void tcpm_pd_receive(struct tcpm_port *port, + const struct pd_message *msg); +void tcpm_pd_transmit_complete(struct tcpm_port *port, + enum tcpm_transmit_status status); +void tcpm_pd_hard_reset(struct tcpm_port *port); +void tcpm_tcpc_reset(struct tcpm_port *port); + +#endif /* __LINUX_USB_TCPM_H */ -- cgit v1.2.3 From 14157f861437ebe2d624b0a845b91bbdf8ca9a2d Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 13 Sep 2017 11:05:50 +0900 Subject: mtd: nand: introduce NAND_ROW_ADDR_3 flag Several drivers check ->chipsize to see if the third row address cycle is needed. Instead of embedding magic sizes such as 32MB, 128MB in drivers, introduce a new flag NAND_ROW_ADDR_3 for clean-up. Since nand_scan_ident() knows well about the device, it can handle this properly. The flag is set if the row address bit width is greater than 16. Delete comments such as "One more address cycle for ..." because intention is now clear enough from the code. Signed-off-by: Masahiro Yamada Acked-by: Wenyou Yang Signed-off-by: Boris Brezillon --- include/linux/mtd/rawnand.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/rawnand.h b/include/linux/mtd/rawnand.h index 2b05f4273bab..749bb08c4772 100644 --- a/include/linux/mtd/rawnand.h +++ b/include/linux/mtd/rawnand.h @@ -177,6 +177,9 @@ enum nand_ecc_algo { */ #define NAND_NEED_SCRAMBLING 0x00002000 +/* Device needs 3rd row address cycle */ +#define NAND_ROW_ADDR_3 0x00004000 + /* Options valid for Samsung large page devices */ #define NAND_SAMSUNG_LP_OPTIONS NAND_CACHEPRG -- cgit v1.2.3 From ef838a81dd4de1e08454406812e42d7b9b417c4d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Wed, 13 Sep 2017 10:18:27 +0200 Subject: serial: Add common rs485 device tree parsing function MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Several drivers have the same device tree parsing code. Create a common helper function for it. This patch bases on work done by Sascha Hauer. Signed-off-by: Uwe Kleine-König Signed-off-by: Greg Kroah-Hartman --- include/linux/serial_core.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/serial_core.h b/include/linux/serial_core.h index 5553e04e59c9..37b044e78333 100644 --- a/include/linux/serial_core.h +++ b/include/linux/serial_core.h @@ -501,4 +501,9 @@ static inline int uart_handle_break(struct uart_port *port) (cflag) & CRTSCTS || \ !((cflag) & CLOCAL)) +/* + * Common device tree parsing helpers + */ +void of_get_rs485_mode(struct device_node *np, struct serial_rs485 *rs485conf); + #endif /* LINUX_SERIAL_CORE_H */ -- cgit v1.2.3 From f4ac6476945ff62939420bcf8266e39f8d5d54bd Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Thu, 14 Sep 2017 12:35:36 +0200 Subject: libata: Add new med_power_with_dipm link_power_management_policy setting As described by Matthew Garret quite a while back: https://mjg59.dreamwidth.org/34868.html Intel CPUs starting with the Haswell generation need SATA links to power down for the "package" part of the CPU to reach low power-states like PC7 / P8 which bring a significant power-saving with them. The default max_performance lpm policy does not allow for these high PC states, both the medium_power and min_power policies do allow this. The min_power policy saves significantly more power, but there are some reports of some disks / SSDs not liking min_power leading to system crashes and in some cases even data corruption has been reported. Matthew has found a document documenting the default settings of Intel's IRST Windows driver with which most laptops ship: https://www-ssl.intel.com/content/dam/doc/reference-guide/sata-devices-implementation-recommendations.pdf Matthew wrote a patch changing med_power to match those defaults, but that never got anywhere as some people where reporting issues with the patch-set that patch was a part of. This commit is another attempt to make the default IRST driver settings available under Linux, but instead of changing medium_power and potentially introducing regressions, this commit adds a new med_power_with_dipm setting which is identical to the existing medium_power accept that it enables dipm on top, which makes it match the Windows IRST driver settings, which should hopefully be safe to use on most devices. The med_power_with_dipm setting is close to min_power, except that: a) It does not use host-initiated slumber mode (ASP not set), but it does allow device-initiated slumber b) It does not enable DevSlp mode On my T440s test laptop I get the following power savings when idle: medium_power 0.9W med_power_with_dipm 1.2W min_power 1.2W Suggested-by: Matthew Garrett Cc: Matthew Garrett Signed-off-by: Hans de Goede Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 931c32f1f18d..ed9826b21c5e 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -522,6 +522,7 @@ enum ata_lpm_policy { ATA_LPM_UNKNOWN, ATA_LPM_MAX_POWER, ATA_LPM_MED_POWER, + ATA_LPM_MED_POWER_WITH_DIPM, /* Med power + DIPM as win IRST does */ ATA_LPM_MIN_POWER, }; -- cgit v1.2.3 From a5c6951c49fbf7509419d877620625ca3cdae9b1 Mon Sep 17 00:00:00 2001 From: Robert Jarzmik Date: Wed, 13 Sep 2017 21:37:17 +0200 Subject: mfd: wm97xx-core: core support for wm97xx Codec The WM9705, WM9712 and WM9713 are highly integrated codecs, with an audio codec, DAC and ADC, GPIO unit and a touchscreen interface. Historically the support was spread across drivers/input/touchscreen and sound/soc/codecs. The sharing was done through ac97 bus sharing. This model will not withstand the new AC97 bus model, where codecs are discovered on runtime. Signed-off-by: Robert Jarzmik Acked-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/wm97xx.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) create mode 100644 include/linux/mfd/wm97xx.h (limited to 'include/linux') diff --git a/include/linux/mfd/wm97xx.h b/include/linux/mfd/wm97xx.h new file mode 100644 index 000000000000..45fb54f19d09 --- /dev/null +++ b/include/linux/mfd/wm97xx.h @@ -0,0 +1,25 @@ +/* + * wm97xx client interface + * + * Copyright (C) 2017 Robert Jarzmik + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + */ + +#ifndef __LINUX_MFD_WM97XX_H +#define __LINUX_MFD_WM97XX_H + +struct regmap; +struct wm97xx_batt_pdata; +struct snd_ac97; + +struct wm97xx_platform_data { + struct snd_ac97 *ac97; + struct regmap *regmap; + struct wm97xx_batt_pdata *batt_pdata; +}; + +#endif -- cgit v1.2.3 From f454322efbf6faee695f517c6b52c4dc03cacd3e Mon Sep 17 00:00:00 2001 From: "Dmitry V. Levin" Date: Tue, 22 Aug 2017 02:16:11 +0300 Subject: signal: replace sigset_to_compat() with put_compat_sigset() There are 4 callers of sigset_to_compat() in the entire kernel. One is in sparc compat rt_sigaction(2), the rest are in kernel/signal.c itself. All are followed by copy_to_user(), and all but the sparc one are under "if it's big-endian..." ifdefs. Let's transform sigset_to_compat() into put_compat_sigset() that also calls copy_to_user(). Suggested-by: Al Viro Signed-off-by: Dmitry V. Levin Signed-off-by: Al Viro --- include/linux/compat.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index a5619de3437d..ab1baa79abe8 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -456,7 +456,8 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat); -extern void sigset_to_compat(compat_sigset_t *compat, const sigset_t *set); +extern int put_compat_sigset(compat_sigset_t __user *compat, + const sigset_t *set, unsigned int size); asmlinkage long compat_sys_migrate_pages(compat_pid_t pid, compat_ulong_t maxnode, const compat_ulong_t __user *old_nodes, -- cgit v1.2.3 From b8e8e1aa9f14110da180569908bbe538c9e9dc63 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Sep 2017 20:42:54 -0400 Subject: get rid of {get,put}_compat_itimerspec() no users left Signed-off-by: Al Viro --- include/linux/compat.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index ab1baa79abe8..21d30be5c0a5 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -443,11 +443,6 @@ static inline int compat_timespec_compare(struct compat_timespec *lhs, return lhs->tv_nsec - rhs->tv_nsec; } -extern int get_compat_itimerspec(struct itimerspec *dst, - const struct compat_itimerspec __user *src); -extern int put_compat_itimerspec(struct compat_itimerspec __user *dst, - const struct itimerspec *src); - asmlinkage long compat_sys_gettimeofday(struct compat_timeval __user *tv, struct timezone __user *tz); asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, -- cgit v1.2.3 From 3968cf623892d710e651070243fd16af312a9797 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 3 Sep 2017 21:45:17 -0400 Subject: get_compat_sigset() similar to put_compat_sigset() Signed-off-by: Al Viro --- include/linux/compat.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index 21d30be5c0a5..57cb6ecafa86 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -450,7 +450,7 @@ asmlinkage long compat_sys_settimeofday(struct compat_timeval __user *tv, asmlinkage long compat_sys_adjtimex(struct compat_timex __user *utp); -extern void sigset_from_compat(sigset_t *set, const compat_sigset_t *compat); +extern int get_compat_sigset(sigset_t *set, const compat_sigset_t __user *compat); extern int put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, unsigned int size); -- cgit v1.2.3 From bffa72cf7f9df842f0016ba03586039296b4caaf Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 19 Sep 2017 05:14:24 -0700 Subject: net: sk_buff rbnode reorg skb->rbnode shares space with skb->next, skb->prev and skb->tstamp Current uses (TCP receive ofo queue and netem) need to save/restore tstamp, while skb->dev is either NULL (TCP) or a constant for a given queue (netem). Since we plan using an RB tree for TCP retransmit queue to speedup SACK processing with large BDP, this patch exchanges skb->dev and skb->tstamp. This saves some overhead in both TCP and netem. v2: removes the swtstamp field from struct tcp_skb_cb Signed-off-by: Eric Dumazet Cc: Soheil Hassas Yeganeh Cc: Wei Wang Cc: Willem de Bruijn Acked-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/linux/skbuff.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 72299ef00061..492828801acb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -661,8 +661,12 @@ struct sk_buff { struct sk_buff *prev; union { - ktime_t tstamp; - u64 skb_mstamp; + struct net_device *dev; + /* Some protocols might use this space to store information, + * while device pointer would be NULL. + * UDP receive path is one user. + */ + unsigned long dev_scratch; }; }; struct rb_node rbnode; /* used in netem & tcp stack */ @@ -670,12 +674,8 @@ struct sk_buff { struct sock *sk; union { - struct net_device *dev; - /* Some protocols might use this space to store information, - * while device pointer would be NULL. - * UDP receive path is one user. - */ - unsigned long dev_scratch; + ktime_t tstamp; + u64 skb_mstamp; }; /* * This is the control buffer. It is free to use for every -- cgit v1.2.3 From 85e7dd3f871b988702973c80d9ef128e10dd3dad Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Mon, 4 Sep 2017 16:41:53 +0100 Subject: ASoC: arizona: Add support for setting the output volume limits The output volume limits allow signals to be limited to specific levels appropriate for the hardware attached. As this is a property of the hardware itself these will be configured through device tree. Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Mark Brown --- include/linux/mfd/arizona/pdata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index bfeecf179895..f72dc53848d7 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -174,6 +174,9 @@ struct arizona_pdata { /** Mode for outputs */ int out_mono[ARIZONA_MAX_OUTPUT]; + /** Limit output volumes */ + unsigned int out_vol_limit[2 * ARIZONA_MAX_OUTPUT]; + /** PDM speaker mute setting */ unsigned int spk_mute[ARIZONA_MAX_PDM_SPK]; -- cgit v1.2.3 From 3f7632e1ba2c6f5ca2499d1ee8acf95599d4b7b6 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Tue, 12 Sep 2017 01:44:44 +0200 Subject: dmaengine: List all allowed values for src/dst_addr_width in kernel doc MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Commit 93c6ee94c140 ("dma: Support for 3 bytes word size") and commit 534a729866f9 ("dmaengine: Add 16 bytes, 32 bytes and 64 bytes bus widths") added additional values for the allowed word size, but omitted these from the struct dma_slave_config documentation. Signed-off-by: Stefan Brüns Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 8319101170fc..2910e7dadc7f 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -329,7 +329,7 @@ enum dma_slave_buswidth { * @src_addr_width: this is the width in bytes of the source (RX) * register where DMA data shall be read. If the source * is memory this may be ignored depending on architecture. - * Legal values: 1, 2, 4, 8. + * Legal values: 1, 2, 3, 4, 8, 16, 32, 64. * @dst_addr_width: same as src_addr_width but for destination * target (TX) mutatis mutandis. * @src_maxburst: the maximum number of words (note: words, as in -- cgit v1.2.3 From c2cbd4276eea1d3b204754653dd74d6e10ca207f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20Br=C3=BCns?= Date: Tue, 12 Sep 2017 01:44:45 +0200 Subject: dmaengine: Mark struct dma_slave_caps kernel-doc correctly, clarify MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit struct dma_slave_caps documentation omitted the correct kernel-doc opening comment mark. Document byte granularity and interpretation of the src/dst_addr_widths bit flag fields used by struct dma_slave_caps and struct dma_device. Add punctuation to their "directions" member documentations, and cleanup wording of the description. Signed-off-by: Stefan Brüns Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 2910e7dadc7f..f838764993eb 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -404,14 +404,16 @@ enum dma_residue_granularity { DMA_RESIDUE_GRANULARITY_BURST = 2, }; -/* struct dma_slave_caps - expose capabilities of a slave channel only - * - * @src_addr_widths: bit mask of src addr widths the channel supports - * @dst_addr_widths: bit mask of dstn addr widths the channel supports - * @directions: bit mask of slave direction the channel supported - * since the enum dma_transfer_direction is not defined as bits for each - * type of direction, the dma controller should fill (1 << ) and same - * should be checked by controller as well +/** + * struct dma_slave_caps - expose capabilities of a slave channel only + * @src_addr_widths: bit mask of src addr widths the channel supports. + * Width is specified in bytes, e.g. for a channel supporting + * a width of 4 the mask should have BIT(4) set. + * @dst_addr_widths: bit mask of dst addr widths the channel supports + * @directions: bit mask of slave directions the channel supports. + * Since the enum dma_transfer_direction is not defined as bit flag for + * each type, the dma controller should set BIT() and same + * should be checked by controller as well * @max_burst: max burst capability per-transfer * @cmd_pause: true, if pause and thereby resume is supported * @cmd_terminate: true, if terminate cmd is supported @@ -678,11 +680,13 @@ struct dma_filter { * @dev_id: unique device ID * @dev: struct device reference for dma mapping api * @src_addr_widths: bit mask of src addr widths the device supports + * Width is specified in bytes, e.g. for a device supporting + * a width of 4 the mask should have BIT(4) set. * @dst_addr_widths: bit mask of dst addr widths the device supports - * @directions: bit mask of slave direction the device supports since - * the enum dma_transfer_direction is not defined as bits for - * each type of direction, the dma controller should fill (1 << - * ) and same should be checked by controller as well + * @directions: bit mask of slave directions the device supports. + * Since the enum dma_transfer_direction is not defined as bit flag for + * each type, the dma controller should set BIT() and same + * should be checked by controller as well * @max_burst: max burst capability per-transfer * @residue_granularity: granularity of the transfer residue reported * by tx_status -- cgit v1.2.3 From 6e617de84e87d626d1e976fc30e1322239fd4d2d Mon Sep 17 00:00:00 2001 From: Paolo Abeni Date: Wed, 20 Sep 2017 18:26:53 +0200 Subject: net: avoid a full fib lookup when rp_filter is disabled. Since commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") a full fib lookup is needed even if the rp_filter is disabled, if accept_local is false - which is the default. What we really need in the above scenario is just checking that the source IP address is not local, and in most case we can do that is a cheaper way looking up the ifaddr hash table. This commit adds a helper for such lookup, and uses it to validate the src address when rp_filter is disabled and no 'local' routes are created by the user space in the relevant namespace. A new ipv4 netns flag is added to account for such routes. We need that to preserve the same behavior we had before this patch. It also drops the checks to bail early from __fib_validate_source, added by the commit 1dced6a85482 ("ipv4: Restore accept_local behaviour in fib_validate_source()") they do not give any measurable performance improvement: if we do the lookup with are on a slower path. This improves UDP performances for unconnected sockets when rp_filter is disabled by 5% and also gives small but measurable performance improvement for TCP flood scenarios. v1 -> v2: - use the ifaddr lookup helper in __ip_dev_find(), as suggested by Eric - fall-back to full lookup if custom local routes are present Signed-off-by: Paolo Abeni Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index fb3f809e34e4..751d051f0bc7 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -179,6 +179,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); +struct in_ifaddr *inet_lookup_ifaddr_rcu(struct net *net, __be32 addr); static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); -- cgit v1.2.3 From a9a1d2a7827c9cf780966d0879c73ef5a91380e9 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 22 Sep 2017 11:02:10 +0200 Subject: pinctrl/gpio: Unify namespace for cross-calls The pinctrl_request_gpio() and pinctrl_free_gpio() break the nice namespacing in the other cross-calls like pinctrl_gpio_foo(). Just rename them and all references so we have one namespace with all cross-calls under pinctrl_gpio_*(). Signed-off-by: Linus Walleij --- include/linux/pinctrl/consumer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/consumer.h b/include/linux/pinctrl/consumer.h index a0f2aba72fa9..0412cc9833e9 100644 --- a/include/linux/pinctrl/consumer.h +++ b/include/linux/pinctrl/consumer.h @@ -25,8 +25,8 @@ struct device; #ifdef CONFIG_PINCTRL /* External interface to pin control */ -extern int pinctrl_request_gpio(unsigned gpio); -extern void pinctrl_free_gpio(unsigned gpio); +extern int pinctrl_gpio_request(unsigned gpio); +extern void pinctrl_gpio_free(unsigned gpio); extern int pinctrl_gpio_direction_input(unsigned gpio); extern int pinctrl_gpio_direction_output(unsigned gpio); extern int pinctrl_gpio_set_config(unsigned gpio, unsigned long config); @@ -62,12 +62,12 @@ static inline int pinctrl_pm_select_idle_state(struct device *dev) #else /* !CONFIG_PINCTRL */ -static inline int pinctrl_request_gpio(unsigned gpio) +static inline int pinctrl_gpio_request(unsigned gpio) { return 0; } -static inline void pinctrl_free_gpio(unsigned gpio) +static inline void pinctrl_gpio_free(unsigned gpio) { } -- cgit v1.2.3 From 242c1a28eb61cb34974e8aa05235d84355940a8a Mon Sep 17 00:00:00 2001 From: Gao Feng Date: Fri, 22 Sep 2017 10:25:22 +0800 Subject: net: Remove useless function skb_header_release There is no one which would invokes the function skb_header_release. So just remove it now. Signed-off-by: Gao Feng Signed-off-by: David S. Miller --- include/linux/skbuff.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 492828801acb..f9db5539a6fb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1456,28 +1456,9 @@ static inline int skb_header_unclone(struct sk_buff *skb, gfp_t pri) return 0; } -/** - * skb_header_release - release reference to header - * @skb: buffer to operate on - * - * Drop a reference to the header part of the buffer. This is done - * by acquiring a payload reference. You must not read from the header - * part of skb->data after this. - * Note : Check if you can use __skb_header_release() instead. - */ -static inline void skb_header_release(struct sk_buff *skb) -{ - BUG_ON(skb->nohdr); - skb->nohdr = 1; - atomic_add(1 << SKB_DATAREF_SHIFT, &skb_shinfo(skb)->dataref); -} - /** * __skb_header_release - release reference to header * @skb: buffer to operate on - * - * Variant of skb_header_release() assuming skb is private to caller. - * We can avoid one atomic operation. */ static inline void __skb_header_release(struct sk_buff *skb) { -- cgit v1.2.3 From cfb766da54d98ceb145e1bb0bd11c559569dcbfc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Sep 2017 08:12:04 -0700 Subject: sched/cputime: Expose cputime_adjust() Will be used by basic cgroup resource stat reporting later. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Ingo Molnar Cc: Li Zefan Cc: Johannes Weiner --- include/linux/sched/cputime.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/cputime.h b/include/linux/sched/cputime.h index 4c5b9735c1ae..9251044335c5 100644 --- a/include/linux/sched/cputime.h +++ b/include/linux/sched/cputime.h @@ -53,7 +53,8 @@ static inline void task_cputime_scaled(struct task_struct *t, extern void task_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); extern void thread_group_cputime_adjusted(struct task_struct *p, u64 *ut, u64 *st); - +extern void cputime_adjust(struct task_cputime *curr, struct prev_cputime *prev, + u64 *ut, u64 *st); /* * Thread group CPU time accounting. -- cgit v1.2.3 From d2cc5ed6949085cfba30ec5228816cf6eb1d02b9 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Sep 2017 08:12:04 -0700 Subject: cpuacct: Introduce cgroup_account_cputime[_field]() Introduce cgroup_account_cputime[_field]() which wrap cpuacct_charge() and cgroup_account_field(). This doesn't introduce any functional changes and will be used to add cgroup basic resource accounting. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Ingo Molnar --- include/linux/cgroup.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index d023ac5e377f..6cd579329310 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -22,6 +22,7 @@ #include #include #include +#include #include @@ -688,6 +689,43 @@ static inline void cgroup_path_from_kernfs_id(const union kernfs_node_id *id, char *buf, size_t buflen) {} #endif /* !CONFIG_CGROUPS */ +/* + * Basic resource stats. + */ +#ifdef CONFIG_CGROUPS + +#ifdef CONFIG_CGROUP_CPUACCT +void cpuacct_charge(struct task_struct *tsk, u64 cputime); +void cpuacct_account_field(struct task_struct *tsk, int index, u64 val); +#else +static inline void cpuacct_charge(struct task_struct *tsk, u64 cputime) {} +static inline void cpuacct_account_field(struct task_struct *tsk, int index, + u64 val) {} +#endif + +static inline void cgroup_account_cputime(struct task_struct *task, + u64 delta_exec) +{ + cpuacct_charge(task, delta_exec); +} + +static inline void cgroup_account_cputime_field(struct task_struct *task, + enum cpu_usage_stat index, + u64 delta_exec) +{ + cpuacct_account_field(task, index, delta_exec); +} + +#else /* CONFIG_CGROUPS */ + +static inline void cgroup_account_cputime(struct task_struct *task, + u64 delta_exec) {} +static inline void cgroup_account_cputime_field(struct task_struct *task, + enum cpu_usage_stat index, + u64 delta_exec) {} + +#endif /* CONFIG_CGROUPS */ + /* * sock->sk_cgrp_data handling. For more info, see sock_cgroup_data * definition in cgroup-defs.h. -- cgit v1.2.3 From 041cd640b2f3c5607171c59d8712b503659d21f7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 25 Sep 2017 08:12:05 -0700 Subject: cgroup: Implement cgroup2 basic CPU usage accounting In cgroup1, while cpuacct isn't actually controlling any resources, it is a separate controller due to combination of two factors - 1. enabling cpu controller has significant side effects, and 2. we have to pick one of the hierarchies to account CPU usages on. cpuacct controller is effectively used to designate a hierarchy to track CPU usages on. cgroup2's unified hierarchy removes the second reason and we can account basic CPU usages by default. While we can use cpuacct for this purpose, both its interface and implementation leave a lot to be desired - it collects and exposes two sources of truth which don't agree with each other and some of the exposed statistics don't make much sense. Also, it propagates all the way up the hierarchy on each accounting event which is unnecessary. This patch adds basic resource accounting mechanism to cgroup2's unified hierarchy and accounts CPU usages using it. * All accountings are done per-cpu and don't propagate immediately. It just bumps the per-cgroup per-cpu counters and links to the parent's updated list if not already on it. * On a read, the per-cpu counters are collected into the global ones and then propagated upwards. Only the per-cpu counters which have changed since the last read are propagated. * CPU usage stats are collected and shown in "cgroup.stat" with "cpu." prefix. Total usage is collected from scheduling events. User/sys breakdown is sourced from tick sampling and adjusted to the usage using cputime_adjust(). This keeps the accounting side hot path O(1) and per-cpu and the read side O(nr_updated_since_last_read). v2: Minor changes and documentation updates as suggested by Waiman and Roman. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra Cc: Ingo Molnar Cc: Li Zefan Cc: Johannes Weiner Cc: Waiman Long Cc: Roman Gushchin --- include/linux/cgroup-defs.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/cgroup.h | 22 +++++++++++++++++ 2 files changed, 79 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index ade4a78a54c2..3e55bbd31ad1 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -16,6 +16,7 @@ #include #include #include +#include #include #include @@ -254,6 +255,57 @@ struct css_set { struct rcu_head rcu_head; }; +/* + * cgroup basic resource usage statistics. Accounting is done per-cpu in + * cgroup_cpu_stat which is then lazily propagated up the hierarchy on + * reads. + * + * When a stat gets updated, the cgroup_cpu_stat and its ancestors are + * linked into the updated tree. On the following read, propagation only + * considers and consumes the updated tree. This makes reading O(the + * number of descendants which have been active since last read) instead of + * O(the total number of descendants). + * + * This is important because there can be a lot of (draining) cgroups which + * aren't active and stat may be read frequently. The combination can + * become very expensive. By propagating selectively, increasing reading + * frequency decreases the cost of each read. + */ +struct cgroup_cpu_stat { + /* + * ->sync protects all the current counters. These are the only + * fields which get updated in the hot path. + */ + struct u64_stats_sync sync; + struct task_cputime cputime; + + /* + * Snapshots at the last reading. These are used to calculate the + * deltas to propagate to the global counters. + */ + struct task_cputime last_cputime; + + /* + * Child cgroups with stat updates on this cpu since the last read + * are linked on the parent's ->updated_children through + * ->updated_next. + * + * In addition to being more compact, singly-linked list pointing + * to the cgroup makes it unnecessary for each per-cpu struct to + * point back to the associated cgroup. + * + * Protected by per-cpu cgroup_cpu_stat_lock. + */ + struct cgroup *updated_children; /* terminated by self cgroup */ + struct cgroup *updated_next; /* NULL iff not on the list */ +}; + +struct cgroup_stat { + /* per-cpu statistics are collected into the folowing global counters */ + struct task_cputime cputime; + struct prev_cputime prev_cputime; +}; + struct cgroup { /* self css with NULL ->ss, points back to this cgroup */ struct cgroup_subsys_state self; @@ -353,6 +405,11 @@ struct cgroup { */ struct cgroup *dom_cgrp; + /* cgroup basic resource statistics */ + struct cgroup_cpu_stat __percpu *cpu_stat; + struct cgroup_stat pending_stat; /* pending from children */ + struct cgroup_stat stat; + /* * list of pidlists, up to two for each namespace (one for procs, one * for tasks); created on demand. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 6cd579329310..328a70ce0e23 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -703,17 +703,39 @@ static inline void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) {} #endif +void cgroup_stat_show_cputime(struct seq_file *seq, const char *prefix); + +void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec); +void __cgroup_account_cputime_field(struct cgroup *cgrp, + enum cpu_usage_stat index, u64 delta_exec); + static inline void cgroup_account_cputime(struct task_struct *task, u64 delta_exec) { + struct cgroup *cgrp; + cpuacct_charge(task, delta_exec); + + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + if (cgroup_parent(cgrp)) + __cgroup_account_cputime(cgrp, delta_exec); + rcu_read_unlock(); } static inline void cgroup_account_cputime_field(struct task_struct *task, enum cpu_usage_stat index, u64 delta_exec) { + struct cgroup *cgrp; + cpuacct_account_field(task, index, delta_exec); + + rcu_read_lock(); + cgrp = task_dfl_cgroup(task); + if (cgroup_parent(cgrp)) + __cgroup_account_cputime_field(cgrp, index, delta_exec); + rcu_read_unlock(); } #else /* CONFIG_CGROUPS */ -- cgit v1.2.3 From 07557ccb8c83f315e409ddee181e9ffbf87c6ad1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:05 +0200 Subject: genirq/msi: Capture device name for debugfs For debugging the allocation of unused or potentially leaked interrupt descriptor it's helpful to have some information about the site which allocated them. In case of MSI this is simple because the caller hands the device struct pointer into the domain allocation function. Duplicate the device name and show it in the debugfs entry of the interrupt descriptor. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213152.433038426@linutronix.de --- include/linux/irqdesc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 3e90a094798d..b55b113c049b 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -93,6 +93,7 @@ struct irq_desc { #endif #ifdef CONFIG_GENERIC_IRQ_DEBUGFS struct dentry *debugfs_file; + const char *dev_name; #endif #ifdef CONFIG_SPARSE_IRQ struct rcu_head rcu; -- cgit v1.2.3 From c3e7239a7f43ce1ff407df5f5944bf0d42dc21bf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:06 +0200 Subject: irqdomain/debugfs: Provide domain specific debug callback Some interrupt domains like the X86 vector domain has special requirements for debugging, like showing the vector usage on the CPUs. Add a callback to the irqdomain ops which can be filled in by domains which require it and add conditional invocations to the irqdomain and the per irq debug files. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213152.512937505@linutronix.de --- include/linux/irqdomain.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 81e4889ca6dd..1fb50438a868 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -40,6 +40,7 @@ struct of_device_id; struct irq_chip; struct irq_data; struct cpumask; +struct seq_file; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 @@ -104,7 +105,6 @@ struct irq_domain_ops { int (*xlate)(struct irq_domain *d, struct device_node *node, const u32 *intspec, unsigned int intsize, unsigned long *out_hwirq, unsigned int *out_type); - #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY /* extended V2 interfaces to support hierarchy irq_domains */ int (*alloc)(struct irq_domain *d, unsigned int virq, @@ -116,6 +116,10 @@ struct irq_domain_ops { int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); #endif +#ifdef CONFIG_GENERIC_IRQ_DEBUGFS + void (*debug_show)(struct seq_file *m, struct irq_domain *d, + struct irq_data *irqd, int ind); +#endif }; extern struct irq_domain_ops irq_generic_chip_ops; -- cgit v1.2.3 From 457f6d35072f395508255ef09fd08f824382cf85 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:07 +0200 Subject: genirq: Make state consistent for !IRQ_DOMAIN_HIERARCHY In the !IRQ_DOMAIN_HIERARCHY cas the activation stubs are not setting/clearing the activation status bits. This is not a problem at the moment, but upcoming changes require a correct status. Add the set/clear incovations to the stub functions and move them to the core internal header to avoid duplication and visibility outside the core. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213152.591985591@linutronix.de --- include/linux/irqdomain.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 1fb50438a868..de06ce992a2d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -511,8 +511,6 @@ static inline bool irq_domain_is_msi_remap(struct irq_domain *domain) extern bool irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain); #else /* CONFIG_IRQ_DOMAIN_HIERARCHY */ -static inline void irq_domain_activate_irq(struct irq_data *data) { } -static inline void irq_domain_deactivate_irq(struct irq_data *data) { } static inline int irq_domain_alloc_irqs(struct irq_domain *domain, unsigned int nr_irqs, int node, void *arg) { @@ -561,8 +559,6 @@ irq_domain_hierarchical_is_msi_remap(struct irq_domain *domain) #else /* CONFIG_IRQ_DOMAIN */ static inline void irq_dispose_mapping(unsigned int virq) { } -static inline void irq_domain_activate_irq(struct irq_data *data) { } -static inline void irq_domain_deactivate_irq(struct irq_data *data) { } static inline struct irq_domain *irq_find_matching_fwnode( struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token) { -- cgit v1.2.3 From 72491643469aab736536ae71dcd199b19dabd891 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:10 +0200 Subject: genirq/irqdomain: Update irq_domain_ops.activate() signature The irq_domain_ops.activate() callback has no return value and no way to tell the function that the activation is early. The upcoming changes to support a reservation scheme which allows to assign interrupt vectors on x86 only when the interrupt is actually requested requires: - A return value, so activation can fail at request_irq() time - Information that the activate invocation is early, i.e. before request_irq(). Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213152.848490816@linutronix.de --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index de06ce992a2d..9bc07f59b090 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -111,7 +111,7 @@ struct irq_domain_ops { unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - void (*activate)(struct irq_domain *d, struct irq_data *irq_data); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); -- cgit v1.2.3 From bb9b428a5c832d7abb494fbabac37c515c01c6c4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:11 +0200 Subject: genirq/irqdomain: Allow irq_domain_activate_irq() to fail Allow irq_domain_activate_irq() to fail. This is required to support a reservation and late vector assignment scheme. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213152.933882227@linutronix.de --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 9bc07f59b090..8fb877121984 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -441,7 +441,7 @@ extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct cpumask *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); -extern void irq_domain_activate_irq(struct irq_data *irq_data); +extern int irq_domain_activate_irq(struct irq_data *irq_data); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, -- cgit v1.2.3 From 42e1cc2dc5b698181ab1ffb7972bd880230c506e Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:12 +0200 Subject: genirq/irqdomain: Propagate early activation Propagate the early activation mode to the irqdomain activate() callbacks. This is required for the upcoming reservation, late vector assignment scheme, so that the early activation call can act accordingly. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.028353660@linutronix.de --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 8fb877121984..7d0c6c144708 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -441,7 +441,7 @@ extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc, const struct cpumask *affinity); extern void irq_domain_free_irqs(unsigned int virq, unsigned int nr_irqs); -extern int irq_domain_activate_irq(struct irq_data *irq_data); +extern int irq_domain_activate_irq(struct irq_data *irq_data, bool early); extern void irq_domain_deactivate_irq(struct irq_data *irq_data); static inline int irq_domain_alloc_irqs(struct irq_domain *domain, -- cgit v1.2.3 From 22d0b12f3560d3b3264ee79faa1c05a5060fb916 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:13 +0200 Subject: genirq/irqdomain: Add force reactivation flag to irq domains Allow irqdomains to tell the core code, that after early activation the interrupt needs to be reactivated at request_irq() time. This allows reservation of vectors at early activation time and actual vector assignment at request_irq() time. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.106242536@linutronix.de --- include/linux/msi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 80e3b562bef6..eff16ef81f43 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -283,6 +283,11 @@ enum { MSI_FLAG_PCI_MSIX = (1 << 3), /* Needs early activate, required for PCI */ MSI_FLAG_ACTIVATE_EARLY = (1 << 4), + /* + * Must reactivate when irq is started even when + * MSI_FLAG_ACTIVATE_EARLY has been set. + */ + MSI_FLAG_MUST_REACTIVATE = (1 << 5), }; int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, -- cgit v1.2.3 From 2f75d9e1c90511bff6d1ce4de94503cc28fec032 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 13 Sep 2017 23:29:14 +0200 Subject: genirq: Implement bitmap matrix allocator Implement the infrastructure for a simple bitmap based allocator, which will replace the x86 vector allocator. It's in the core code as other architectures might be able to reuse/extend it. For now it only implements allocations for single CPUs, but it's simple to add multi CPU allocation support if required. The concept is rather simple: Global information: system_vector bitmap global accounting PerCPU information: allocation bitmap managed allocation bitmap local accounting The system vector bitmap is used to exclude vectors system wide from the allocation space. The allocation bitmap is used to keep track of per cpu used vectors. The managed allocation bitmap is used to reserve vectors for managed interrupts. When a regular (non managed) interrupt allocation happens then the following rule applies: tmpmap = system_map | alloc_map | managed_map find_zero_bit(tmpmap) Oring the bitmaps together gives the real available space. The same rule applies for reserving a managed interrupt vector. But contrary to the regular interrupts the reservation only marks the bit in the managed map and therefor excludes it from the regular allocations. The managed map is only cleaned out when the a managed interrupt is completely released and it stays alive accross CPU offline/online operations. For managed interrupt allocations the rule is: tmpmap = managed_map & ~alloc_map find_first_bit(tmpmap) This returns the first bit which is in the managed map, but not yet allocated in the allocation map. The allocation marks it in the allocation map and hands it back to the caller for use. The rest of the code are helper functions to handle the various requirements and the accounting which are necessary to replace the x86 vector allocation code. The result is a single patch as the evolution of this infrastructure cannot be represented in bits and pieces. Signed-off-by: Thomas Gleixner Tested-by: Juergen Gross Tested-by: Yu Chen Acked-by: Juergen Gross Cc: Boris Ostrovsky Cc: Tony Luck Cc: Marc Zyngier Cc: Alok Kataria Cc: Joerg Roedel Cc: "Rafael J. Wysocki" Cc: Steven Rostedt Cc: Christoph Hellwig Cc: Peter Zijlstra Cc: Borislav Petkov Cc: Paolo Bonzini Cc: Rui Zhang Cc: "K. Y. Srinivasan" Cc: Arjan van de Ven Cc: Dan Williams Cc: Chris Metcalf Cc: Len Brown Link: https://lkml.kernel.org/r/20170913213153.185437174@linutronix.de --- include/linux/irq.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index d4728bf6a537..fda8da7c45e7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -1113,6 +1113,28 @@ static inline u32 irq_reg_readl(struct irq_chip_generic *gc, return readl(gc->reg_base + reg_offset); } +struct irq_matrix; +struct irq_matrix *irq_alloc_matrix(unsigned int matrix_bits, + unsigned int alloc_start, + unsigned int alloc_end); +void irq_matrix_online(struct irq_matrix *m); +void irq_matrix_offline(struct irq_matrix *m); +void irq_matrix_assign_system(struct irq_matrix *m, unsigned int bit, bool replace); +int irq_matrix_reserve_managed(struct irq_matrix *m, const struct cpumask *msk); +void irq_matrix_remove_managed(struct irq_matrix *m, const struct cpumask *msk); +int irq_matrix_alloc_managed(struct irq_matrix *m, unsigned int cpu); +void irq_matrix_reserve(struct irq_matrix *m); +void irq_matrix_remove_reserved(struct irq_matrix *m); +int irq_matrix_alloc(struct irq_matrix *m, const struct cpumask *msk, + bool reserved, unsigned int *mapped_cpu); +void irq_matrix_free(struct irq_matrix *m, unsigned int cpu, + unsigned int bit, bool managed); +void irq_matrix_assign(struct irq_matrix *m, unsigned int bit); +unsigned int irq_matrix_available(struct irq_matrix *m, bool cpudown); +unsigned int irq_matrix_allocated(struct irq_matrix *m); +unsigned int irq_matrix_reserved(struct irq_matrix *m); +void irq_matrix_debug_show(struct seq_file *sf, struct irq_matrix *m, int ind); + /* Contrary to Linux irqs, for hardware irqs the irq number 0 is valid */ #define INVALID_HWIRQ (~0UL) irq_hw_number_t ipi_get_hwirq(unsigned int irq, unsigned int cpu); -- cgit v1.2.3 From 1d66af81905a4e2f3d03913f5449a8e9b5d3facd Mon Sep 17 00:00:00 2001 From: Danilo Krummrich Date: Fri, 9 Jun 2017 00:29:00 +0200 Subject: clk: bcm2835: remove remains from stub clk driver This commit removes the fixed clocks introduced as a stub clock driver added with commit 75fabc3f6448 ("ARM: bcm2835: add stub clock driver"). Originally they were used to drive the AMBA bus and PL011 uart driver. Now these clocks are derived by the CPRMAN clock driver and configured in DT. Additionally, get rid of init_machine function in bcm2835 board file as there's nothing to do any longer. Signed-off-by: Danilo Krummrich Acked-by: Eric Anholt Acked-by: Stephen Boyd Signed-off-by: Stefan Wahren --- include/linux/clk/bcm2835.h | 24 ------------------------ 1 file changed, 24 deletions(-) delete mode 100644 include/linux/clk/bcm2835.h (limited to 'include/linux') diff --git a/include/linux/clk/bcm2835.h b/include/linux/clk/bcm2835.h deleted file mode 100644 index aa937f6c17da..000000000000 --- a/include/linux/clk/bcm2835.h +++ /dev/null @@ -1,24 +0,0 @@ -/* - * Copyright (C) 2010 Broadcom - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - */ - -#ifndef __LINUX_CLK_BCM2835_H_ -#define __LINUX_CLK_BCM2835_H_ - -void __init bcm2835_init_clocks(void); - -#endif -- cgit v1.2.3 From 88bbe85dcd37aa2662c1a83962c15009fc12503e Mon Sep 17 00:00:00 2001 From: Stefan Wahren Date: Sun, 6 Aug 2017 17:52:02 +0200 Subject: irqchip: bcm2836: Move SMP startup code to arch/arm (v2) In order to easily provide SMP for BCM2837 on 32-bit and 64-bit the SMP startup code was placed in irq-bcm2836. That's not the right approach. So move this code where it belongs. Signed-off-by: Stefan Wahren Fixes: 41f4988cc287 ("irqchip/bcm2836: Add SMP support for the 2836") Tested-by: Eric Anholt Acked-by: Marc Zyngier --- include/linux/irqchip/irq-bcm2836.h | 70 +++++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) create mode 100644 include/linux/irqchip/irq-bcm2836.h (limited to 'include/linux') diff --git a/include/linux/irqchip/irq-bcm2836.h b/include/linux/irqchip/irq-bcm2836.h new file mode 100644 index 000000000000..218a6e1b18d8 --- /dev/null +++ b/include/linux/irqchip/irq-bcm2836.h @@ -0,0 +1,70 @@ +/* + * Root interrupt controller for the BCM2836 (Raspberry Pi 2). + * + * Copyright 2015 Broadcom + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define LOCAL_CONTROL 0x000 +#define LOCAL_PRESCALER 0x008 + +/* + * The low 2 bits identify the CPU that the GPU IRQ goes to, and the + * next 2 bits identify the CPU that the GPU FIQ goes to. + */ +#define LOCAL_GPU_ROUTING 0x00c +/* When setting bits 0-3, enables PMU interrupts on that CPU. */ +#define LOCAL_PM_ROUTING_SET 0x010 +/* When setting bits 0-3, disables PMU interrupts on that CPU. */ +#define LOCAL_PM_ROUTING_CLR 0x014 +/* + * The low 4 bits of this are the CPU's timer IRQ enables, and the + * next 4 bits are the CPU's timer FIQ enables (which override the IRQ + * bits). + */ +#define LOCAL_TIMER_INT_CONTROL0 0x040 +/* + * The low 4 bits of this are the CPU's per-mailbox IRQ enables, and + * the next 4 bits are the CPU's per-mailbox FIQ enables (which + * override the IRQ bits). + */ +#define LOCAL_MAILBOX_INT_CONTROL0 0x050 +/* + * The CPU's interrupt status register. Bits are defined by the the + * LOCAL_IRQ_* bits below. + */ +#define LOCAL_IRQ_PENDING0 0x060 +/* Same status bits as above, but for FIQ. */ +#define LOCAL_FIQ_PENDING0 0x070 +/* + * Mailbox write-to-set bits. There are 16 mailboxes, 4 per CPU, and + * these bits are organized by mailbox number and then CPU number. We + * use mailbox 0 for IPIs. The mailbox's interrupt is raised while + * any bit is set. + */ +#define LOCAL_MAILBOX0_SET0 0x080 +#define LOCAL_MAILBOX3_SET0 0x08c +/* Mailbox write-to-clear bits. */ +#define LOCAL_MAILBOX0_CLR0 0x0c0 +#define LOCAL_MAILBOX3_CLR0 0x0cc + +#define LOCAL_IRQ_CNTPSIRQ 0 +#define LOCAL_IRQ_CNTPNSIRQ 1 +#define LOCAL_IRQ_CNTHPIRQ 2 +#define LOCAL_IRQ_CNTVIRQ 3 +#define LOCAL_IRQ_MAILBOX0 4 +#define LOCAL_IRQ_MAILBOX1 5 +#define LOCAL_IRQ_MAILBOX2 6 +#define LOCAL_IRQ_MAILBOX3 7 +#define LOCAL_IRQ_GPU_FAST 8 +#define LOCAL_IRQ_PMU_FAST 9 +#define LAST_IRQ LOCAL_IRQ_PMU_FAST -- cgit v1.2.3 From 4c3711d7fb4763c63b2654f2d07cbe21ca5aadd4 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 31 Aug 2017 17:12:48 +0200 Subject: timekeeping: Provide NMI safe access to clock realtime The configurable printk timestamping wants access to clock realtime. Right now there is no ktime_get_real_fast_ns() accessor because reading the monotonic base and the realtime offset cannot be done atomically. Contrary to boot time this offset can change during runtime and cause half updated readouts. struct tk_read_base was fully packed when the fast timekeeper access was implemented. commit ceea5e3771ed ("time: Fix clock->read(clock) race around clocksource changes") removed the 'read' function pointer from the structure, but of course left the comment stale. So now the structure can fit a new 64bit member w/o violating the cache line constraints. Add real_base to tk_read_base and update it in the fast timekeeper update sequence. Implement an accessor which follows the same scheme as the accessor to clock monotonic, but uses the new real_base to access clock real time. The runtime overhead for updating real_base is minimal as it just adds two cache hot values and stores them into an already dirtied cache line along with the other fast timekeeper updates. Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: John Stultz Cc: Peter Zijlstra Link: https://lkml.kernel.org/r/1505757060-2004-3-git-send-email-prarit@redhat.com --- include/linux/timekeeper_internal.h | 6 +++++- include/linux/timekeeping.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 0a0a53daf2a2..98f645ee8409 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -13,19 +13,22 @@ /** * struct tk_read_base - base structure for timekeeping readout * @clock: Current clocksource used for timekeeping. - * @read: Read function of @clock * @mask: Bitmask for two's complement subtraction of non 64bit clocks * @cycle_last: @clock cycle value at last update * @mult: (NTP adjusted) multiplier for scaled math conversion * @shift: Shift value for scaled math conversion * @xtime_nsec: Shifted (fractional) nano seconds offset for readout * @base: ktime_t (nanoseconds) base time for readout + * @base_real: Nanoseconds base value for clock REALTIME readout * * This struct has size 56 byte on 64 bit. Together with a seqcount it * occupies a single 64byte cache line. * * The struct is separate from struct timekeeper as it is also used * for a fast NMI safe accessors. + * + * @base_real is for the fast NMI safe accessor to allow reading clock + * realtime from any context. */ struct tk_read_base { struct clocksource *clock; @@ -35,6 +38,7 @@ struct tk_read_base { u32 shift; u64 xtime_nsec; ktime_t base; + u64 base_real; }; /** diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ddc229ff6d1e..eb98cbdbb323 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -239,6 +239,7 @@ static inline u64 ktime_get_raw_ns(void) extern u64 ktime_get_mono_fast_ns(void); extern u64 ktime_get_raw_fast_ns(void); extern u64 ktime_get_boot_fast_ns(void); +extern u64 ktime_get_real_fast_ns(void); /* * Timespec interfaces utilizing the ktime based ones -- cgit v1.2.3 From a380f2edef65b2447a043251bb3c00a9d2153a8b Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Mon, 25 Sep 2017 14:56:44 +0200 Subject: PM / core: Drop legacy class suspend/resume operations There are no classes using the legacy suspend/resume operations in the tree any more, so drop these operations and update the code referring to them accordingly. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/device.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 1d2607923a24..c1527f887050 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -372,9 +372,6 @@ int subsys_virtual_register(struct bus_type *subsys, * @devnode: Callback to provide the devtmpfs. * @class_release: Called to release this class. * @dev_release: Called to release the device. - * @suspend: Used to put the device to sleep mode, usually to a low power - * state. - * @resume: Used to bring the device from the sleep mode. * @shutdown_pre: Called at shut-down time before driver shutdown. * @ns_type: Callbacks so sysfs can detemine namespaces. * @namespace: Namespace of the device belongs to this class. @@ -402,8 +399,6 @@ struct class { void (*class_release)(struct class *class); void (*dev_release)(struct device *dev); - int (*suspend)(struct device *dev, pm_message_t state); - int (*resume)(struct device *dev); int (*shutdown_pre)(struct device *dev); const struct kobj_ns_type_operations *ns_type; -- cgit v1.2.3 From 003278e431bffa4070d18c821eff1d95867f24db Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Tue, 26 Sep 2017 09:14:07 +0200 Subject: nfs_common: convert int to bool Since __state_in_grace return only true/false, make it return bool instead of int. Same change for the two user of it, locks_in_grace/opens_in_grace Signed-off-by: Corentin Labbe Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..8cc0493c7c39 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -970,8 +970,8 @@ struct lock_manager { struct net; void locks_start_grace(struct net *, struct lock_manager *); void locks_end_grace(struct lock_manager *); -int locks_in_grace(struct net *); -int opens_in_grace(struct net *); +bool locks_in_grace(struct net *); +bool opens_in_grace(struct net *); /* that will die - we need it for nfs_lock_info */ #include -- cgit v1.2.3 From 05e3db95ebfc5c06a29a1d8c7a3e02f46f3a25a7 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 14 Sep 2017 14:02:04 -0700 Subject: kthread: add a mechanism to store cgroup info kthread usually runs jobs on behalf of other threads. The jobs should be charged to cgroup of original threads. But the jobs run in a kthread, where we lose the cgroup context of original threads. The patch adds a machanism to record cgroup info of original threads in kthread context. Later we can retrieve the cgroup info and attach the cgroup info to jobs. Since this mechanism is only required by kthread, we store the cgroup info in kthread data instead of generic task_struct. Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/kthread.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 82e197eeac91..bd4369c83dfb 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -3,6 +3,7 @@ /* Simple interface for creating and stopping kernel threads without mess. */ #include #include +#include __printf(4, 5) struct task_struct *kthread_create_on_node(int (*threadfn)(void *data), @@ -198,4 +199,14 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); +#ifdef CONFIG_CGROUPS +void kthread_associate_blkcg(struct cgroup_subsys_state *css); +struct cgroup_subsys_state *kthread_blkcg(void); +#else +static inline void kthread_associate_blkcg(struct cgroup_subsys_state *css) { } +static inline struct cgroup_subsys_state *kthread_blkcg(void) +{ + return NULL; +} +#endif #endif /* _LINUX_KTHREAD_H */ -- cgit v1.2.3 From af551fb3be26a22b7a6b345b3b7e7e6acfc41758 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 14 Sep 2017 14:02:05 -0700 Subject: blkcg: delete unused APIs Nobody uses the APIs right now. Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- include/linux/blk-cgroup.h | 12 ------------ 2 files changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 275c91c99516..9c75f58f6a50 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -522,13 +522,11 @@ do { \ #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); -int bio_associate_current(struct bio *bio); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } -static inline int bio_associate_current(struct bio *bio) { return -ENOENT; } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { } diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 9d92153dd856..0cfa8d2ef4d6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -235,12 +235,6 @@ static inline struct blkcg *bio_blkcg(struct bio *bio) return task_blkcg(current); } -static inline struct cgroup_subsys_state * -task_get_blkcg_css(struct task_struct *task) -{ - return task_get_css(task, io_cgrp_id); -} - /** * blkcg_parent - get the parent of a blkcg * @blkcg: blkcg of interest @@ -735,12 +729,6 @@ struct blkcg_policy { #define blkcg_root_css ((struct cgroup_subsys_state *)ERR_PTR(-EINVAL)) -static inline struct cgroup_subsys_state * -task_get_blkcg_css(struct task_struct *task) -{ - return NULL; -} - #ifdef CONFIG_BLOCK static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, void *key) { return NULL; } -- cgit v1.2.3 From 902ec5b6de0678ac2effba0faaafdd1c3e7fcf2e Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 14 Sep 2017 14:02:06 -0700 Subject: block: make blkcg aware of kthread stored original cgroup info bio_blkcg is the only API to get cgroup info for a bio right now. If bio_blkcg finds current task is a kthread and has original blkcg associated, it will use the css instead of associating the bio to current task. This makes it possible that kthread dispatches bios on behalf of other threads. Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 0cfa8d2ef4d6..f57e54d64529 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -19,6 +19,7 @@ #include #include #include +#include /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -223,16 +224,16 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *task_blkcg(struct task_struct *tsk) -{ - return css_to_blkcg(task_css(tsk, io_cgrp_id)); -} - static inline struct blkcg *bio_blkcg(struct bio *bio) { + struct cgroup_subsys_state *css; + if (bio && bio->bi_css) return css_to_blkcg(bio->bi_css); - return task_blkcg(current); + css = kthread_blkcg(); + if (css) + return css_to_blkcg(css); + return css_to_blkcg(task_css(current, io_cgrp_id)); } /** -- cgit v1.2.3 From 0b508bc926bdced678febee2a2b8cdba0a19e481 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 26 Sep 2017 11:02:12 -0700 Subject: block: fix a build error The code is only for blkcg not for all cgroups Fixes: d4478e92d618 ("block/loop: make loop cgroup aware") Reported-by: kbuild test robot Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/kthread.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index bd4369c83dfb..fb201842c635 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -199,7 +199,7 @@ bool kthread_cancel_delayed_work_sync(struct kthread_delayed_work *work); void kthread_destroy_worker(struct kthread_worker *worker); -#ifdef CONFIG_CGROUPS +#ifdef CONFIG_BLK_CGROUP void kthread_associate_blkcg(struct cgroup_subsys_state *css); struct cgroup_subsys_state *kthread_blkcg(void); #else -- cgit v1.2.3 From 1e99c497012cd8647972876f1bd18545bc907aea Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Sun, 24 Sep 2017 12:09:45 +0300 Subject: qed: iWARP - Add check for errors on a SYN packet A SYN packet which arrives with errors from FW should be dropped. This required adding an additional field to the ll2 rx completion data. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index dd7a3b86bb9e..89fa0bbd54f3 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -101,6 +101,7 @@ struct qed_ll2_comp_rx_data { void *cookie; dma_addr_t rx_buf_addr; u16 parse_flags; + u16 err_flags; u16 vlan; bool b_last_packet; u8 connection_handle; -- cgit v1.2.3 From 6aaae2b6c4330a46204bca042f1d2f41e8e18dea Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:50 +0200 Subject: bpf: rename bpf_compute_data_end into bpf_compute_data_pointers Just do the rename into bpf_compute_data_pointers() as we'll add one more pointer here to recompute. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/filter.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index d29e58fde364..052bab3d62e7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -496,10 +496,13 @@ struct xdp_buff { void *data_hard_start; }; -/* compute the linear packet data range [data, data_end) which - * will be accessed by cls_bpf, act_bpf and lwt programs +/* Compute the linear packet data range [data, data_end) which + * will be accessed by various program types (cls_bpf, act_bpf, + * lwt, ...). Subsystems allowing direct data access must (!) + * ensure that cb[] area can be written to when BPF program is + * invoked (otherwise cb[] save/restore is necessary). */ -static inline void bpf_compute_data_end(struct sk_buff *skb) +static inline void bpf_compute_data_pointers(struct sk_buff *skb) { struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; -- cgit v1.2.3 From de8f3a83b0a0fddb2cf56e7a718127e9619ea3da Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Mon, 25 Sep 2017 02:25:51 +0200 Subject: bpf: add meta pointer for direct access This work enables generic transfer of metadata from XDP into skb. The basic idea is that we can make use of the fact that the resulting skb must be linear and already comes with a larger headroom for supporting bpf_xdp_adjust_head(), which mangles xdp->data. Here, we base our work on a similar principle and introduce a small helper bpf_xdp_adjust_meta() for adjusting a new pointer called xdp->data_meta. Thus, the packet has a flexible and programmable room for meta data, followed by the actual packet data. struct xdp_buff is therefore laid out that we first point to data_hard_start, then data_meta directly prepended to data followed by data_end marking the end of packet. bpf_xdp_adjust_head() takes into account whether we have meta data already prepended and if so, memmove()s this along with the given offset provided there's enough room. xdp->data_meta is optional and programs are not required to use it. The rationale is that when we process the packet in XDP (e.g. as DoS filter), we can push further meta data along with it for the XDP_PASS case, and give the guarantee that a clsact ingress BPF program on the same device can pick this up for further post-processing. Since we work with skb there, we can also set skb->mark, skb->priority or other skb meta data out of BPF, thus having this scratch space generic and programmable allows for more flexibility than defining a direct 1:1 transfer of potentially new XDP members into skb (it's also more efficient as we don't need to initialize/handle each of such new members). The facility also works together with GRO aggregation. The scratch space at the head of the packet can be multiple of 4 byte up to 32 byte large. Drivers not yet supporting xdp->data_meta can simply be set up with xdp->data_meta as xdp->data + 1 as bpf_xdp_adjust_meta() will detect this and bail out, such that the subsequent match against xdp->data for later access is guaranteed to fail. The verifier treats xdp->data_meta/xdp->data the same way as we treat xdp->data/xdp->data_end pointer comparisons. The requirement for doing the compare against xdp->data is that it hasn't been modified from it's original address we got from ctx access. It may have a range marking already from prior successful xdp->data/xdp->data_end pointer comparisons though. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + include/linux/filter.h | 21 ++++++++++++++-- include/linux/skbuff.h | 68 ++++++++++++++++++++++++++++++++++++++++++++++++-- 3 files changed, 86 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 8390859e79e7..2b672c50f160 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -137,6 +137,7 @@ enum bpf_reg_type { PTR_TO_MAP_VALUE, /* reg points to map element value */ PTR_TO_MAP_VALUE_OR_NULL,/* points to map elem value or NULL */ PTR_TO_STACK, /* reg == frame_pointer + offset */ + PTR_TO_PACKET_META, /* skb->data - meta_len */ PTR_TO_PACKET, /* reg points to skb->data */ PTR_TO_PACKET_END, /* skb->data + headlen */ }; diff --git a/include/linux/filter.h b/include/linux/filter.h index 052bab3d62e7..911d454af107 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -487,12 +487,14 @@ struct sk_filter { struct bpf_skb_data_end { struct qdisc_skb_cb qdisc_cb; + void *data_meta; void *data_end; }; struct xdp_buff { void *data; void *data_end; + void *data_meta; void *data_hard_start; }; @@ -507,7 +509,8 @@ static inline void bpf_compute_data_pointers(struct sk_buff *skb) struct bpf_skb_data_end *cb = (struct bpf_skb_data_end *)skb->cb; BUILD_BUG_ON(sizeof(*cb) > FIELD_SIZEOF(struct sk_buff, cb)); - cb->data_end = skb->data + skb_headlen(skb); + cb->data_meta = skb->data - skb_metadata_len(skb); + cb->data_end = skb->data + skb_headlen(skb); } static inline u8 *bpf_skb_cb(struct sk_buff *skb) @@ -728,8 +731,22 @@ int xdp_do_redirect(struct net_device *dev, struct bpf_prog *prog); void xdp_do_flush_map(void); +/* Drivers not supporting XDP metadata can use this helper, which + * rejects any room expansion for metadata as a result. + */ +static __always_inline void +xdp_set_data_meta_invalid(struct xdp_buff *xdp) +{ + xdp->data_meta = xdp->data + 1; +} + +static __always_inline bool +xdp_data_meta_unsupported(const struct xdp_buff *xdp) +{ + return unlikely(xdp->data_meta > xdp->data); +} + void bpf_warn_invalid_xdp_action(u32 act); -void bpf_warn_invalid_xdp_redirect(u32 ifindex); struct sock *do_sk_redirect_map(void); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index f9db5539a6fb..19e64bfb1a66 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -489,8 +489,9 @@ int skb_zerocopy_iter_stream(struct sock *sk, struct sk_buff *skb, * the end of the header data, ie. at skb->end. */ struct skb_shared_info { - unsigned short _unused; - unsigned char nr_frags; + __u8 __unused; + __u8 meta_len; + __u8 nr_frags; __u8 tx_flags; unsigned short gso_size; /* Warning: this field is not always filled in (UFO)! */ @@ -3400,6 +3401,69 @@ static inline ktime_t net_invalid_timestamp(void) return 0; } +static inline u8 skb_metadata_len(const struct sk_buff *skb) +{ + return skb_shinfo(skb)->meta_len; +} + +static inline void *skb_metadata_end(const struct sk_buff *skb) +{ + return skb_mac_header(skb); +} + +static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, + const struct sk_buff *skb_b, + u8 meta_len) +{ + const void *a = skb_metadata_end(skb_a); + const void *b = skb_metadata_end(skb_b); + /* Using more efficient varaiant than plain call to memcmp(). */ +#if defined(CONFIG_HAVE_EFFICIENT_UNALIGNED_ACCESS) && BITS_PER_LONG == 64 + u64 diffs = 0; + + switch (meta_len) { +#define __it(x, op) (x -= sizeof(u##op)) +#define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) + case 32: diffs |= __it_diff(a, b, 64); + case 24: diffs |= __it_diff(a, b, 64); + case 16: diffs |= __it_diff(a, b, 64); + case 8: diffs |= __it_diff(a, b, 64); + break; + case 28: diffs |= __it_diff(a, b, 64); + case 20: diffs |= __it_diff(a, b, 64); + case 12: diffs |= __it_diff(a, b, 64); + case 4: diffs |= __it_diff(a, b, 32); + break; + } + return diffs; +#else + return memcmp(a - meta_len, b - meta_len, meta_len); +#endif +} + +static inline bool skb_metadata_differs(const struct sk_buff *skb_a, + const struct sk_buff *skb_b) +{ + u8 len_a = skb_metadata_len(skb_a); + u8 len_b = skb_metadata_len(skb_b); + + if (!(len_a | len_b)) + return false; + + return len_a != len_b ? + true : __skb_metadata_differs(skb_a, skb_b, len_a); +} + +static inline void skb_metadata_set(struct sk_buff *skb, u8 meta_len) +{ + skb_shinfo(skb)->meta_len = meta_len; +} + +static inline void skb_metadata_clear(struct sk_buff *skb) +{ + skb_metadata_set(skb, 0); +} + struct sk_buff *skb_clone_sk(struct sk_buff *skb); #ifdef CONFIG_NETWORK_PHY_TIMESTAMPING -- cgit v1.2.3 From 404376af788a76cca760efdc05f26fd73bd94b17 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sun, 17 Sep 2017 19:07:10 -0700 Subject: Documentation: kernel-api: add bitmap operations from linux/bitmap.h Add to kernel-api Bitmap Operations section. Fix kernel-doc nitpicks in . Signed-off-by: Randy Dunlap Acked-by: Yury Norov Signed-off-by: Jonathan Corbet --- include/linux/bitmap.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 700cf5f67118..5c4178016b1e 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -360,8 +360,9 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, return __bitmap_parse(buf, buflen, 0, maskp, nmaskbits); } -/* +/** * BITMAP_FROM_U64() - Represent u64 value in the format suitable for bitmap. + * @n: u64 value * * Linux bitmaps are internally arrays of unsigned longs, i.e. 32-bit * integers in 32-bit environment, and 64-bit integers in 64-bit one. @@ -392,14 +393,14 @@ static inline int bitmap_parse(const char *buf, unsigned int buflen, ((unsigned long) ((u64)(n) >> 32)) #endif -/* +/** * bitmap_from_u64 - Check and swap words within u64. * @mask: source bitmap * @dst: destination bitmap * - * In 32-bit Big Endian kernel, when using (u32 *)(&val)[*] + * In 32-bit Big Endian kernel, when using ``(u32 *)(&val)[*]`` * to read u64 mask, we will get the wrong word. - * That is "(u32 *)(&val)[0]" gets the upper 32 bits, + * That is ``(u32 *)(&val)[0]`` gets the upper 32 bits, * but we expect the lower 32-bits of u64. */ static inline void bitmap_from_u64(unsigned long *dst, u64 mask) -- cgit v1.2.3 From e60aa7b53845a261dd419652f12ab9f89e668843 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:44 +0100 Subject: iommu/iova: Extend rbtree node caching The cached node mechanism provides a significant performance benefit for allocations using a 32-bit DMA mask, but in the case of non-PCI devices or where the 32-bit space is full, the loss of this benefit can be significant - on large systems there can be many thousands of entries in the tree, such that walking all the way down to find free space every time becomes increasingly awful. Maintain a similar cached node for the whole IOVA space as a superset of the 32-bit space so that performance can remain much more consistent. Inspired by work by Zhen Lei . Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index d179b9bf7814..69ea3e258ff2 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -70,7 +70,8 @@ struct iova_fq { struct iova_domain { spinlock_t iova_rbtree_lock; /* Lock to protect update of rbtree */ struct rb_root rbroot; /* iova domain rbtree root */ - struct rb_node *cached32_node; /* Save last alloced node */ + struct rb_node *cached_node; /* Save last alloced node */ + struct rb_node *cached32_node; /* Save last 32-bit alloced node */ unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; -- cgit v1.2.3 From aa3ac9469c1850ed00741955b975c3a19029763a Mon Sep 17 00:00:00 2001 From: Zhen Lei Date: Thu, 21 Sep 2017 16:52:45 +0100 Subject: iommu/iova: Make dma_32bit_pfn implicit Now that the cached node optimisation can apply to all allocations, the couple of users which were playing tricks with dma_32bit_pfn in order to benefit from it can stop doing so. Conversely, there is also no need for all the other users to explicitly calculate a 'real' 32-bit PFN, when init_iova_domain() can happily do that itself from the page granularity. CC: Thierry Reding CC: Jonathan Hunter CC: David Airlie CC: Sudeep Dutt CC: Ashutosh Dixit Signed-off-by: Zhen Lei Tested-by: Ard Biesheuvel Tested-by: Zhen Lei Tested-by: Nate Watterson [rm: use iova_shift(), rewrote commit message] Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 69ea3e258ff2..953cfd20f152 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -154,7 +154,7 @@ struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, unsigned long pfn_32bit); + unsigned long start_pfn); int init_iova_flush_queue(struct iova_domain *iovad, iova_flush_cb flush_cb, iova_entry_dtor entry_dtor); struct iova *find_iova(struct iova_domain *iovad, unsigned long pfn); @@ -230,8 +230,7 @@ static inline void copy_reserved_iova(struct iova_domain *from, static inline void init_iova_domain(struct iova_domain *iovad, unsigned long granule, - unsigned long start_pfn, - unsigned long pfn_32bit) + unsigned long start_pfn) { } -- cgit v1.2.3 From bb68b2fbfbd643d4407541f9c7a16a2c9b3a57c7 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 21 Sep 2017 16:52:46 +0100 Subject: iommu/iova: Add rbtree anchor node Add a permanent dummy IOVA reservation to the rbtree, such that we can always access the top of the address space instantly. The immediate benefit is that we remove the overhead of the rb_last() traversal when not using the cached node, but it also paves the way for further simplifications. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iova.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 953cfd20f152..c696ee81054e 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -75,6 +75,7 @@ struct iova_domain { unsigned long granule; /* pfn granularity for this domain */ unsigned long start_pfn; /* Lower limit for this domain */ unsigned long dma_32bit_pfn; + struct iova anchor; /* rbtree lookup anchor */ struct iova_rcache rcaches[IOVA_RANGE_CACHE_MAX_SIZE]; /* IOVA range caches */ iova_flush_cb flush_cb; /* Call-Back function to flush IOMMU -- cgit v1.2.3 From 310ebbba3b7396b00bce08a33f1d2de2c74fa257 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:12 +0200 Subject: ipmr: Add reference count to MFC entries Next commits will introduce MFC notifications through the atomic fib_notification chain, thus allowing modules to be aware of MFC entries. Due to the fact that modules may need to hold a reference to an MFC entry, add reference count to MFC entries to prevent them from being freed while these modules use them. The reference counting is done only on resolved MFC entries currently. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index d7f63339ef0b..10028f208efb 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -109,6 +109,7 @@ struct mfc_cache_cmp_arg { * @wrong_if: number of wrong source interface hits * @lastuse: time of last use of the group (traffic or update) * @ttls: OIF TTL threshold array + * @refcount: reference count for this entry * @list: global entry list * @rcu: used for entry destruction */ @@ -138,6 +139,7 @@ struct mfc_cache { unsigned long wrong_if; unsigned long lastuse; unsigned char ttls[MAXVIFS]; + refcount_t refcount; } res; } mfc_un; struct list_head list; @@ -148,4 +150,23 @@ struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, struct rtmsg *rtm, u32 portid); + +#ifdef CONFIG_IP_MROUTE +void ipmr_cache_free(struct mfc_cache *mfc_cache); +#else +static inline void ipmr_cache_free(struct mfc_cache *mfc_cache) +{ +} +#endif + +static inline void ipmr_cache_put(struct mfc_cache *c) +{ + if (refcount_dec_and_test(&c->mfc_un.res.refcount)) + ipmr_cache_free(c); +} +static inline void ipmr_cache_hold(struct mfc_cache *c) +{ + refcount_inc(&c->mfc_un.res.refcount); +} + #endif -- cgit v1.2.3 From 4d65b9487831170e699b2fc64a91b839d729bd78 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:13 +0200 Subject: ipmr: Add FIB notification access functions Make the ipmr module register as a FIB notifier. To do that, implement both the ipmr_seq_read and ipmr_dump ops. The ipmr_seq_read op returns a sequence counter that is incremented on every notification related operation done by the ipmr. To implement that, add a sequence counter in the netns_ipv4 struct and increment it whenever a new MFC route or VIF are added or deleted. The sequence operations are protected by the RTNL lock. The ipmr_dump iterates the list of MFC routes and the list of VIF entries and sends notifications about them. The entries dump is done under RCU where the VIF dump uses the mrt_lock too, as the vif->dev field can change under RCU. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 10028f208efb..54c5cb82ddcb 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #ifdef CONFIG_IP_MROUTE @@ -58,6 +59,14 @@ struct vif_device { int link; /* Physical interface index */ }; +struct vif_entry_notifier_info { + struct fib_notifier_info info; + struct net_device *dev; + vifi_t vif_index; + unsigned short vif_flags; + u32 tb_id; +}; + #define VIFF_STATIC 0x8000 #define VIF_EXISTS(_mrt, _idx) ((_mrt)->vif_table[_idx].dev != NULL) @@ -146,6 +155,12 @@ struct mfc_cache { struct rcu_head rcu; }; +struct mfc_entry_notifier_info { + struct fib_notifier_info info; + struct mfc_cache *mfc; + u32 tb_id; +}; + struct rtmsg; int ipmr_get_route(struct net *net, struct sk_buff *skb, __be32 saddr, __be32 daddr, -- cgit v1.2.3 From c7c0bbeae9501a7e42f2fd306d6a6399aca688b6 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:15 +0200 Subject: net: ipmr: Add MFC offload indication Allow drivers, registered to the fib notification chain indicate whether a multicast MFC route is offloaded or not, similarly to unicast routes. The indication of whether a route is offloaded is done using the mfc_flags field on an mfc_cache struct, and the information is sent to the userspace via the RTNetlink interface only. Currently, MFC routes are either offloaded or not, thus there is no need to add per-VIF offload indication. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 54c5cb82ddcb..5566580811ce 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -90,9 +90,11 @@ struct mr_table { /* mfc_flags: * MFC_STATIC - the entry was added statically (not by a routing daemon) + * MFC_OFFLOAD - the entry was offloaded to the hardware */ enum { MFC_STATIC = BIT(0), + MFC_OFFLOAD = BIT(1), }; struct mfc_cache_cmp_arg { -- cgit v1.2.3 From 478e4c2f0067d57d7c17059caafab026ca32084a Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Wed, 27 Sep 2017 08:23:16 +0200 Subject: net: mroute: Check if rule is a default rule When the ipmr starts, it adds one default FIB rule that matches all packets and sends them to the DEFAULT (multicast) FIB table. A more complex rule can be added by user to specify that for a specific interface, a packet should be look up at either an arbitrary table or according to the l3mdev of the interface. For drivers willing to offload the ipmr logic into a hardware but don't want to offload all the FIB rules functionality, provide a function that can indicate whether the FIB rule is the default multicast rule, thus only one routing table is needed. This way, a driver can register to the FIB notification chain, get notifications about FIB rules added and trigger some kind of an internal abort mechanism when a non default rule is added by the user. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Reviewed-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index 5566580811ce..b072a84fbe1c 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -5,6 +5,7 @@ #include #include #include +#include #include #include @@ -19,6 +20,7 @@ int ip_mroute_getsockopt(struct sock *, int, char __user *, int __user *); int ipmr_ioctl(struct sock *sk, int cmd, void __user *arg); int ipmr_compat_ioctl(struct sock *sk, unsigned int cmd, void __user *arg); int ip_mr_init(void); +bool ipmr_rule_default(const struct fib_rule *rule); #else static inline int ip_mroute_setsockopt(struct sock *sock, int optname, char __user *optval, unsigned int optlen) @@ -46,6 +48,11 @@ static inline int ip_mroute_opt(int opt) { return 0; } + +static inline bool ipmr_rule_default(const struct fib_rule *rule) +{ + return true; +} #endif struct vif_device { -- cgit v1.2.3 From 511d57dc71a22514e106f79a878e788cb22f73e3 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Wed, 30 Aug 2017 08:04:24 -0500 Subject: ipmi: Get the device id through a function This makes getting the device id consistent, and make it possible to add a function to fetch it dynamically later. Signed-off-by: Corey Minyard --- include/linux/ipmi.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index f1045b2c6a00..80fc3f798984 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -113,9 +113,9 @@ int ipmi_create_user(unsigned int if_num, int ipmi_destroy_user(ipmi_user_t user); /* Get the IPMI version of the BMC we are talking to. */ -void ipmi_get_version(ipmi_user_t user, - unsigned char *major, - unsigned char *minor); +int ipmi_get_version(ipmi_user_t user, + unsigned char *major, + unsigned char *minor); /* Set and get the slave address and LUN that we will use for our source messages. Note that this affects the interface, not just -- cgit v1.2.3 From c468f911b73beb39b20f7e5f97a35d41f038b31b Mon Sep 17 00:00:00 2001 From: Jeremy Kerr Date: Fri, 25 Aug 2017 15:47:23 +0800 Subject: ipmi: Make ipmi_demangle_device_id more generic Currently, ipmi_demagle_device_id requires a full response buffer in its data argument. This means we can't use it to parse a response in a struct ipmi_recv_msg, which has the netfn and cmd as separate bytes. This change alters the definition and users of ipmi_demangle_device_id to use a split netfn, cmd and data buffer, so it can be used with non-sequential responses. Signed-off-by: Jeremy Kerr Fixed the ipmi_ssif.c and ipmi_si_intf.c changes to use data from the response, not the data from the message, when passing info to the ipmi_demangle_device_id() function. Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index f8cea14485dd..75542c837c07 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -162,27 +162,27 @@ struct ipmi_device_id { #define ipmi_version_major(v) ((v)->ipmi_version & 0xf) #define ipmi_version_minor(v) ((v)->ipmi_version >> 4) -/* Take a pointer to a raw data buffer and a length and extract device - id information from it. The first byte of data must point to the - netfn << 2, the data should be of the format: - netfn << 2, cmd, completion code, data - as normally comes from a device interface. */ -static inline int ipmi_demangle_device_id(const unsigned char *data, +/* Take a pointer to an IPMI response and extract device id information from + * it. @netfn is in the IPMI_NETFN_ format, so may need to be shifted from + * a SI response. + */ +static inline int ipmi_demangle_device_id(uint8_t netfn, uint8_t cmd, + const unsigned char *data, unsigned int data_len, struct ipmi_device_id *id) { - if (data_len < 9) + if (data_len < 7) return -EINVAL; - if (data[0] != IPMI_NETFN_APP_RESPONSE << 2 || - data[1] != IPMI_GET_DEVICE_ID_CMD) + if (netfn != IPMI_NETFN_APP_RESPONSE || cmd != IPMI_GET_DEVICE_ID_CMD) /* Strange, didn't get the response we expected. */ return -EINVAL; - if (data[2] != 0) + if (data[0] != 0) /* That's odd, it shouldn't be able to fail. */ return -EINVAL; - data += 3; - data_len -= 3; + data++; + data_len--; + id->device_id = data[0]; id->device_revision = data[1]; id->firmware_revision_1 = data[2]; -- cgit v1.2.3 From 1e5058ea21010883b1e1d288637f7390bb8d1c61 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Thu, 31 Aug 2017 16:45:40 -0500 Subject: ipmi: Remove the device id from ipmi_register_smi() It's no longer used, dynamic device id handling is in place now. Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 75542c837c07..97771e36b7f0 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -214,7 +214,6 @@ static inline int ipmi_demangle_device_id(uint8_t netfn, uint8_t cmd, call. */ int ipmi_register_smi(const struct ipmi_smi_handlers *handlers, void *send_info, - struct ipmi_device_id *device_id, struct device *dev, unsigned char slave_addr); -- cgit v1.2.3 From da61e7308df868c20a41861c0a11f6eefd822993 Mon Sep 17 00:00:00 2001 From: Corentin Labbe Date: Mon, 18 Sep 2017 20:10:36 +0200 Subject: sbus: char: Move D7S_MINOR to include/linux/miscdevice.h This patch move the define for D7S_MINOR to include/linux/miscdevice.h. It's better that all minor number are in the same place. Signed-off-by: Corentin Labbe Signed-off-by: David S. Miller --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 58751eae5f77..05c3892b3e92 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -35,6 +35,7 @@ #define HWRNG_MINOR 183 #define MICROCODE_MINOR 184 #define IRNET_MINOR 187 +#define D7S_MINOR 193 #define VFIO_MINOR 196 #define TUN_MINOR 200 #define CUSE_MINOR 203 -- cgit v1.2.3 From 55f91cb6f1dfc873359674f35a8ffb1e78429d22 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Sat, 16 Sep 2017 15:51:25 -0500 Subject: ipmi: Make the IPMI proc interface configurable So we can remove it later. Signed-off-by: Corey Minyard --- include/linux/ipmi_smi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipmi_smi.h b/include/linux/ipmi_smi.h index 97771e36b7f0..5be51281e14d 100644 --- a/include/linux/ipmi_smi.h +++ b/include/linux/ipmi_smi.h @@ -241,11 +241,13 @@ static inline void ipmi_free_smi_msg(struct ipmi_smi_msg *msg) msg->done(msg); } +#ifdef CONFIG_IPMI_PROC_INTERFACE /* Allow the lower layer to add things to the proc filesystem directory for this interface. Note that the entry will automatically be dstroyed when the interface is destroyed. */ int ipmi_smi_add_proc_entry(ipmi_smi_t smi, char *name, const struct file_operations *proc_ops, void *data); +#endif #endif /* __LINUX_IPMI_SMI_H */ -- cgit v1.2.3 From 95e300c052fd9dbb05f289a912c138ed03320ec5 Mon Sep 17 00:00:00 2001 From: Corey Minyard Date: Mon, 18 Sep 2017 12:38:17 -0500 Subject: ipmi: Make the DMI probe into a generic platform probe Rework the DMI probe function to be a generic platform probe, and then rework the DMI code (and a few other things) to use the more generic information. This is so other things can declare platform IPMI devices. Signed-off-by: Corey Minyard --- include/linux/ipmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipmi.h b/include/linux/ipmi.h index 80fc3f798984..f4ffacf4fe9d 100644 --- a/include/linux/ipmi.h +++ b/include/linux/ipmi.h @@ -277,7 +277,7 @@ int ipmi_validate_addr(struct ipmi_addr *addr, int len); */ enum ipmi_addr_src { SI_INVALID = 0, SI_HOTMOD, SI_HARDCODED, SI_SPMI, SI_ACPI, SI_SMBIOS, - SI_PCI, SI_DEVICETREE, SI_LAST + SI_PCI, SI_DEVICETREE, SI_PLATFORM, SI_LAST }; const char *ipmi_addr_src_to_str(enum ipmi_addr_src src); -- cgit v1.2.3 From 6ade97da601f8af793f6c7a861af754d0f0b6767 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Tue, 26 Sep 2017 23:12:28 +0300 Subject: arp: make arp_hdr_len() return unsigned int Negative ARP header length are not a thing. Constify arguments while I'm at it. Space savings: add/remove: 0/0 grow/shrink: 0/1 up/down: 0/-3 (-3) function old new delta arpt_do_table 1163 1160 -3 Signed-off-by: Alexey Dobriyan Signed-off-by: David S. Miller --- include/linux/if_arp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_arp.h b/include/linux/if_arp.h index 3355efc89781..6756fea18b69 100644 --- a/include/linux/if_arp.h +++ b/include/linux/if_arp.h @@ -31,7 +31,7 @@ static inline struct arphdr *arp_hdr(const struct sk_buff *skb) return (struct arphdr *)skb_network_header(skb); } -static inline int arp_hdr_len(struct net_device *dev) +static inline unsigned int arp_hdr_len(const struct net_device *dev) { switch (dev->type) { #if IS_ENABLED(CONFIG_FIREWIRE_NET) -- cgit v1.2.3 From cb4d2b3f03d8eed90be3a194e5b54b734ec4bbe9 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:52 -0700 Subject: bpf: Add name, load_time, uid and map_ids to bpf_prog_info The patch adds name and load_time to struct bpf_prog_aux. They are also exported to bpf_prog_info. The bpf_prog's name is passed by userspace during BPF_PROG_LOAD. The kernel only stores the first (BPF_PROG_NAME_LEN - 1) bytes and the name stored in the kernel is always \0 terminated. The kernel will reject name that contains characters other than isalnum() and '_'. It will also reject name that is not null terminated. The existing 'user->uid' of the bpf_prog_aux is also exported to the bpf_prog_info as created_by_uid. The existing 'used_maps' of the bpf_prog_aux is exported to the newly added members 'nr_map_ids' and 'map_ids' of the bpf_prog_info. On the input, nr_map_ids tells how big the userspace's map_ids buffer is. On the output, nr_map_ids tells the exact user_map_cnt and it will only copy up to the userspace's map_ids buffer is allowed. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 2b672c50f160..33ccc474fb04 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -187,6 +187,8 @@ struct bpf_prog_aux { struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; + u64 load_time; /* ns since boottime */ + u8 name[BPF_OBJ_NAME_LEN]; union { struct work_struct work; struct rcu_head rcu; -- cgit v1.2.3 From ad5b177bd73f5107d97c36f56395c4281fb6f089 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Wed, 27 Sep 2017 14:37:53 -0700 Subject: bpf: Add map_name to bpf_map_info This patch allows userspace to specify a name for a map during BPF_MAP_CREATE. The map's name can later be exported to user space via BPF_OBJ_GET_INFO_BY_FD. Signed-off-by: Martin KaFai Lau Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 33ccc474fb04..252f4bc9eb25 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,6 +56,7 @@ struct bpf_map { struct work_struct work; atomic_t usercnt; struct bpf_map *inner_map_meta; + u8 name[BPF_OBJ_NAME_LEN]; }; /* function argument constraints */ -- cgit v1.2.3 From 1ea6c46a23f1213d1972bfae220db5c165e27bba Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 May 2017 15:59:54 +0200 Subject: sched/fair: Propagate an effective runnable_load_avg The load balancer uses runnable_load_avg as load indicator. For !cgroup this is: runnable_load_avg = \Sum se->avg.load_avg ; where se->on_rq That is, a direct sum of all runnable tasks on that runqueue. As opposed to load_avg, which is a sum of all tasks on the runqueue, which includes a blocked component. However, in the cgroup case, this comes apart since the group entities are always runnable, even if most of their constituent entities are blocked. Therefore introduce a runnable_weight which for task entities is the same as the regular weight, but for group entities is a fraction of the entity weight and represents the runnable part of the group runqueue. Then propagate this load through the PELT hierarchy to arrive at an effective runnable load avgerage -- which we should not confuse with the canonical runnable load average. Suggested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 26a7df4e558c..bdd6ad6fcce1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -331,9 +331,11 @@ struct load_weight { struct sched_avg { u64 last_update_time; u64 load_sum; + u64 runnable_load_sum; u32 util_sum; u32 period_contrib; unsigned long load_avg; + unsigned long runnable_load_avg; unsigned long util_avg; }; @@ -376,6 +378,7 @@ struct sched_statistics { struct sched_entity { /* For load-balancing: */ struct load_weight load; + unsigned long runnable_weight; struct rb_node run_node; struct list_head group_node; unsigned int on_rq; -- cgit v1.2.3 From a3f5aa907340b5d7b54223ddbaa90410f168864d Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Fri, 14 Jul 2017 09:27:08 -0400 Subject: i40e: Enable VF to negotiate number of allocated queues Currently the PF allocates a default number of queues for each VF and cannot be changed. This patch enables the VF to request a different number of queues allocated to it. This patch also adds a new virtchnl op and capability flag to facilitate this negotiation. After the PF receives a request message, it will set a requested number of queues for that VF. Then when the VF resets, its VSI will get a new number of queues allocated to it. This is a best effort request and since we only allocate a guaranteed default number, if the VF tries to ask for more than the guaranteed number, there may not be enough in HW to accommodate it unless other queues for other VFs are freed. It should also be noted decreasing the number queues allocated to a VF to below the default will NOT enable the allocation of more than 32 VFs per PF and will not free queues guaranteed to each VF by default. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 2b038442c352..60e5d90cb18a 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -135,6 +135,7 @@ enum virtchnl_ops { VIRTCHNL_OP_SET_RSS_HENA = 26, VIRTCHNL_OP_ENABLE_VLAN_STRIPPING = 27, VIRTCHNL_OP_DISABLE_VLAN_STRIPPING = 28, + VIRTCHNL_OP_REQUEST_QUEUES = 29, }; /* This macro is used to generate a compilation error if a structure @@ -235,6 +236,7 @@ VIRTCHNL_CHECK_STRUCT_LEN(16, virtchnl_vsi_resource); #define VIRTCHNL_VF_OFFLOAD_RSS_AQ 0x00000008 #define VIRTCHNL_VF_OFFLOAD_RSS_REG 0x00000010 #define VIRTCHNL_VF_OFFLOAD_WB_ON_ITR 0x00000020 +#define VIRTCHNL_VF_OFFLOAD_REQ_QUEUES 0x00000040 #define VIRTCHNL_VF_OFFLOAD_VLAN 0x00010000 #define VIRTCHNL_VF_OFFLOAD_RX_POLLING 0x00020000 #define VIRTCHNL_VF_OFFLOAD_RSS_PCTYPE_V2 0x00040000 @@ -325,6 +327,21 @@ struct virtchnl_vsi_queue_config_info { struct virtchnl_queue_pair_info qpair[1]; }; +/* VIRTCHNL_OP_REQUEST_QUEUES + * VF sends this message to request the PF to allocate additional queues to + * this VF. Each VF gets a guaranteed number of queues on init but asking for + * additional queues must be negotiated. This is a best effort request as it + * is possible the PF does not have enough queues left to support the request. + * If the PF cannot support the number requested it will respond with the + * maximum number it is able to support; otherwise it will respond with the + * number requested. + */ + +/* VF resource request */ +struct virtchnl_vf_res_request { + u16 num_queue_pairs; +}; + VIRTCHNL_CHECK_STRUCT_LEN(72, virtchnl_vsi_queue_config_info); /* VIRTCHNL_OP_CONFIG_IRQ_MAP @@ -691,6 +708,9 @@ virtchnl_vc_validate_vf_msg(struct virtchnl_version_info *ver, u32 v_opcode, case VIRTCHNL_OP_ENABLE_VLAN_STRIPPING: case VIRTCHNL_OP_DISABLE_VLAN_STRIPPING: break; + case VIRTCHNL_OP_REQUEST_QUEUES: + valid_len = sizeof(struct virtchnl_vf_res_request); + break; /* These are always errors coming from the VF. */ case VIRTCHNL_OP_EVENT: case VIRTCHNL_OP_UNKNOWN: -- cgit v1.2.3 From 2f657a600409f1961d67642fe384a9d4be71d36a Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Fri, 29 Sep 2017 17:19:20 -0400 Subject: net: dsa: change dsa_ptr for a dsa_port With DSA, a master net device (CPU facing interface) has a dsa_ptr pointer to which hangs a dsa_switch_tree. This is not correct because a master interface is wired to a dedicated switch port, and because we can theoretically have several master interfaces pointing to several CPU ports of the same switch fabric. Change the master interface's dsa_ptr for the CPU dsa_port pointer. This is a step towards supporting multiple CPU ports. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index f535779d9dc1..e1d6ef130611 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -55,7 +55,7 @@ struct netpoll_info; struct device; struct phy_device; -struct dsa_switch_tree; +struct dsa_port; /* 802.11 specific */ struct wireless_dev; @@ -1752,7 +1752,7 @@ struct net_device { struct vlan_info __rcu *vlan_info; #endif #if IS_ENABLED(CONFIG_NET_DSA) - struct dsa_switch_tree *dsa_ptr; + struct dsa_port *dsa_ptr; #endif #if IS_ENABLED(CONFIG_TIPC) struct tipc_bearer __rcu *tipc_ptr; -- cgit v1.2.3 From 19fe5f643f89f29c1a16bc474d91506b0e9a6232 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 1 Oct 2017 17:55:54 -0400 Subject: iomap: Switch from blkno to disk offset Replace iomap->blkno, the sector number, with iomap->addr, the disk offset in bytes. For invalid disk offsets, use the special value IOMAP_NULL_ADDR instead of IOMAP_NULL_BLOCK. This allows to use iomap for mappings which are not block aligned, such as inline data on ext4. Signed-off-by: Andreas Gruenbacher Signed-off-by: Theodore Ts'o Reviewed-by: Darrick J. Wong # iomap, xfs Reviewed-by: Jan Kara --- include/linux/iomap.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f64dc6ce5161..7b8a615fa021 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -15,8 +15,8 @@ struct vm_fault; */ #define IOMAP_HOLE 0x01 /* no blocks allocated, need allocation */ #define IOMAP_DELALLOC 0x02 /* delayed allocation blocks */ -#define IOMAP_MAPPED 0x03 /* blocks allocated @blkno */ -#define IOMAP_UNWRITTEN 0x04 /* blocks allocated @blkno in unwritten state */ +#define IOMAP_MAPPED 0x03 /* blocks allocated at @addr */ +#define IOMAP_UNWRITTEN 0x04 /* blocks allocated at @addr in unwritten state */ /* * Flags for all iomap mappings: @@ -30,12 +30,12 @@ struct vm_fault; #define IOMAP_F_SHARED 0x20 /* block shared with another file */ /* - * Magic value for blkno: + * Magic value for addr: */ -#define IOMAP_NULL_BLOCK -1LL /* blkno is not valid */ +#define IOMAP_NULL_ADDR -1ULL /* addr is not valid */ struct iomap { - sector_t blkno; /* 1st sector of mapping, 512b units */ + u64 addr; /* disk offset of mapping, bytes */ loff_t offset; /* file offset of mapping, bytes */ u64 length; /* length of mapping, bytes */ u16 type; /* type of mapping */ -- cgit v1.2.3 From 9ca250a5137f3df7ffac58b49660beadd5926ace Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 1 Oct 2017 17:56:54 -0400 Subject: iomap: Add IOMAP_F_DATA_INLINE flag Add a new IOMAP_F_DATA_INLINE flag to indicate that a mapping is in a disk area that contains data as well as metadata. In iomap_fiemap, map this flag to FIEMAP_EXTENT_DATA_INLINE. Signed-off-by: Andreas Gruenbacher Signed-off-by: Theodore Ts'o Reviewed-by: Jan Kara --- include/linux/iomap.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 7b8a615fa021..2b0790dbd6ea 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -26,8 +26,9 @@ struct vm_fault; /* * Flags that only need to be reported for IOMAP_REPORT requests: */ -#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ -#define IOMAP_F_SHARED 0x20 /* block shared with another file */ +#define IOMAP_F_MERGED 0x10 /* contains multiple blocks/extents */ +#define IOMAP_F_SHARED 0x20 /* block shared with another file */ +#define IOMAP_F_DATA_INLINE 0x40 /* data inline in the inode */ /* * Magic value for addr: -- cgit v1.2.3 From 4f210c2938053e894ee00500dd0661f7a27a6ff4 Mon Sep 17 00:00:00 2001 From: Jaejoong Kim Date: Wed, 20 Sep 2017 18:40:36 +0900 Subject: HID: add comment for power callback in struct hid_ll_driver There is a missing comment in struct hid_ll_driver. So, add it. Signed-off-by: Jaejoong Kim Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index ab05a86269dc..7c3d4a17bbde 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -753,6 +753,7 @@ struct hid_driver { * @stop: called on remove * @open: called by input layer on open * @close: called by input layer on close + * @power: request underlying hardware to enter requested power mode * @parse: this method is called only once to parse the device data, * shouldn't allocate anything to not leak memory * @request: send report request to device (e.g. feature report) -- cgit v1.2.3 From 66b1bedf662518e9b6367990a87e9601b35a94c1 Mon Sep 17 00:00:00 2001 From: Avraham Stern Date: Fri, 29 Sep 2017 14:21:14 +0300 Subject: ieee80211: Add WFA TPC report element OUI type Add Transmit Power Control OUI type definition for WLAN_OUI_MICROSOFT. Signed-off-by: Avraham Stern Signed-off-by: Luca Coelho Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 55a604ad459f..ee6657a0ed69 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2445,6 +2445,7 @@ enum ieee80211_sa_query_action { #define WLAN_OUI_TYPE_MICROSOFT_WPA 1 #define WLAN_OUI_TYPE_MICROSOFT_WMM 2 #define WLAN_OUI_TYPE_MICROSOFT_WPS 4 +#define WLAN_OUI_TYPE_MICROSOFT_TPC 8 /* * WMM/802.11e Tspec Element -- cgit v1.2.3 From f2f2efb807d339513199b1bb771806c90cce83ae Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:28 +0300 Subject: byteorder: Move {cpu_to_be32, be32_to_cpu}_array() from Thunderbolt to core We will be using these when communicating XDomain discovery protocol over Thunderbolt link but they might be useful for other drivers as well. Make them available through byteorder/generic.h. Suggested-by: Andy Shevchenko Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/byteorder/generic.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/byteorder/generic.h b/include/linux/byteorder/generic.h index 89f67c1c3160..805d16654459 100644 --- a/include/linux/byteorder/generic.h +++ b/include/linux/byteorder/generic.h @@ -170,4 +170,20 @@ static inline void be64_add_cpu(__be64 *var, u64 val) *var = cpu_to_be64(be64_to_cpu(*var) + val); } +static inline void cpu_to_be32_array(__be32 *dst, const u32 *src, size_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = cpu_to_be32(src[i]); +} + +static inline void be32_to_cpu_array(u32 *dst, const __be32 *src, size_t len) +{ + int i; + + for (i = 0; i < len; i++) + dst[i] = be32_to_cpu(src[i]); +} + #endif /* _LINUX_BYTEORDER_GENERIC_H */ -- cgit v1.2.3 From cdae7c07e3e3509eaabc18c1640a55dc5b99c179 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:30 +0300 Subject: thunderbolt: Add support for XDomain properties Thunderbolt XDomain discovery protocol uses directories which contain properties and other directories to exchange information about what capabilities the remote host supports. This also includes identification information like device ID and name. This adds support for parsing and formatting these properties and establishes an API drivers can use in addition to the core Thunderbolt driver. This API is exposed in a new header: include/linux/thunderbolt.h. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 89 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 89 insertions(+) create mode 100644 include/linux/thunderbolt.h (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h new file mode 100644 index 000000000000..96561c1265ae --- /dev/null +++ b/include/linux/thunderbolt.h @@ -0,0 +1,89 @@ +/* + * Thunderbolt service API + * + * Copyright (C) 2017, Intel Corporation + * Authors: Michael Jamet + * Mika Westerberg + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef THUNDERBOLT_H_ +#define THUNDERBOLT_H_ + +#include +#include + +/** + * struct tb_property_dir - XDomain property directory + * @uuid: Directory UUID or %NULL if root directory + * @properties: List of properties in this directory + * + * User needs to provide serialization if needed. + */ +struct tb_property_dir { + const uuid_t *uuid; + struct list_head properties; +}; + +enum tb_property_type { + TB_PROPERTY_TYPE_UNKNOWN = 0x00, + TB_PROPERTY_TYPE_DIRECTORY = 0x44, + TB_PROPERTY_TYPE_DATA = 0x64, + TB_PROPERTY_TYPE_TEXT = 0x74, + TB_PROPERTY_TYPE_VALUE = 0x76, +}; + +#define TB_PROPERTY_KEY_SIZE 8 + +/** + * struct tb_property - XDomain property + * @list: Used to link properties together in a directory + * @key: Key for the property (always terminated). + * @type: Type of the property + * @length: Length of the property data in dwords + * @value: Property value + * + * Users use @type to determine which field in @value is filled. + */ +struct tb_property { + struct list_head list; + char key[TB_PROPERTY_KEY_SIZE + 1]; + enum tb_property_type type; + size_t length; + union { + struct tb_property_dir *dir; + u8 *data; + char *text; + u32 immediate; + } value; +}; + +struct tb_property_dir *tb_property_parse_dir(const u32 *block, + size_t block_len); +ssize_t tb_property_format_dir(const struct tb_property_dir *dir, u32 *block, + size_t block_len); +struct tb_property_dir *tb_property_create_dir(const uuid_t *uuid); +void tb_property_free_dir(struct tb_property_dir *dir); +int tb_property_add_immediate(struct tb_property_dir *parent, const char *key, + u32 value); +int tb_property_add_data(struct tb_property_dir *parent, const char *key, + const void *buf, size_t buflen); +int tb_property_add_text(struct tb_property_dir *parent, const char *key, + const char *text); +int tb_property_add_dir(struct tb_property_dir *parent, const char *key, + struct tb_property_dir *dir); +void tb_property_remove(struct tb_property *tb_property); +struct tb_property *tb_property_find(struct tb_property_dir *dir, + const char *key, enum tb_property_type type); +struct tb_property *tb_property_get_next(struct tb_property_dir *dir, + struct tb_property *prev); + +#define tb_property_for_each(dir, property) \ + for (property = tb_property_get_next(dir, NULL); \ + property; \ + property = tb_property_get_next(dir, property)) + +#endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From eaf8ff35a345449207ad116e2574c19780ec9a98 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:31 +0300 Subject: thunderbolt: Move enum tb_cfg_pkg_type to thunderbolt.h These will be needed by Thunderbolt services when sending and receiving XDomain control messages. While there change TB_CFG_PKG_PREPARE_TO_SLEEP value to be decimal in order to be consistent with other members. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 96561c1265ae..b512b1e2b4f2 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -1,6 +1,7 @@ /* * Thunderbolt service API * + * Copyright (C) 2014 Andreas Noever * Copyright (C) 2017, Intel Corporation * Authors: Michael Jamet * Mika Westerberg @@ -16,6 +17,22 @@ #include #include +enum tb_cfg_pkg_type { + TB_CFG_PKG_READ = 1, + TB_CFG_PKG_WRITE = 2, + TB_CFG_PKG_ERROR = 3, + TB_CFG_PKG_NOTIFY_ACK = 4, + TB_CFG_PKG_EVENT = 5, + TB_CFG_PKG_XDOMAIN_REQ = 6, + TB_CFG_PKG_XDOMAIN_RESP = 7, + TB_CFG_PKG_OVERRIDE = 8, + TB_CFG_PKG_RESET = 9, + TB_CFG_PKG_ICM_EVENT = 10, + TB_CFG_PKG_ICM_CMD = 11, + TB_CFG_PKG_ICM_RESP = 12, + TB_CFG_PKG_PREPARE_TO_SLEEP = 13, +}; + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory -- cgit v1.2.3 From 9e99b9f4d5c36340dabda6d14053195b2a43796b Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:32 +0300 Subject: thunderbolt: Move thunderbolt domain structure to thunderbolt.h These are needed by Thunderbolt services so move them to thunderbolt.h to make sure they are available outside of drivers/thunderbolt. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index b512b1e2b4f2..910b1bf92112 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -14,7 +14,9 @@ #ifndef THUNDERBOLT_H_ #define THUNDERBOLT_H_ +#include #include +#include #include enum tb_cfg_pkg_type { @@ -33,6 +35,49 @@ enum tb_cfg_pkg_type { TB_CFG_PKG_PREPARE_TO_SLEEP = 13, }; +/** + * enum tb_security_level - Thunderbolt security level + * @TB_SECURITY_NONE: No security, legacy mode + * @TB_SECURITY_USER: User approval required at minimum + * @TB_SECURITY_SECURE: One time saved key required at minimum + * @TB_SECURITY_DPONLY: Only tunnel Display port (and USB) + */ +enum tb_security_level { + TB_SECURITY_NONE, + TB_SECURITY_USER, + TB_SECURITY_SECURE, + TB_SECURITY_DPONLY, +}; + +/** + * struct tb - main thunderbolt bus structure + * @dev: Domain device + * @lock: Big lock. Must be held when accessing any struct + * tb_switch / struct tb_port. + * @nhi: Pointer to the NHI structure + * @ctl: Control channel for this domain + * @wq: Ordered workqueue for all domain specific work + * @root_switch: Root switch of this domain + * @cm_ops: Connection manager specific operations vector + * @index: Linux assigned domain number + * @security_level: Current security level + * @privdata: Private connection manager specific data + */ +struct tb { + struct device dev; + struct mutex lock; + struct tb_nhi *nhi; + struct tb_ctl *ctl; + struct workqueue_struct *wq; + struct tb_switch *root_switch; + const struct tb_cm_ops *cm_ops; + int index; + enum tb_security_level security_level; + unsigned long privdata[0]; +}; + +extern struct bus_type tb_bus_type; + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory -- cgit v1.2.3 From e69b71f8458b78a2ef44e3d07374a8f46e45123d Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:33 +0300 Subject: thunderbolt: Move tb_switch_phy_port_from_link() to thunderbolt.h A Thunderbolt service might need to find the physical port from a link the cable is connected to. For instance networking driver uses this information to generate MAC address according the Apple ThunderboltIP protocol. Move this function to thunderbolt.h and rename it to tb_phy_port_from_link() to reflect the fact that it does not take switch as parameter. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 910b1bf92112..43b8d1e09341 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -78,6 +78,13 @@ struct tb { extern struct bus_type tb_bus_type; +#define TB_LINKS_PER_PHY_PORT 2 + +static inline unsigned int tb_phy_port_from_link(unsigned int link) +{ + return (link - 1) / TB_LINKS_PER_PHY_PORT; +} + /** * struct tb_property_dir - XDomain property directory * @uuid: Directory UUID or %NULL if root directory -- cgit v1.2.3 From d1ff70241a275133e1a0258b7c23588b122276c8 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:34 +0300 Subject: thunderbolt: Add support for XDomain discovery protocol When two hosts are connected over a Thunderbolt cable, there is a protocol they can use to communicate capabilities supported by the host. The discovery protocol uses automatically configured control channel (ring 0) and is build on top of request/response transactions using special XDomain primitives provided by the Thunderbolt base protocol. The capabilities consists of a root directory block of basic properties used for identification of the host, and then there can be zero or more directories each describing a Thunderbolt service and its capabilities. Once both sides have discovered what is supported the two hosts can setup high-speed DMA paths and transfer data to the other side using whatever protocol was agreed based on the properties. The software protocol used to communicate which DMA paths to enable is service specific. This patch adds support for the XDomain discovery protocol to the Thunderbolt bus. We model each remote host connection as a Linux XDomain device. For each Thunderbolt service found supported on the XDomain device, we create Linux Thunderbolt service device which Thunderbolt service drivers can then bind to based on the protocol identification information retrieved from the property directory describing the service. This code is based on the work done by Amir Levy and Michael Jamet. Signed-off-by: Michael Jamet Signed-off-by: Mika Westerberg Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/mod_devicetable.h | 26 +++++ include/linux/thunderbolt.h | 242 ++++++++++++++++++++++++++++++++++++++++ 2 files changed, 268 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 694cebb50f72..7625c3b81f84 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -683,5 +683,31 @@ struct fsl_mc_device_id { const char obj_type[16]; }; +/** + * struct tb_service_id - Thunderbolt service identifiers + * @match_flags: Flags used to match the structure + * @protocol_key: Protocol key the service supports + * @protocol_id: Protocol id the service supports + * @protocol_version: Version of the protocol + * @protocol_revision: Revision of the protocol software + * @driver_data: Driver specific data + * + * Thunderbolt XDomain services are exposed as devices where each device + * carries the protocol information the service supports. Thunderbolt + * XDomain service drivers match against that information. + */ +struct tb_service_id { + __u32 match_flags; + char protocol_key[8 + 1]; + __u32 protocol_id; + __u32 protocol_version; + __u32 protocol_revision; + kernel_ulong_t driver_data; +}; + +#define TBSVC_MATCH_PROTOCOL_KEY 0x0001 +#define TBSVC_MATCH_PROTOCOL_ID 0x0002 +#define TBSVC_MATCH_PROTOCOL_VERSION 0x0004 +#define TBSVC_MATCH_PROTOCOL_REVISION 0x0008 #endif /* LINUX_MOD_DEVICETABLE_H */ diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 43b8d1e09341..18c0e3d5e85c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -17,6 +17,7 @@ #include #include #include +#include #include enum tb_cfg_pkg_type { @@ -77,6 +78,8 @@ struct tb { }; extern struct bus_type tb_bus_type; +extern struct device_type tb_service_type; +extern struct device_type tb_xdomain_type; #define TB_LINKS_PER_PHY_PORT 2 @@ -155,4 +158,243 @@ struct tb_property *tb_property_get_next(struct tb_property_dir *dir, property; \ property = tb_property_get_next(dir, property)) +int tb_register_property_dir(const char *key, struct tb_property_dir *dir); +void tb_unregister_property_dir(const char *key, struct tb_property_dir *dir); + +/** + * struct tb_xdomain - Cross-domain (XDomain) connection + * @dev: XDomain device + * @tb: Pointer to the domain + * @remote_uuid: UUID of the remote domain (host) + * @local_uuid: Cached local UUID + * @route: Route string the other domain can be reached + * @vendor: Vendor ID of the remote domain + * @device: Device ID of the demote domain + * @lock: Lock to serialize access to the following fields of this structure + * @vendor_name: Name of the vendor (or %NULL if not known) + * @device_name: Name of the device (or %NULL if not known) + * @is_unplugged: The XDomain is unplugged + * @resume: The XDomain is being resumed + * @transmit_path: HopID which the remote end expects us to transmit + * @transmit_ring: Local ring (hop) where outgoing packets are pushed + * @receive_path: HopID which we expect the remote end to transmit + * @receive_ring: Local ring (hop) where incoming packets arrive + * @service_ids: Used to generate IDs for the services + * @properties: Properties exported by the remote domain + * @property_block_gen: Generation of @properties + * @properties_lock: Lock protecting @properties. + * @get_properties_work: Work used to get remote domain properties + * @properties_retries: Number of times left to read properties + * @properties_changed_work: Work used to notify the remote domain that + * our properties have changed + * @properties_changed_retries: Number of times left to send properties + * changed notification + * @link: Root switch link the remote domain is connected (ICM only) + * @depth: Depth in the chain the remote domain is connected (ICM only) + * + * This structure represents connection across two domains (hosts). + * Each XDomain contains zero or more services which are exposed as + * &struct tb_service objects. + * + * Service drivers may access this structure if they need to enumerate + * non-standard properties but they need hold @lock when doing so + * because properties can be changed asynchronously in response to + * changes in the remote domain. + */ +struct tb_xdomain { + struct device dev; + struct tb *tb; + uuid_t *remote_uuid; + const uuid_t *local_uuid; + u64 route; + u16 vendor; + u16 device; + struct mutex lock; + const char *vendor_name; + const char *device_name; + bool is_unplugged; + bool resume; + u16 transmit_path; + u16 transmit_ring; + u16 receive_path; + u16 receive_ring; + struct ida service_ids; + struct tb_property_dir *properties; + u32 property_block_gen; + struct delayed_work get_properties_work; + int properties_retries; + struct delayed_work properties_changed_work; + int properties_changed_retries; + u8 link; + u8 depth; +}; + +int tb_xdomain_enable_paths(struct tb_xdomain *xd, u16 transmit_path, + u16 transmit_ring, u16 receive_path, + u16 receive_ring); +int tb_xdomain_disable_paths(struct tb_xdomain *xd); +struct tb_xdomain *tb_xdomain_find_by_uuid(struct tb *tb, const uuid_t *uuid); + +static inline struct tb_xdomain * +tb_xdomain_find_by_uuid_locked(struct tb *tb, const uuid_t *uuid) +{ + struct tb_xdomain *xd; + + mutex_lock(&tb->lock); + xd = tb_xdomain_find_by_uuid(tb, uuid); + mutex_unlock(&tb->lock); + + return xd; +} + +static inline struct tb_xdomain *tb_xdomain_get(struct tb_xdomain *xd) +{ + if (xd) + get_device(&xd->dev); + return xd; +} + +static inline void tb_xdomain_put(struct tb_xdomain *xd) +{ + if (xd) + put_device(&xd->dev); +} + +static inline bool tb_is_xdomain(const struct device *dev) +{ + return dev->type == &tb_xdomain_type; +} + +static inline struct tb_xdomain *tb_to_xdomain(struct device *dev) +{ + if (tb_is_xdomain(dev)) + return container_of(dev, struct tb_xdomain, dev); + return NULL; +} + +int tb_xdomain_response(struct tb_xdomain *xd, const void *response, + size_t size, enum tb_cfg_pkg_type type); +int tb_xdomain_request(struct tb_xdomain *xd, const void *request, + size_t request_size, enum tb_cfg_pkg_type request_type, + void *response, size_t response_size, + enum tb_cfg_pkg_type response_type, + unsigned int timeout_msec); + +/** + * tb_protocol_handler - Protocol specific handler + * @uuid: XDomain messages with this UUID are dispatched to this handler + * @callback: Callback called with the XDomain message. Returning %1 + * here tells the XDomain core that the message was handled + * by this handler and should not be forwared to other + * handlers. + * @data: Data passed with the callback + * @list: Handlers are linked using this + * + * Thunderbolt services can hook into incoming XDomain requests by + * registering protocol handler. Only limitation is that the XDomain + * discovery protocol UUID cannot be registered since it is handled by + * the core XDomain code. + * + * The @callback must check that the message is really directed to the + * service the driver implements. + */ +struct tb_protocol_handler { + const uuid_t *uuid; + int (*callback)(const void *buf, size_t size, void *data); + void *data; + struct list_head list; +}; + +int tb_register_protocol_handler(struct tb_protocol_handler *handler); +void tb_unregister_protocol_handler(struct tb_protocol_handler *handler); + +/** + * struct tb_service - Thunderbolt service + * @dev: XDomain device + * @id: ID of the service (shown in sysfs) + * @key: Protocol key from the properties directory + * @prtcid: Protocol ID from the properties directory + * @prtcvers: Protocol version from the properties directory + * @prtcrevs: Protocol software revision from the properties directory + * @prtcstns: Protocol settings mask from the properties directory + * + * Each domain exposes set of services it supports as collection of + * properties. For each service there will be one corresponding + * &struct tb_service. Service drivers are bound to these. + */ +struct tb_service { + struct device dev; + int id; + const char *key; + u32 prtcid; + u32 prtcvers; + u32 prtcrevs; + u32 prtcstns; +}; + +static inline struct tb_service *tb_service_get(struct tb_service *svc) +{ + if (svc) + get_device(&svc->dev); + return svc; +} + +static inline void tb_service_put(struct tb_service *svc) +{ + if (svc) + put_device(&svc->dev); +} + +static inline bool tb_is_service(const struct device *dev) +{ + return dev->type == &tb_service_type; +} + +static inline struct tb_service *tb_to_service(struct device *dev) +{ + if (tb_is_service(dev)) + return container_of(dev, struct tb_service, dev); + return NULL; +} + +/** + * tb_service_driver - Thunderbolt service driver + * @driver: Driver structure + * @probe: Called when the driver is probed + * @remove: Called when the driver is removed (optional) + * @shutdown: Called at shutdown time to stop the service (optional) + * @id_table: Table of service identifiers the driver supports + */ +struct tb_service_driver { + struct device_driver driver; + int (*probe)(struct tb_service *svc, const struct tb_service_id *id); + void (*remove)(struct tb_service *svc); + void (*shutdown)(struct tb_service *svc); + const struct tb_service_id *id_table; +}; + +#define TB_SERVICE(key, id) \ + .match_flags = TBSVC_MATCH_PROTOCOL_KEY | \ + TBSVC_MATCH_PROTOCOL_ID, \ + .protocol_key = (key), \ + .protocol_id = (id) + +int tb_register_service_driver(struct tb_service_driver *drv); +void tb_unregister_service_driver(struct tb_service_driver *drv); + +static inline void *tb_service_get_drvdata(const struct tb_service *svc) +{ + return dev_get_drvdata(&svc->dev); +} + +static inline void tb_service_set_drvdata(struct tb_service *svc, void *data) +{ + dev_set_drvdata(&svc->dev, data); +} + +static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) +{ + return tb_to_xdomain(svc->dev.parent); +} + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From 3b3d9f4da96493e4f68d0a80ab210763a24f8b33 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:37 +0300 Subject: thunderbolt: Export ring handling functions to modules These are used by Thunderbolt services to send and receive frames over the high-speed DMA rings. We also put the functions to tb_ namespace to make sure we do not collide with others and add missing kernel-doc comments for the exported functions. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 158 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 158 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 18c0e3d5e85c..9ddb83ad890f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -15,10 +15,12 @@ #define THUNDERBOLT_H_ #include +#include #include #include #include #include +#include enum tb_cfg_pkg_type { TB_CFG_PKG_READ = 1, @@ -397,4 +399,160 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) return tb_to_xdomain(svc->dev.parent); } +/** + * struct tb_nhi - thunderbolt native host interface + * @lock: Must be held during ring creation/destruction. Is acquired by + * interrupt_work when dispatching interrupts to individual rings. + * @pdev: Pointer to the PCI device + * @iobase: MMIO space of the NHI + * @tx_rings: All Tx rings available on this host controller + * @rx_rings: All Rx rings available on this host controller + * @msix_ida: Used to allocate MSI-X vectors for rings + * @going_away: The host controller device is about to disappear so when + * this flag is set, avoid touching the hardware anymore. + * @interrupt_work: Work scheduled to handle ring interrupt when no + * MSI-X is used. + * @hop_count: Number of rings (end point hops) supported by NHI. + */ +struct tb_nhi { + struct mutex lock; + struct pci_dev *pdev; + void __iomem *iobase; + struct tb_ring **tx_rings; + struct tb_ring **rx_rings; + struct ida msix_ida; + bool going_away; + struct work_struct interrupt_work; + u32 hop_count; +}; + +/** + * struct tb_ring - thunderbolt TX or RX ring associated with a NHI + * @lock: Lock serializing actions to this ring. Must be acquired after + * nhi->lock. + * @nhi: Pointer to the native host controller interface + * @size: Size of the ring + * @hop: Hop (DMA channel) associated with this ring + * @head: Head of the ring (write next descriptor here) + * @tail: Tail of the ring (complete next descriptor here) + * @descriptors: Allocated descriptors for this ring + * @queue: Queue holding frames to be transferred over this ring + * @in_flight: Queue holding frames that are currently in flight + * @work: Interrupt work structure + * @is_tx: Is the ring Tx or Rx + * @running: Is the ring running + * @irq: MSI-X irq number if the ring uses MSI-X. %0 otherwise. + * @vector: MSI-X vector number the ring uses (only set if @irq is > 0) + * @flags: Ring specific flags + * @sof_mask: Bit mask used to detect start of frame PDF + * @eof_mask: Bit mask used to detect end of frame PDF + */ +struct tb_ring { + struct mutex lock; + struct tb_nhi *nhi; + int size; + int hop; + int head; + int tail; + struct ring_desc *descriptors; + dma_addr_t descriptors_dma; + struct list_head queue; + struct list_head in_flight; + struct work_struct work; + bool is_tx:1; + bool running:1; + int irq; + u8 vector; + unsigned int flags; + u16 sof_mask; + u16 eof_mask; +}; + +/* Leave ring interrupt enabled on suspend */ +#define RING_FLAG_NO_SUSPEND BIT(0) +/* Configure the ring to be in frame mode */ +#define RING_FLAG_FRAME BIT(1) +/* Enable end-to-end flow control */ +#define RING_FLAG_E2E BIT(2) + +struct ring_frame; +typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); + +/** + * struct ring_frame - For use with ring_rx/ring_tx + * @buffer_phy: DMA mapped address of the frame + * @callback: Callback called when the frame is finished + * @list: Frame is linked to a queue using this + * @size: Size of the frame in bytes (%0 means %4096) + * @flags: Flags for the frame (see &enum ring_desc_flags) + * @eof: End of frame protocol defined field + * @sof: Start of frame protocol defined field + */ +struct ring_frame { + dma_addr_t buffer_phy; + ring_cb callback; + struct list_head list; + u32 size:12; + u32 flags:12; + u32 eof:4; + u32 sof:4; +}; + +/* Minimum size for ring_rx */ +#define TB_FRAME_SIZE 0x100 + +struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags); +struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, + unsigned int flags, u16 sof_mask, + u16 eof_mask); +void tb_ring_start(struct tb_ring *ring); +void tb_ring_stop(struct tb_ring *ring); +void tb_ring_free(struct tb_ring *ring); + +int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); + +/** + * tb_ring_rx() - enqueue a frame on an RX ring + * @ring: Ring to enqueue the frame + * @frame: Frame to enqueue + * + * @frame->buffer, @frame->buffer_phy and @frame->callback have to be set. The + * buffer must contain at least %TB_FRAME_SIZE bytes. + * + * @frame->callback will be invoked with @frame->size, @frame->flags, + * @frame->eof, @frame->sof set once the frame has been received. + * + * If ring_stop() is called after the packet has been enqueued + * @frame->callback will be called with canceled set to true. + * + * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + */ +static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) +{ + WARN_ON(ring->is_tx); + return __tb_ring_enqueue(ring, frame); +} + +/** + * tb_ring_tx() - enqueue a frame on an TX ring + * @ring: Ring the enqueue the frame + * @frame: Frame to enqueue + * + * @frame->buffer, @frame->buffer_phy, @frame->callback, @frame->size, + * @frame->eof and @frame->sof have to be set. + * + * @frame->callback will be invoked with once the frame has been transmitted. + * + * If ring_stop() is called after the packet has been enqueued @frame->callback + * will be called with canceled set to true. + * + * Return: Returns %-ESHUTDOWN if ring_stop has been called. Zero otherwise. + */ +static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) +{ + WARN_ON(!ring->is_tx); + return __tb_ring_enqueue(ring, frame); +} + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From 2a91ec63f8a11e70d4b958dd4df867fec0247179 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:38 +0300 Subject: thunderbolt: Move ring descriptor flags to thunderbolt.h A Thunderbolt service driver might need to check if there was an error with the descriptor when in frame mode. We also add two Rx specific error flags RING_DESC_CRC_ERROR and RING_DESC_BUFFER_OVERRUN. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 9ddb83ad890f..e3b9af7be0ad 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -478,6 +478,24 @@ struct tb_ring { struct ring_frame; typedef void (*ring_cb)(struct tb_ring *, struct ring_frame *, bool canceled); +/** + * enum ring_desc_flags - Flags for DMA ring descriptor + * %RING_DESC_ISOCH: Enable isonchronous DMA (Tx only) + * %RING_DESC_CRC_ERROR: In frame mode CRC check failed for the frame (Rx only) + * %RING_DESC_COMPLETED: Descriptor completed (set by NHI) + * %RING_DESC_POSTED: Always set this + * %RING_DESC_BUFFER_OVERRUN: RX buffer overrun + * %RING_DESC_INTERRUPT: Request an interrupt on completion + */ +enum ring_desc_flags { + RING_DESC_ISOCH = 0x1, + RING_DESC_CRC_ERROR = 0x1, + RING_DESC_COMPLETED = 0x2, + RING_DESC_POSTED = 0x4, + RING_DESC_BUFFER_OVERRUN = 0x04, + RING_DESC_INTERRUPT = 0x8, +}; + /** * struct ring_frame - For use with ring_rx/ring_tx * @buffer_phy: DMA mapped address of the frame -- cgit v1.2.3 From 22b7de1000e66d739c431d6be4e7e97c69fa7c98 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:39 +0300 Subject: thunderbolt: Use spinlock in ring serialization This makes it possible to enqueue frames also from atomic context which is needed for example, when networking packets are sent over a Thunderbolt cable. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index e3b9af7be0ad..cf9e42db780f 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -448,7 +448,7 @@ struct tb_nhi { * @eof_mask: Bit mask used to detect end of frame PDF */ struct tb_ring { - struct mutex lock; + spinlock_t lock; struct tb_nhi *nhi; int size; int hop; -- cgit v1.2.3 From 59120e06101db72442acf4c8b364a0c76d8faa68 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:40 +0300 Subject: thunderbolt: Use spinlock in NHI serialization This is needed because ring polling functionality can be called from atomic contexts when networking and other high-speed traffic is transferred over a Thunderbolt cable. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index cf9e42db780f..d59e3f9a35c4 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -415,7 +415,7 @@ static inline struct tb_xdomain *tb_service_parent(struct tb_service *svc) * @hop_count: Number of rings (end point hops) supported by NHI. */ struct tb_nhi { - struct mutex lock; + spinlock_t lock; struct pci_dev *pdev; void __iomem *iobase; struct tb_ring **tx_rings; -- cgit v1.2.3 From 4ffe722eefcb07c76701f03e0d759fbaecedf79f Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:41 +0300 Subject: thunderbolt: Add polling mode for rings In order to support things like networking over Thunderbolt cable, there needs to be a way to switch the ring to a mode where it can be polled with the interrupt masked. We implement such mode so that the caller can allocate a ring by passing pointer to a function that is then called when an interrupt is triggered. Completed frames can be fetched using tb_ring_poll() and the interrupt can be re-enabled when the caller is finished with polling by using tb_ring_poll_complete(). Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 23 ++++++++++++++++------- 1 file changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index d59e3f9a35c4..36925e3aec7c 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -446,6 +446,9 @@ struct tb_nhi { * @flags: Ring specific flags * @sof_mask: Bit mask used to detect start of frame PDF * @eof_mask: Bit mask used to detect end of frame PDF + * @start_poll: Called when ring interrupt is triggered to start + * polling. Passing %NULL keeps the ring in interrupt mode. + * @poll_data: Data passed to @start_poll */ struct tb_ring { spinlock_t lock; @@ -466,6 +469,8 @@ struct tb_ring { unsigned int flags; u16 sof_mask; u16 eof_mask; + void (*start_poll)(void *data); + void *poll_data; }; /* Leave ring interrupt enabled on suspend */ @@ -499,7 +504,7 @@ enum ring_desc_flags { /** * struct ring_frame - For use with ring_rx/ring_tx * @buffer_phy: DMA mapped address of the frame - * @callback: Callback called when the frame is finished + * @callback: Callback called when the frame is finished (optional) * @list: Frame is linked to a queue using this * @size: Size of the frame in bytes (%0 means %4096) * @flags: Flags for the frame (see &enum ring_desc_flags) @@ -522,8 +527,8 @@ struct ring_frame { struct tb_ring *tb_ring_alloc_tx(struct tb_nhi *nhi, int hop, int size, unsigned int flags); struct tb_ring *tb_ring_alloc_rx(struct tb_nhi *nhi, int hop, int size, - unsigned int flags, u16 sof_mask, - u16 eof_mask); + unsigned int flags, u16 sof_mask, u16 eof_mask, + void (*start_poll)(void *), void *poll_data); void tb_ring_start(struct tb_ring *ring); void tb_ring_stop(struct tb_ring *ring); void tb_ring_free(struct tb_ring *ring); @@ -535,8 +540,8 @@ int __tb_ring_enqueue(struct tb_ring *ring, struct ring_frame *frame); * @ring: Ring to enqueue the frame * @frame: Frame to enqueue * - * @frame->buffer, @frame->buffer_phy and @frame->callback have to be set. The - * buffer must contain at least %TB_FRAME_SIZE bytes. + * @frame->buffer, @frame->buffer_phy have to be set. The buffer must + * contain at least %TB_FRAME_SIZE bytes. * * @frame->callback will be invoked with @frame->size, @frame->flags, * @frame->eof, @frame->sof set once the frame has been received. @@ -557,8 +562,8 @@ static inline int tb_ring_rx(struct tb_ring *ring, struct ring_frame *frame) * @ring: Ring the enqueue the frame * @frame: Frame to enqueue * - * @frame->buffer, @frame->buffer_phy, @frame->callback, @frame->size, - * @frame->eof and @frame->sof have to be set. + * @frame->buffer, @frame->buffer_phy, @frame->size, @frame->eof and + * @frame->sof have to be set. * * @frame->callback will be invoked with once the frame has been transmitted. * @@ -573,4 +578,8 @@ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) return __tb_ring_enqueue(ring, frame); } +/* Used only when the ring is in polling mode */ +struct ring_frame *tb_ring_poll(struct tb_ring *ring); +void tb_ring_poll_complete(struct tb_ring *ring); + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From 3304559e353f098d7e0ed5ca981e26c406513e12 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 2 Oct 2017 13:38:42 +0300 Subject: thunderbolt: Add function to retrieve DMA device for the ring This is needed when Thunderbolt service drivers need to DMA map memory before it is passed down to the ring. Signed-off-by: Mika Westerberg Reviewed-by: Michael Jamet Reviewed-by: Yehezkel Bernat Reviewed-by: Andy Shevchenko Signed-off-by: David S. Miller --- include/linux/thunderbolt.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/thunderbolt.h b/include/linux/thunderbolt.h index 36925e3aec7c..7b69853188b1 100644 --- a/include/linux/thunderbolt.h +++ b/include/linux/thunderbolt.h @@ -19,6 +19,7 @@ #include #include #include +#include #include #include @@ -582,4 +583,16 @@ static inline int tb_ring_tx(struct tb_ring *ring, struct ring_frame *frame) struct ring_frame *tb_ring_poll(struct tb_ring *ring); void tb_ring_poll_complete(struct tb_ring *ring); +/** + * tb_ring_dma_device() - Return device used for DMA mapping + * @ring: Ring whose DMA device is retrieved + * + * Use this function when you are mapping DMA for buffers that are + * passed to the ring for sending/receiving. + */ +static inline struct device *tb_ring_dma_device(struct tb_ring *ring) +{ + return &ring->nhi->pdev->dev; +} + #endif /* THUNDERBOLT_H_ */ -- cgit v1.2.3 From e7d5459dfaf613799915e901189d296bdc7534f9 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 26 Sep 2017 17:41:07 +0100 Subject: cpufreq: provide default frequency-invariance setter function Frequency-invariant accounting support based on the ratio of current frequency and maximum supported frequency is an optional feature an arch can implement. Since there are cpufreq drivers (e.g. cpufreq-dt) which can be build for different arch's a default implementation of the frequency-invariance setter function arch_set_freq_scale() is needed. This default implementation is an empty weak function which will be overwritten by a strong function in case the arch provides one. The setter function passes the cpumask of related (to the frequency change) cpus (online and offline cpus), the (new) current frequency and the maximum supported frequency. Signed-off-by: Dietmar Eggemann Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 537ff842ff73..28734ee185a7 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -919,6 +919,9 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) extern unsigned int arch_freq_get_on_cpu(int cpu); +extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, + unsigned long max_freq); + /* the following are really really optional */ extern struct freq_attr cpufreq_freq_attr_scaling_available_freqs; extern struct freq_attr cpufreq_freq_attr_scaling_boost_freqs; -- cgit v1.2.3 From 0e27c567d1673137b06aa96bb7aef635fb657dee Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 26 Sep 2017 17:41:10 +0100 Subject: drivers base/arch_topology: provide frequency-invariant accounting support Implements the arch-specific (arm and arm64) frequency-invariance setter function arch_set_freq_scale() which provides the following frequency scaling factor: current_freq(cpu) << SCHED_CAPACITY_SHIFT / max_supported_freq(cpu) One possible consumer of the frequency-invariance getter function topology_get_freq_scale() is the Per-Entity Load Tracking (PELT) mechanism of the task scheduler. Allow inlining of topology_get_freq_scale() into the task scheduler fast path (e.g. __update_load_avg_se()) by coding it as a static inline function in the arch topology header file. Signed-off-by: Dietmar Eggemann Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index 716ce587247e..f6e490312e4d 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -5,6 +5,7 @@ #define _LINUX_ARCH_TOPOLOGY_H_ #include +#include void topology_normalize_cpu_scale(void); @@ -16,4 +17,12 @@ unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu); void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); +DECLARE_PER_CPU(unsigned long, freq_scale); + +static inline +unsigned long topology_get_freq_scale(struct sched_domain *sd, int cpu) +{ + return per_cpu(freq_scale, cpu); +} + #endif /* _LINUX_ARCH_TOPOLOGY_H_ */ -- cgit v1.2.3 From 8216f588b52b61ce36fc0080218e4730435e58b7 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Tue, 26 Sep 2017 17:41:11 +0100 Subject: drivers base/arch_topology: allow inlining cpu-invariant accounting support Allow inlining of topology_get_cpu_scale() into the task scheduler fast path (e.g. __update_load_avg_se()) by coding it as a static inline function in the arch topology header file. Signed-off-by: Dietmar Eggemann Acked-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/arch_topology.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arch_topology.h b/include/linux/arch_topology.h index f6e490312e4d..c189de3ef5df 100644 --- a/include/linux/arch_topology.h +++ b/include/linux/arch_topology.h @@ -12,8 +12,14 @@ void topology_normalize_cpu_scale(void); struct device_node; bool topology_parse_cpu_capacity(struct device_node *cpu_node, int cpu); +DECLARE_PER_CPU(unsigned long, cpu_scale); + struct sched_domain; -unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu); +static inline +unsigned long topology_get_cpu_scale(struct sched_domain *sd, int cpu) +{ + return per_cpu(cpu_scale, cpu); +} void topology_set_cpu_scale(unsigned int cpu, unsigned long capacity); -- cgit v1.2.3 From 7f20333fe76780af19ee0d8d2dc6d04d35aa88d2 Mon Sep 17 00:00:00 2001 From: Al Cooper Date: Fri, 22 Sep 2017 15:33:59 -0400 Subject: soc: brcmstb: Add Product ID and Family ID helper functions Add Product ID and Family ID helper functions for brcmstb soc. Signed-off-by: Al Cooper Acked-by: Florian Fainelli Signed-off-by: Kishon Vijay Abraham I --- include/linux/soc/brcmstb/brcmstb.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/brcmstb/brcmstb.h b/include/linux/soc/brcmstb/brcmstb.h index 337ce414e898..8baafd6908b9 100644 --- a/include/linux/soc/brcmstb/brcmstb.h +++ b/include/linux/soc/brcmstb/brcmstb.h @@ -1,10 +1,27 @@ #ifndef __BRCMSTB_SOC_H #define __BRCMSTB_SOC_H +static inline u32 BRCM_ID(u32 reg) +{ + return reg >> 28 ? reg >> 16 : reg >> 8; +} + +static inline u32 BRCM_REV(u32 reg) +{ + return reg & 0xff; +} + /* * Bus Interface Unit control register setup, must happen early during boot, * before SMP is brought up, called by machine entry point. */ void brcmstb_biuctrl_init(void); +/* + * Helper functions for getting family or product id from the + * SoC driver. + */ +u32 brcmstb_get_family_id(void); +u32 brcmstb_get_product_id(void); + #endif /* __BRCMSTB_SOC_H */ -- cgit v1.2.3 From 640ab98fb3629c0f8417b9b2532eca596495f3bb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 27 Sep 2017 05:40:16 -0600 Subject: buffer: have alloc_page_buffers() use __GFP_NOFAIL Instead of adding weird retry logic in that function, utilize __GFP_NOFAIL to ensure that the vm takes care of handling any potential retries appropriately. This means we don't have to call free_more_memory() from here. Reviewed-by: Nikolay Borisov Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c8dae555eccf..ae2d25f01b98 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -156,7 +156,7 @@ void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); int try_to_free_buffers(struct page *); struct buffer_head *alloc_page_buffers(struct page *page, unsigned long size, - int retry); + bool retry); void create_empty_buffers(struct page *, unsigned long, unsigned long b_state); void end_buffer_read_sync(struct buffer_head *bh, int uptodate); -- cgit v1.2.3 From 9ba4b2dfafaa711b41cc2102b0e9a529f3981218 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Sep 2017 08:58:25 -0600 Subject: fs: kill 'nr_pages' argument from wakeup_flusher_threads() Everybody is passing in 0 now, let's get rid of the argument. Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index d5815794416c..1f9c6db5e29a 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -189,7 +189,7 @@ bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void sync_inodes_sb(struct super_block *); -void wakeup_flusher_threads(long nr_pages, enum wb_reason reason); +void wakeup_flusher_threads(enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ -- cgit v1.2.3 From 47410d88f665486bf91f02242ab5d5692b8887ac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Sep 2017 11:25:03 -0600 Subject: writeback: remove 'range_cyclic' argument for wb_start_writeback() All the callers pass in 'true' for range_cyclic, so kill the argument. Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 854e1bdd0b2a..0f63493de9e7 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -39,7 +39,7 @@ static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask) } void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, - bool range_cyclic, enum wb_reason reason); + enum wb_reason reason); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); void wb_wakeup_delayed(struct bdi_writeback *wb); -- cgit v1.2.3 From 595043e5f9ef1d8263bd9fda215cade489227491 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Sep 2017 11:26:59 -0600 Subject: writeback: provide a wakeup_flusher_threads_bdi() Similar to wakeup_flusher_threads(), except that we only wake up the flusher threads on the specified backing device. No functional changes in this patch. Acked-by: Johannes Weiner Tested-by: Chris Mason Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 1f9c6db5e29a..9c0091678af4 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -190,6 +190,8 @@ bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(enum wb_reason reason); +void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, + enum wb_reason reason); void inode_wait_for_writeback(struct inode *inode); /* writeback.h requires fs.h; it, too, is not included from here. */ -- cgit v1.2.3 From 9dfb176fae57a1dea68531fd25e867037e4d9bac Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Sep 2017 11:28:55 -0600 Subject: writeback: make wb_start_writeback() static We don't have any callers outside of fs-writeback.c anymore, make it private. Acked-by: Johannes Weiner Tested-by: Chris Mason Reviewed-by: Jan Kara Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 0f63493de9e7..157e950a70dc 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -38,8 +38,6 @@ static inline struct backing_dev_info *bdi_alloc(gfp_t gfp_mask) return bdi_alloc_node(gfp_mask, NUMA_NO_NODE); } -void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, - enum wb_reason reason); void wb_start_background_writeback(struct bdi_writeback *wb); void wb_workfn(struct work_struct *work); void wb_wakeup_delayed(struct bdi_writeback *wb); -- cgit v1.2.3 From aac8d41cd438f25bf3110fc6b98f1d16d7dbc169 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 28 Sep 2017 11:31:55 -0600 Subject: writeback: only allow one inflight and pending full flush When someone calls wakeup_flusher_threads() or wakeup_flusher_threads_bdi(), they schedule writeback of all dirty pages in the system (or on that bdi). If we are tight on memory, we can get tons of these queued from kswapd/vmscan. This causes (at least) two problems: 1) We consume a ton of memory just allocating writeback work items. We've seen as much as 600 million of these writeback work items pending. That's a lot of memory to pointlessly hold hostage, while the box is under memory pressure. 2) We spend so much time processing these work items, that we introduce a softlockup in writeback processing. This is because each of the writeback work items don't end up doing any work (it's hard when you have millions of identical ones coming in to the flush machinery), so we just sit in a tight loop pulling work items and deleting/freeing them. Fix this by adding a 'start_all' bit to the writeback structure, and set that when someone attempts to flush all dirty pages. The bit is cleared when we start writeback on that work item. If the bit is already set when we attempt to queue !nr_pages writeback, then we simply ignore it. This provides us one full flush in flight, with one pending as well, and makes for more efficient handling of this type of writeback. Acked-by: Johannes Weiner Tested-by: Chris Mason Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 866c433e7d32..420de5c7c7f9 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -24,6 +24,7 @@ enum wb_state { WB_shutting_down, /* wb_shutdown() in progress */ WB_writeback_running, /* Writeback is in progress */ WB_has_dirty_io, /* Dirty inodes on ->b_{dirty|io|more_io} */ + WB_start_all, /* nr_pages == 0 (all) work pending */ }; enum wb_congested_state { -- cgit v1.2.3 From abf4bb6b63d0a54266f8e7eff3720c1974063971 Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:06 +0200 Subject: skbuff: Add the offload_mr_fwd_mark field Similarly to the offload_fwd_mark field, the offload_mr_fwd_mark field is used to allow partial offloading of MFC multicast routes. Switchdev drivers can offload MFC multicast routes to the hardware by registering to the FIB notification chain. When one of the route output interfaces is not offload-able, i.e. has different parent ID, the route cannot be fully offloaded by the hardware. Examples to non-offload-able devices are a management NIC, dummy device, pimreg device, etc. Similar problem exists in the bridge module, as one bridge can hold interfaces with different parent IDs. At the bridge, the problem is solved by the offload_fwd_mark skb field. Currently, when a route cannot go through full offload, the only solution for a switchdev driver is not to offload it at all and let the packet go through slow path. Using the offload_mr_fwd_mark field, a driver can indicate that a packet was already forwarded by hardware to all the devices with the same parent ID as the input device. Further patches in this patch-set are going to enhance ipmr to skip multicast forwarding to devices with the same parent ID if a packets is marked with that field. The reason why the already existing "offload_fwd_mark" bit cannot be used is that a switchdev driver would want to make the distinction between a packet that has already gone through L2 forwarding but did not go through multicast forwarding, and a packet that has already gone through both L2 and multicast forwarding. For example: when a packet is ingressing from a switchport enslaved to a bridge, which is configured with multicast forwarding, the following scenarios are possible: - The packet can be trapped to the CPU due to exception while multicast forwarding (for example, MTU error). In that case, it had already gone through L2 forwarding in the hardware, thus A switchdev driver would want to set the skb->offload_fwd_mark and not the skb->offload_mr_fwd_mark. - The packet can also be trapped due to a pimreg/dummy device used as one of the output interfaces. In that case, it can go through both L2 and (partial) multicast forwarding inside the hardware, thus a switchdev driver would want to set both the skb->offload_fwd_mark and skb->offload_mr_fwd_mark. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 19e64bfb1a66..ada821466e88 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -772,6 +772,7 @@ struct sk_buff { __u8 remcsum_offload:1; #ifdef CONFIG_NET_SWITCHDEV __u8 offload_fwd_mark:1; + __u8 offload_mr_fwd_mark:1; #endif #ifdef CONFIG_NET_CLS_ACT __u8 tc_skip_classify:1; -- cgit v1.2.3 From 5d8b3e69fc5e5ccafc9db1251bb7c78a8622fddd Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Tue, 3 Oct 2017 09:58:07 +0200 Subject: ipv4: ipmr: Add the parent ID field to VIF struct In order to allow the ipmr module to do partial multicast forwarding according to the device parent ID, add the device parent ID field to the VIF struct. This way, the forwarding path can use the parent ID field without invoking switchdev calls, which requires the RTNL lock. When a new VIF is added, set the device parent ID field in it by invoking the switchdev_port_attr_get call. Signed-off-by: Yotam Gigi Reviewed-by: Ido Schimmel Signed-off-by: Jiri Pirko Signed-off-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/mroute.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mroute.h b/include/linux/mroute.h index b072a84fbe1c..8242d05df35e 100644 --- a/include/linux/mroute.h +++ b/include/linux/mroute.h @@ -57,6 +57,7 @@ static inline bool ipmr_rule_default(const struct fib_rule *rule) struct vif_device { struct net_device *dev; /* Device we are using */ + struct netdev_phys_item_id dev_parent_id; /* Device parent ID */ unsigned long bytes_in,bytes_out; unsigned long pkt_in,pkt_out; /* Statistics */ unsigned long rate_limit; /* Traffic shaping (NI) */ -- cgit v1.2.3 From 6c5570016b972d9b1f0f6c2dca9cc0422b1f92bf Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Mon, 2 Oct 2017 23:50:05 +0200 Subject: net: core: decouple ifalias get/set from rtnl lock Device alias can be set by either rtnetlink (rtnl is held) or sysfs. rtnetlink hold the rtnl mutex, sysfs acquires it for this purpose. Add an extra mutex for it and use rcu to protect concurrent accesses. This allows the sysfs path to not take rtnl and would later allow to not hold it when dumping ifalias. Based on suggestion from Eric Dumazet. Signed-off-by: Florian Westphal Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e1d6ef130611..d04424cfffba 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -826,6 +826,11 @@ struct xfrmdev_ops { }; #endif +struct dev_ifalias { + struct rcu_head rcuhead; + char ifalias[]; +}; + /* * This structure defines the management hooks for network devices. * The following hooks can be defined; unless noted otherwise, they are @@ -1632,7 +1637,7 @@ enum netdev_priv_flags { struct net_device { char name[IFNAMSIZ]; struct hlist_node name_hlist; - char *ifalias; + struct dev_ifalias __rcu *ifalias; /* * I/O specific fields * FIXME: Merge these and struct ifmap into one @@ -3275,6 +3280,7 @@ void __dev_notify_flags(struct net_device *, unsigned int old_flags, unsigned int gchanges); int dev_change_name(struct net_device *, const char *); int dev_set_alias(struct net_device *, const char *, size_t); +int dev_get_alias(const struct net_device *, char *, size_t); int dev_change_net_namespace(struct net_device *, struct net *, const char *); int __dev_set_mtu(struct net_device *, int); int dev_set_mtu(struct net_device *, int); -- cgit v1.2.3 From eaefd5abf6b095bfc55eb745bdf7c42cf66790eb Mon Sep 17 00:00:00 2001 From: James Smart Date: Thu, 14 Sep 2017 10:38:42 -0700 Subject: nvme-fc: add uevent for auto-connect To support auto-connecting to FC-NVME devices upon their dynamic appearance, add a uevent that can kick off connection scripts. uevent is posted against the fc_udev device. patch set tested with the following rule to kick an nvme-cli connect-all for the FC initiator and FC target ports. This is just an example for testing and not intended for real life use. ACTION=="change", SUBSYSTEM=="fc", ENV{FC_EVENT}=="nvmediscovery", \ ENV{NVMEFC_HOST_TRADDR}=="*", ENV{NVMEFC_TRADDR}=="*", \ RUN+="/bin/sh -c '/usr/local/sbin/nvme connect-all --transport=fc --host-traddr=$env{NVMEFC_HOST_TRADDR} --traddr=$env{NVMEFC_TRADDR} >> /tmp/nvme_fc.log'" I will post proposed udev/systemd scripts for possible kernel support. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index a726f96010d5..4ea03b9a5c8c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -446,6 +446,8 @@ int nvme_fc_register_remoteport(struct nvme_fc_local_port *localport, int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport); +void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport); + /* -- cgit v1.2.3 From eb8470db8bc018fc28901e4e3b0f48e33f1ea7df Mon Sep 17 00:00:00 2001 From: Jan Kandziora Date: Wed, 20 Sep 2017 23:52:45 +0200 Subject: wire: export w1_touch_bit The w1_ds28e17 driver from the next part of this patch needs to emit single-bit read timeslots to the DS28E17. The w1 subsystem already has this function but it is not exported outside drivers/w1/w1_io.c This subpatch exports the w1_touch_bit symbol with EXPORT_SYMBOL_GPL, same as the other exported symbols in drivers/w1/w1_io.c May be also useful later for writing drivers for other Onewire chips which do single-bit communication. Signed-off-by: Jan Kandziora Acked-by: Evgeniy Polyakov Signed-off-by: Greg Kroah-Hartman --- include/linux/w1.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/w1.h b/include/linux/w1.h index 5b2972946dda..694101f744c7 100644 --- a/include/linux/w1.h +++ b/include/linux/w1.h @@ -293,6 +293,7 @@ void w1_unregister_family(struct w1_family *family); w1_unregister_family) u8 w1_triplet(struct w1_master *dev, int bdir); +u8 w1_touch_bit(struct w1_master *dev, int bit); void w1_write_8(struct w1_master *, u8); u8 w1_read_8(struct w1_master *); int w1_reset_bus(struct w1_master *); -- cgit v1.2.3 From c2e5df616e1ae6c2a074cb241ebb65a318ebaf7c Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Thu, 21 Sep 2017 20:58:49 -0700 Subject: vmbus: add per-channel sysfs info This extends existing vmbus related sysfs structure to provide per-channel state information. This is useful when diagnosing issues with multiple queues in networking and storage. The existing sysfs only displayed information about the primary channel. The one place it reported multiple channels was the channel_vp_mapping file which violated the sysfs convention of one value per file. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index c458d7b7ad19..ef16ee039850 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -828,6 +828,11 @@ struct vmbus_channel { */ struct rcu_head rcu; + /* + * For sysfs per-channel properties. + */ + struct kobject kobj; + /* * For performance critical channels (storage, networking * etc,), Hyper-V has a mechanism to enhance the throughput @@ -1089,6 +1094,7 @@ struct hv_device { struct device device; struct vmbus_channel *channel; + struct kset *channels_kset; }; -- cgit v1.2.3 From 667063acb81931e2f8fd0cb91df9fcccad131d9a Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 29 Sep 2017 16:23:01 +0800 Subject: regmap: add iopoll-like polling macro for regmap_field This patch adds a macro regmap_field_read_poll_timeout that works similar to the readx_poll_timeout defined in linux/iopoll.h, except that this can also return the error value returned by a failed regmap_field_read. Signed-off-by: Chen-Yu Tsai Signed-off-by: Mark Brown --- include/linux/regmap.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 978abfbac617..93a4663d7acb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -139,6 +139,45 @@ struct reg_sequence { pollret ?: ((cond) ? 0 : -ETIMEDOUT); \ }) +/** + * regmap_field_read_poll_timeout - Poll until a condition is met or timeout + * + * @field: Regmap field to read from + * @val: Unsigned integer variable to read the value into + * @cond: Break condition (usually involving @val) + * @sleep_us: Maximum time to sleep between reads in us (0 + * tight-loops). Should be less than ~20ms since usleep_range + * is used (see Documentation/timers/timers-howto.txt). + * @timeout_us: Timeout in us, 0 means never timeout + * + * Returns 0 on success and -ETIMEDOUT upon a timeout or the regmap_field_read + * error return value in case of a error read. In the two former cases, + * the last read value at @addr is stored in @val. Must not be called + * from atomic context if sleep_us or timeout_us are used. + * + * This is modelled after the readx_poll_timeout macros in linux/iopoll.h. + */ +#define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \ +({ \ + ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + int pollret; \ + might_sleep_if(sleep_us); \ + for (;;) { \ + pollret = regmap_field_read((field), &(val)); \ + if (pollret) \ + break; \ + if (cond) \ + break; \ + if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + pollret = regmap_field_read((field), &(val)); \ + break; \ + } \ + if (sleep_us) \ + usleep_range((sleep_us >> 2) + 1, sleep_us); \ + } \ + pollret ?: ((cond) ? 0 : -ETIMEDOUT); \ +}) + #ifdef CONFIG_REGMAP enum regmap_endian { -- cgit v1.2.3 From d81851c1764b26b46670c0b3bd6701308ddaab98 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 29 Sep 2017 11:25:09 +0800 Subject: regulator: axp20x: Add support for AXP813 regulators The AXP813 PMIC has 7 DC-DC buck regulators, 16 LDOs (including the fixed RTC LDO and 2 GPIO LDOs), and 1 switchable. The drive-vbus feature is also supported. All the hardware details are very similar to the AXP803, with the following exceptions: - Extra DCDC7 buck regulator, with the same range as DCDC6 - SWitch now has a separate supply pin, instead of being chained internaly from DCDC1 - RTC LDO output voltage is now 1.8V - FLDO3 is an LDO with switchable supplies, but unconfigurable output voltage. The voltage is always half that of its supply. Support for FLDO3 is currently unimplemented, as it requires runtime switching of its supplies, something the regulator subsystem does not support. It is not used in either the reference designs nor actually produced boards available. Signed-off-by: Chen-Yu Tsai Tested-by: Maxime Ripard Acked-by: Maxime Ripard Signed-off-by: Mark Brown --- include/linux/mfd/axp20x.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index e9c908c4fba8..78dc85365c4f 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -131,6 +131,9 @@ enum axp20x_variants { #define AXP803_DCDC6_V_OUT 0x25 #define AXP803_DCDC_FREQ_CTRL 0x3b +/* Other DCDC regulator control registers are the same as AXP803 */ +#define AXP813_DCDC7_V_OUT 0x26 + /* Interrupt */ #define AXP152_IRQ1_EN 0x40 #define AXP152_IRQ2_EN 0x41 -- cgit v1.2.3 From 85009b4f5f0399669a44f07cb9a5622c0e71d419 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 30 Sep 2017 02:09:06 -0600 Subject: writeback: eliminate work item allocation in bd_start_writeback() Handle start-all writeback like we do periodic or kupdate style writeback - by marking the bdi_writeback as needing a full flush, and simply waking the thread. This eliminates the need to allocate and queue a specific work item just for this purpose. After this change, we truly only ever have one of them running at any point in time. We mark the need to start all flushes, and the writeback thread will clear it once it has processed the request. Reviewed-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 23 +++++++++++++++++++++++ include/linux/writeback.h | 22 ---------------------- 2 files changed, 23 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 420de5c7c7f9..b7c7be6f5986 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -44,6 +44,28 @@ enum wb_stat_item { #define WB_STAT_BATCH (8*(1+ilog2(nr_cpu_ids))) +/* + * why some writeback work was initiated + */ +enum wb_reason { + WB_REASON_BACKGROUND, + WB_REASON_VMSCAN, + WB_REASON_SYNC, + WB_REASON_PERIODIC, + WB_REASON_LAPTOP_TIMER, + WB_REASON_FREE_MORE_MEM, + WB_REASON_FS_FREE_SPACE, + /* + * There is no bdi forker thread any more and works are done + * by emergency worker, however, this is TPs userland visible + * and we'll be exposing exactly the same information, + * so it has a mismatch name. + */ + WB_REASON_FORKER_THREAD, + + WB_REASON_MAX, +}; + /* * For cgroup writeback, multiple wb's may map to the same blkcg. Those * wb's can operate mostly independently but should share the congested @@ -116,6 +138,7 @@ struct bdi_writeback { struct fprop_local_percpu completions; int dirty_exceeded; + enum wb_reason start_all_reason; spinlock_t work_lock; /* protects work_list & dwork scheduling */ struct list_head work_list; diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 9c0091678af4..dd1d2c23f743 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -41,28 +41,6 @@ enum writeback_sync_modes { WB_SYNC_ALL, /* Wait on every mapping */ }; -/* - * why some writeback work was initiated - */ -enum wb_reason { - WB_REASON_BACKGROUND, - WB_REASON_VMSCAN, - WB_REASON_SYNC, - WB_REASON_PERIODIC, - WB_REASON_LAPTOP_TIMER, - WB_REASON_FREE_MORE_MEM, - WB_REASON_FS_FREE_SPACE, - /* - * There is no bdi forker thread any more and works are done - * by emergency worker, however, this is TPs userland visible - * and we'll be exposing exactly the same information, - * so it has a mismatch name. - */ - WB_REASON_FORKER_THREAD, - - WB_REASON_MAX, -}; - /* * A control structure which tells the writeback code what to do. These are * always on the stack, and hence need no locking. They are always initialised -- cgit v1.2.3 From 6cafbe159416822f6d3dfd711bf4c39050c650ba Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Tue, 20 Jun 2017 10:44:58 -0400 Subject: ftrace: Add a ftrace_free_mem() function for modules to use In order to be able to trace module init functions, the module code needs to tell ftrace what is being freed when the init sections are freed. Use the code that the main init calls to tell ftrace to free the main init sections. This requires passing in a start and end address to free. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2e028854bac7..47fc404ad233 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -151,8 +151,10 @@ struct ftrace_ops_hash { }; void ftrace_free_init_mem(void); +void ftrace_free_mem(void *start, void *end); #else static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_mem(void *start, void *end) { } #endif /* -- cgit v1.2.3 From 5c95878f618cf4f3eb0a4c7ff54a09ca6d4d0426 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Thu, 7 Sep 2017 20:39:45 -0300 Subject: [media] media: rc: gpio-ir-recv: remove gpio_ir_recv_platform_data gpio_ir_recv_platform_data are not used anywhere in kernel tree, so remove it. Signed-off-by: Ladislav Michl Signed-off-by: Sean Young Signed-off-by: Mauro Carvalho Chehab --- include/linux/platform_data/media/gpio-ir-recv.h | 23 ----------------------- 1 file changed, 23 deletions(-) delete mode 100644 include/linux/platform_data/media/gpio-ir-recv.h (limited to 'include/linux') diff --git a/include/linux/platform_data/media/gpio-ir-recv.h b/include/linux/platform_data/media/gpio-ir-recv.h deleted file mode 100644 index 0c298f569d5a..000000000000 --- a/include/linux/platform_data/media/gpio-ir-recv.h +++ /dev/null @@ -1,23 +0,0 @@ -/* Copyright (c) 2012, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ - -#ifndef __GPIO_IR_RECV_H__ -#define __GPIO_IR_RECV_H__ - -struct gpio_ir_recv_platform_data { - int gpio_nr; - bool active_low; - u64 allowed_protos; - const char *map_name; -}; - -#endif /* __GPIO_IR_RECV_H__ */ -- cgit v1.2.3 From 324bda9e6c5add86ba2e1066476481c48132aca0 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:21 -0700 Subject: bpf: multi program support for cgroup+bpf introduce BPF_F_ALLOW_MULTI flag that can be used to attach multiple bpf programs to a cgroup. The difference between three possible flags for BPF_PROG_ATTACH command: - NONE(default): No further bpf programs allowed in the subtree. - BPF_F_ALLOW_OVERRIDE: If a sub-cgroup installs some bpf program, the program in this cgroup yields to sub-cgroup program. - BPF_F_ALLOW_MULTI: If a sub-cgroup installs some bpf program, that cgroup program gets run in addition to the program in this cgroup. NONE and BPF_F_ALLOW_OVERRIDE existed before. This patch doesn't change their behavior. It only clarifies the semantics in relation to new flag. Only one program is allowed to be attached to a cgroup with NONE or BPF_F_ALLOW_OVERRIDE flag. Multiple programs are allowed to be attached to a cgroup with BPF_F_ALLOW_MULTI flag. They are executed in FIFO order (those that were attached first, run first) The programs of sub-cgroup are executed first, then programs of this cgroup and then programs of parent cgroup. All eligible programs are executed regardless of return code from earlier programs. To allow efficient execution of multiple programs attached to a cgroup and to avoid penalizing cgroups without any programs attached introduce 'struct bpf_prog_array' which is RCU protected array of pointers to bpf programs. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 46 ++++++++++++++++++++++++++++++---------------- include/linux/bpf.h | 32 ++++++++++++++++++++++++++++++++ include/linux/filter.h | 2 +- 3 files changed, 63 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index d41d40ac3efd..102e56fbb6de 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -14,27 +14,42 @@ struct bpf_sock_ops_kern; extern struct static_key_false cgroup_bpf_enabled_key; #define cgroup_bpf_enabled static_branch_unlikely(&cgroup_bpf_enabled_key) +struct bpf_prog_list { + struct list_head node; + struct bpf_prog *prog; +}; + +struct bpf_prog_array; + struct cgroup_bpf { - /* - * Store two sets of bpf_prog pointers, one for programs that are - * pinned directly to this cgroup, and one for those that are effective - * when this cgroup is accessed. + /* array of effective progs in this cgroup */ + struct bpf_prog_array __rcu *effective[MAX_BPF_ATTACH_TYPE]; + + /* attached progs to this cgroup and attach flags + * when flags == 0 or BPF_F_ALLOW_OVERRIDE the progs list will + * have either zero or one element + * when BPF_F_ALLOW_MULTI the list can have up to BPF_CGROUP_MAX_PROGS */ - struct bpf_prog *prog[MAX_BPF_ATTACH_TYPE]; - struct bpf_prog __rcu *effective[MAX_BPF_ATTACH_TYPE]; - bool disallow_override[MAX_BPF_ATTACH_TYPE]; + struct list_head progs[MAX_BPF_ATTACH_TYPE]; + u32 flags[MAX_BPF_ATTACH_TYPE]; + + /* temp storage for effective prog array used by prog_attach/detach */ + struct bpf_prog_array __rcu *inactive; }; void cgroup_bpf_put(struct cgroup *cgrp); -void cgroup_bpf_inherit(struct cgroup *cgrp, struct cgroup *parent); +int cgroup_bpf_inherit(struct cgroup *cgrp); -int __cgroup_bpf_update(struct cgroup *cgrp, struct cgroup *parent, - struct bpf_prog *prog, enum bpf_attach_type type, - bool overridable); +int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); -/* Wrapper for __cgroup_bpf_update() protected by cgroup_mutex */ -int cgroup_bpf_update(struct cgroup *cgrp, struct bpf_prog *prog, - enum bpf_attach_type type, bool overridable); +/* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ +int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); +int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, + enum bpf_attach_type type, u32 flags); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, @@ -96,8 +111,7 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct cgroup_bpf {}; static inline void cgroup_bpf_put(struct cgroup *cgrp) {} -static inline void cgroup_bpf_inherit(struct cgroup *cgrp, - struct cgroup *parent) {} +static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 252f4bc9eb25..a6964b75f070 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -241,6 +241,38 @@ int bpf_prog_test_run_xdp(struct bpf_prog *prog, const union bpf_attr *kattr, int bpf_prog_test_run_skb(struct bpf_prog *prog, const union bpf_attr *kattr, union bpf_attr __user *uattr); +/* an array of programs to be executed under rcu_lock. + * + * Typical usage: + * ret = BPF_PROG_RUN_ARRAY(&bpf_prog_array, ctx, BPF_PROG_RUN); + * + * the structure returned by bpf_prog_array_alloc() should be populated + * with program pointers and the last pointer must be NULL. + * The user has to keep refcnt on the program and make sure the program + * is removed from the array before bpf_prog_put(). + * The 'struct bpf_prog_array *' should only be replaced with xchg() + * since other cpus are walking the array of pointers in parallel. + */ +struct bpf_prog_array { + struct rcu_head rcu; + struct bpf_prog *progs[0]; +}; + +struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); +void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); + +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + ({ \ + struct bpf_prog **_prog; \ + u32 _ret = 1; \ + rcu_read_lock(); \ + _prog = rcu_dereference(array)->progs; \ + for (; *_prog; _prog++) \ + _ret &= func(*_prog, ctx); \ + rcu_read_unlock(); \ + _ret; \ + }) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/filter.h b/include/linux/filter.h index 911d454af107..2d2db394b0ca 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -481,7 +481,7 @@ struct sk_filter { struct bpf_prog *prog; }; -#define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +#define BPF_PROG_RUN(filter, ctx) (*(filter)->bpf_func)(ctx, (filter)->insnsi) #define BPF_SKB_CB_LEN QDISC_CB_PRIV_LEN -- cgit v1.2.3 From 468e2f64d220fe2dc11caa2bcb9b3a1e50fc7321 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 2 Oct 2017 22:50:22 -0700 Subject: bpf: introduce BPF_PROG_QUERY command introduce BPF_PROG_QUERY command to retrieve a set of either attached programs to given cgroup or a set of effective programs that will execute for events within a cgroup Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Martin KaFai Lau for cgroup bits Acked-by: Tejun Heo Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 4 ++++ include/linux/bpf.h | 3 +++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 102e56fbb6de..359b6f5d3d90 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -44,12 +44,16 @@ int __cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int __cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); +int __cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr); /* Wrapper for __cgroup_bpf_*() protected by cgroup_mutex */ int cgroup_bpf_attach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); int cgroup_bpf_detach(struct cgroup *cgrp, struct bpf_prog *prog, enum bpf_attach_type type, u32 flags); +int cgroup_bpf_query(struct cgroup *cgrp, const union bpf_attr *attr, + union bpf_attr __user *uattr); int __cgroup_bpf_run_filter_skb(struct sock *sk, struct sk_buff *skb, diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a6964b75f070..a67daea731ab 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -260,6 +260,9 @@ struct bpf_prog_array { struct bpf_prog_array __rcu *bpf_prog_array_alloc(u32 prog_cnt, gfp_t flags); void bpf_prog_array_free(struct bpf_prog_array __rcu *progs); +int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); +int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, + __u32 __user *prog_ids, u32 cnt); #define BPF_PROG_RUN_ARRAY(array, ctx, func) \ ({ \ -- cgit v1.2.3 From 6621dd29eb9b5e6774ec7a9a75161352fdea47fc Mon Sep 17 00:00:00 2001 From: Nicolas Dichtel Date: Tue, 3 Oct 2017 13:53:23 +0200 Subject: dev: advertise the new nsid when the netns iface changes x-netns interfaces are bound to two netns: the link netns and the upper netns. Usually, this kind of interfaces is created in the link netns and then moved to the upper netns. At the end, the interface is visible only in the upper netns. The link nsid is advertised via netlink in the upper netns, thus the user always knows where is the link part. There is no such mechanism in the link netns. When the interface is moved to another netns, the user cannot "follow" it. This patch adds a new netlink attribute which helps to follow an interface which moves to another netns. When the interface is unregistered, the new nsid is advertised. If the interface is a x-netns interface (ie rtnl_link_ops->get_link_net is defined), the nsid is allocated if needed. CC: Jason A. Donenfeld Signed-off-by: Nicolas Dichtel Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index dea59c8eec54..1251638e60d3 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -17,9 +17,11 @@ extern int rtnl_put_cacheinfo(struct sk_buff *skb, struct dst_entry *dst, u32 id, long expires, u32 error); void rtmsg_ifinfo(int type, struct net_device *dev, unsigned change, gfp_t flags); +void rtmsg_ifinfo_newnet(int type, struct net_device *dev, unsigned int change, + gfp_t flags, int *new_nsid); struct sk_buff *rtmsg_ifinfo_build_skb(int type, struct net_device *dev, unsigned change, u32 event, - gfp_t flags); + gfp_t flags, int *new_nsid); void rtmsg_ifinfo_send(struct sk_buff *skb, struct net_device *dev, gfp_t flags); -- cgit v1.2.3 From 51d0c04795a4b5d9a188336884887a9d394a94b0 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:45 -0700 Subject: net: Add extack to netdev_notifier_info Add netlink_ext_ack to netdev_notifier_info to allow notifier handlers to return errors to userspace. Clean up the initialization in dev.c such that extack is easily added in subsequent patches where relevant. Specifically, remove the init call in call_netdevice_notifiers_info and have callers initalize on stack when info is declared. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d04424cfffba..05fcaba4b0d9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2309,7 +2309,8 @@ int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); struct netdev_notifier_info { - struct net_device *dev; + struct net_device *dev; + struct netlink_ext_ack *extack; }; struct netdev_notifier_change_info { @@ -2334,6 +2335,7 @@ static inline void netdev_notifier_info_init(struct netdev_notifier_info *info, struct net_device *dev) { info->dev = dev; + info->extack = NULL; } static inline struct net_device * @@ -2342,6 +2344,12 @@ netdev_notifier_info_to_dev(const struct netdev_notifier_info *info) return info->dev; } +static inline struct netlink_ext_ack * +netdev_notifier_info_to_extack(const struct netdev_notifier_info *info) +{ + return info->extack; +} + int call_netdevice_notifiers(unsigned long val, struct net_device *dev); -- cgit v1.2.3 From 33eaf2a6eb48ebf00374aaaf4b1b43f9950dcbe4 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:46 -0700 Subject: net: Add extack to ndo_add_slave Pass extack to do_set_master and down to ndo_add_slave Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 05fcaba4b0d9..368a5064a487 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1246,7 +1246,8 @@ struct net_device_ops { u32 flow_id); #endif int (*ndo_add_slave)(struct net_device *dev, - struct net_device *slave_dev); + struct net_device *slave_dev, + struct netlink_ext_ack *extack); int (*ndo_del_slave)(struct net_device *dev, struct net_device *slave_dev); netdev_features_t (*ndo_fix_features)(struct net_device *dev, -- cgit v1.2.3 From 42ab19ee90292993370a30ad242599d75a3b749e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 4 Oct 2017 17:48:47 -0700 Subject: net: Add extack to upper device linking Add extack arg to netdev_upper_dev_link and netdev_master_upper_dev_link Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 3 ++- include/linux/netdevice.h | 6 ++++-- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index c9ec1343d187..10e319f41fb1 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -72,7 +72,8 @@ static inline void macvlan_count_rx(const struct macvlan_dev *vlan, extern void macvlan_common_setup(struct net_device *dev); extern int macvlan_common_newlink(struct net *src_net, struct net_device *dev, - struct nlattr *tb[], struct nlattr *data[]); + struct nlattr *tb[], struct nlattr *data[], + struct netlink_ext_ack *extack); extern void macvlan_count_rx(const struct macvlan_dev *vlan, unsigned int len, bool success, diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 368a5064a487..31bb3010c69b 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3919,10 +3919,12 @@ void *netdev_adjacent_get_private(struct list_head *adj_list); void *netdev_lower_get_first_private_rcu(struct net_device *dev); struct net_device *netdev_master_upper_dev_get(struct net_device *dev); struct net_device *netdev_master_upper_dev_get_rcu(struct net_device *dev); -int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev); +int netdev_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, + struct netlink_ext_ack *extack); int netdev_master_upper_dev_link(struct net_device *dev, struct net_device *upper_dev, - void *upper_priv, void *upper_info); + void *upper_priv, void *upper_info, + struct netlink_ext_ack *extack); void netdev_upper_dev_unlink(struct net_device *dev, struct net_device *upper_dev); void netdev_adjacent_rename_links(struct net_device *dev, char *oldname); -- cgit v1.2.3 From 29cc309d8bf19a36c5196bf626662319af6e3c0b Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Tue, 22 Aug 2017 09:10:11 +0800 Subject: HID: hid-multitouch: forward MSC_TIMESTAMP Computes and forwards the device timestamp according to the specification. Many devices use a 16-bit timestamp field, with a resolution of 100us, therefore rolling around very frequently (every 6.5 seconds). To make sure there is no ambiguity, the timestamp reported to the input stack reset to 0 whenever the time between 2 received events is greater than MAX_TIMESTAMP_INTERVAL (1 second). Signed-off-by: Nicolas Boichat Acked-by: Benjamin Tissoires Signed-off-by: Jiri Kosina --- include/linux/hid.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index ab05a86269dc..47dd962d9a7a 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -289,6 +289,7 @@ struct hid_item { #define HID_DG_DEVICEINDEX 0x000d0053 #define HID_DG_CONTACTCOUNT 0x000d0054 #define HID_DG_CONTACTMAX 0x000d0055 +#define HID_DG_SCANTIME 0x000d0056 #define HID_DG_BUTTONTYPE 0x000d0059 #define HID_DG_BARRELSWITCH2 0x000d005a #define HID_DG_TOOLSERIALNUMBER 0x000d005b -- cgit v1.2.3 From 58e1177b4cd10b0d358faf7d7ebb3779f98bc3ea Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:26:55 -0700 Subject: timer: Convert schedule_timeout() to use from_timer() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new from_timer() helper and passing the timer pointer explicitly. Since this special timer is on the stack, it needs to have a wrapper structure to carry state once .data is eliminated. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Guenter Roeck Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: "Rafael J. Wysocki" Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: Stephen Boyd Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-2-git-send-email-keescook@chromium.org --- include/linux/timer.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 6383c528b148..5ef5c9e41a09 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -179,6 +179,14 @@ static inline void timer_setup(struct timer_list *timer, (TIMER_DATA_TYPE)timer, flags); } +static inline void timer_setup_on_stack(struct timer_list *timer, + void (*callback)(struct timer_list *), + unsigned int flags) +{ + __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback, + (TIMER_DATA_TYPE)timer, flags); +} + #define from_timer(var, callback_timer, timer_fieldname) \ container_of(callback_timer, typeof(*var), timer_fieldname) -- cgit v1.2.3 From 1d1fe902afb380571105d05d0be3de61b81bc9a8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:26:56 -0700 Subject: timer: Remove init_timer_pinned_deferrable() in favor of timer_setup() This refactors the only user of init_timer_pinned_deferrable() to use the new timer_setup() and from_timer(). Adds a pointer back to the policy, and drops the definition of init_timer_pinned_deferrable(). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Guenter Roeck Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-3-git-send-email-keescook@chromium.org --- include/linux/timer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 5ef5c9e41a09..d11e819a86e2 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -132,8 +132,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, __init_timer((timer), TIMER_PINNED) #define init_timer_deferrable(timer) \ __init_timer((timer), TIMER_DEFERRABLE) -#define init_timer_pinned_deferrable(timer) \ - __init_timer((timer), TIMER_DEFERRABLE | TIMER_PINNED) #define init_timer_on_stack(timer) \ __init_timer_on_stack((timer), 0) -- cgit v1.2.3 From 9c6c273aa4248c60569de6ef7e7e9c7bed3cd32e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:26:57 -0700 Subject: timer: Remove init_timer_on_stack() in favor of timer_setup_on_stack() Remove uses of init_timer_on_stack() with open-coded function and data assignments that could be expressed using timer_setup_on_stack(). Several were removed from the stack entirely since there was a one-to-one mapping of parent structure to timer, those are switched to using timer_setup() instead. All related callbacks were adjusted to use from_timer(). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Heiko Carstens Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: Wim Van Sebroeck Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: linux-scsi@vger.kernel.org Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Michael Reed Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Lai Jiangshan Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Guenter Roeck Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-4-git-send-email-keescook@chromium.org --- include/linux/parport.h | 1 + include/linux/timer.h | 2 -- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/parport.h b/include/linux/parport.h index 58e3c64c6b49..397607a0c0eb 100644 --- a/include/linux/parport.h +++ b/include/linux/parport.h @@ -225,6 +225,7 @@ struct parport { struct pardevice *waittail; struct list_head list; + struct timer_list timer; unsigned int flags; void *sysctl_table; diff --git a/include/linux/timer.h b/include/linux/timer.h index d11e819a86e2..b10c4bdc6fbd 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -132,8 +132,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, __init_timer((timer), TIMER_PINNED) #define init_timer_deferrable(timer) \ __init_timer((timer), TIMER_DEFERRABLE) -#define init_timer_on_stack(timer) \ - __init_timer_on_stack((timer), 0) #define __setup_timer(_timer, _fn, _data, _flags) \ do { \ -- cgit v1.2.3 From 185981d54a60ae90942c6ba9006b250f3348cef2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:26:58 -0700 Subject: timer: Remove init_timer_pinned() in favor of timer_setup() This refactors the only users of init_timer_pinned() to use the new timer_setup() and from_timer(). Drops the definition of init_timer_pinned(). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Acked-by: David S. Miller # for networking parts Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Guenter Roeck Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-5-git-send-email-keescook@chromium.org --- include/linux/timer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index b10c4bdc6fbd..9da903562ed4 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -128,8 +128,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define init_timer(timer) \ __init_timer((timer), 0) -#define init_timer_pinned(timer) \ - __init_timer((timer), TIMER_PINNED) #define init_timer_deferrable(timer) \ __init_timer((timer), TIMER_DEFERRABLE) -- cgit v1.2.3 From df7e828c1b699792b2ff26ebcf0a6d1025b2b790 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:26:59 -0700 Subject: timer: Remove init_timer_deferrable() in favor of timer_setup() This refactors the only users of init_timer_deferrable() to use the new timer_setup() and from_timer(). Removes definition of init_timer_deferrable(). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Acked-by: David S. Miller # for networking parts Acked-by: Sebastian Reichel # for drivers/hsi parts Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Oleg Nesterov Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Guenter Roeck Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: "Rafael J. Wysocki" Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: linux-wireless@vger.kernel.org Cc: Sebastian Reichel Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-6-git-send-email-keescook@chromium.org --- include/linux/timer.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 9da903562ed4..10cc45ca5803 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -128,8 +128,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define init_timer(timer) \ __init_timer((timer), 0) -#define init_timer_deferrable(timer) \ - __init_timer((timer), TIMER_DEFERRABLE) #define __setup_timer(_timer, _fn, _data, _flags) \ do { \ -- cgit v1.2.3 From 51487d9ed1e386f9f0863bbf385e5da8a586bff8 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:27:01 -0700 Subject: timer: Remove last user of TIMER_INITIALIZER Drops the last user of TIMER_INITIALIZER and adapts timer.h to use the internal version. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: Mark Gross Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Michael Reed Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: "James E.J. Bottomley" Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Guenter Roeck Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-8-git-send-email-keescook@chromium.org --- include/linux/timer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 10cc45ca5803..4f7476e4a727 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -87,7 +87,7 @@ struct timer_list { #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ - TIMER_INITIALIZER(_function, _expires, _data) + __TIMER_INITIALIZER(_function, _expires, _data, 0) void init_timer_key(struct timer_list *timer, unsigned int flags, const char *name, struct lock_class_key *key); -- cgit v1.2.3 From fca7ce5b7c6d3eda8c2e011cff98d22e0fbd5097 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:27:02 -0700 Subject: timer: Remove unused static initializer macros This removes the now unused TIMER_*INITIALIZER macros: TIMER_INITIALIZER TIMER_PINNED_INITIALIZER TIMER_DEFERRED_INITIALIZER TIMER_PINNED_DEFERRED_INITIALIZER Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Michael Reed Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Guenter Roeck Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-9-git-send-email-keescook@chromium.org --- include/linux/timer.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 4f7476e4a727..a33220311361 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -73,18 +73,6 @@ struct timer_list { __FILE__ ":" __stringify(__LINE__)) \ } -#define TIMER_INITIALIZER(_function, _expires, _data) \ - __TIMER_INITIALIZER((_function), (_expires), (_data), 0) - -#define TIMER_PINNED_INITIALIZER(_function, _expires, _data) \ - __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_PINNED) - -#define TIMER_DEFERRED_INITIALIZER(_function, _expires, _data) \ - __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE) - -#define TIMER_PINNED_DEFERRED_INITIALIZER(_function, _expires, _data) \ - __TIMER_INITIALIZER((_function), (_expires), (_data), TIMER_DEFERRABLE | TIMER_PINNED) - #define DEFINE_TIMER(_name, _function, _expires, _data) \ struct timer_list _name = \ __TIMER_INITIALIZER(_function, _expires, _data, 0) -- cgit v1.2.3 From 1d27e3e2252ba9d949ca82fbdb73cde102cb2067 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:27:04 -0700 Subject: timer: Remove expires and data arguments from DEFINE_TIMER Drop the arguments from the macro and adjust all callers with the following script: perl -pi -e 's/DEFINE_TIMER\((.*), 0, 0\);/DEFINE_TIMER($1);/g;' \ $(git grep DEFINE_TIMER | cut -d: -f1 | sort -u | grep -v timer.h) Signed-off-by: Kees Cook Acked-by: Geert Uytterhoeven # for m68k parts Acked-by: Guenter Roeck # for watchdog parts Acked-by: David S. Miller # for networking parts Acked-by: Greg Kroah-Hartman Acked-by: Kalle Valo # for wireless parts Acked-by: Arnd Bergmann Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Michael Reed Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Guenter Roeck Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-11-git-send-email-keescook@chromium.org Signed-off-by: Thomas Gleixner --- include/linux/timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index a33220311361..91e5a2cc81b5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -73,9 +73,9 @@ struct timer_list { __FILE__ ":" __stringify(__LINE__)) \ } -#define DEFINE_TIMER(_name, _function, _expires, _data) \ +#define DEFINE_TIMER(_name, _function) \ struct timer_list _name = \ - __TIMER_INITIALIZER(_function, _expires, _data, 0) + __TIMER_INITIALIZER(_function, 0, 0, 0) void init_timer_key(struct timer_list *timer, unsigned int flags, const char *name, struct lock_class_key *key); -- cgit v1.2.3 From 8ede369b2cccfa585e2969bbed18edc0e2a18c50 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:27:05 -0700 Subject: timer: Remove expires argument from __TIMER_INITIALIZER() The expires field is normally initialized during the first mod_timer() call. It was unused by all callers, so remove it from the macro. Signed-off-by: Kees Cook Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Michael Reed Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Tejun Heo Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Guenter Roeck Cc: netdev@vger.kernel.org Cc: Martin Schwidefsky Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-12-git-send-email-keescook@chromium.org Signed-off-by: Thomas Gleixner --- include/linux/kthread.h | 2 +- include/linux/timer.h | 5 ++--- include/linux/workqueue.h | 2 +- 3 files changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 82e197eeac91..0d622b350d3f 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -117,7 +117,7 @@ struct kthread_delayed_work { #define KTHREAD_DELAYED_WORK_INIT(dwork, fn) { \ .work = KTHREAD_WORK_INIT((dwork).work, (fn)), \ .timer = __TIMER_INITIALIZER(kthread_delayed_work_timer_fn, \ - 0, (unsigned long)&(dwork), \ + (unsigned long)&(dwork), \ TIMER_IRQSAFE), \ } diff --git a/include/linux/timer.h b/include/linux/timer.h index 91e5a2cc81b5..10685c33e679 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -63,10 +63,9 @@ struct timer_list { #define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) -#define __TIMER_INITIALIZER(_function, _expires, _data, _flags) { \ +#define __TIMER_INITIALIZER(_function, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ - .expires = (_expires), \ .data = (_data), \ .flags = (_flags), \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ @@ -75,7 +74,7 @@ struct timer_list { #define DEFINE_TIMER(_name, _function) \ struct timer_list _name = \ - __TIMER_INITIALIZER(_function, 0, 0, 0) + __TIMER_INITIALIZER(_function, 0, 0) void init_timer_key(struct timer_list *timer, unsigned int flags, const char *name, struct lock_class_key *key); diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1c49431f3121..f4960260feaf 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -176,7 +176,7 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = __TIMER_INITIALIZER(delayed_work_timer_fn, \ - 0, (unsigned long)&(n), \ + (unsigned long)&(n), \ (tflags) | TIMER_IRQSAFE), \ } -- cgit v1.2.3 From fe5c3b69b540e3387223a696f327c1bb8880d1ac Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:27:06 -0700 Subject: kthread: Convert callback to use from_timer() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch kthread to use from_timer() and pass the timer pointer explicitly. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Len Brown Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Guenter Roeck Cc: Manish Chopra Cc: Petr Mladek Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Tejun Heo Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-13-git-send-email-keescook@chromium.org --- include/linux/kthread.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 0d622b350d3f..35cbe3b0ce5b 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -75,7 +75,7 @@ extern int tsk_fork_get_node(struct task_struct *tsk); */ struct kthread_work; typedef void (*kthread_work_func_t)(struct kthread_work *work); -void kthread_delayed_work_timer_fn(unsigned long __data); +void kthread_delayed_work_timer_fn(struct timer_list *t); enum { KTW_FREEZABLE = 1 << 0, /* freeze during suspend */ @@ -116,8 +116,8 @@ struct kthread_delayed_work { #define KTHREAD_DELAYED_WORK_INIT(dwork, fn) { \ .work = KTHREAD_WORK_INIT((dwork).work, (fn)), \ - .timer = __TIMER_INITIALIZER(kthread_delayed_work_timer_fn, \ - (unsigned long)&(dwork), \ + .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\ + (TIMER_DATA_TYPE)&(dwork.timer), \ TIMER_IRQSAFE), \ } @@ -164,8 +164,8 @@ extern void __kthread_init_worker(struct kthread_worker *worker, do { \ kthread_init_work(&(dwork)->work, (fn)); \ __setup_timer(&(dwork)->timer, \ - kthread_delayed_work_timer_fn, \ - (unsigned long)(dwork), \ + (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\ + (TIMER_DATA_TYPE)&(dwork)->timer, \ TIMER_IRQSAFE); \ } while (0) -- cgit v1.2.3 From 8c20feb60604d91a29cd7fef8ac758bd92d9fd2c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 4 Oct 2017 16:27:07 -0700 Subject: workqueue: Convert callback to use from_timer() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch workqueue to use from_timer() and pass the timer pointer explicitly. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: linux-mips@linux-mips.org Cc: Petr Mladek Cc: Benjamin Herrenschmidt Cc: Lai Jiangshan Cc: Sebastian Reichel Cc: Kalle Valo Cc: Paul Mackerras Cc: Pavel Machek Cc: linux1394-devel@lists.sourceforge.net Cc: Chris Metcalf Cc: linux-s390@vger.kernel.org Cc: linux-wireless@vger.kernel.org Cc: "James E.J. Bottomley" Cc: Wim Van Sebroeck Cc: Michael Ellerman Cc: Ursula Braun Cc: Geert Uytterhoeven Cc: Viresh Kumar Cc: Harish Patil Cc: Stephen Boyd Cc: Guenter Roeck Cc: Manish Chopra Cc: Len Brown Cc: Arnd Bergmann Cc: linux-pm@vger.kernel.org Cc: Heiko Carstens Cc: Martin Schwidefsky Cc: Julian Wiedmann Cc: John Stultz Cc: Mark Gross Cc: linux-watchdog@vger.kernel.org Cc: linux-scsi@vger.kernel.org Cc: "Martin K. Petersen" Cc: Greg Kroah-Hartman Cc: "Rafael J. Wysocki" Cc: Oleg Nesterov Cc: Ralf Baechle Cc: Stefan Richter Cc: Michael Reed Cc: netdev@vger.kernel.org Cc: Tejun Heo Cc: Andrew Morton Cc: linuxppc-dev@lists.ozlabs.org Cc: Sudip Mukherjee Link: https://lkml.kernel.org/r/1507159627-127660-14-git-send-email-keescook@chromium.org --- include/linux/workqueue.h | 15 ++++++++------- 1 file changed, 8 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index f4960260feaf..f3c47a05fd06 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -17,7 +17,7 @@ struct workqueue_struct; struct work_struct; typedef void (*work_func_t)(struct work_struct *work); -void delayed_work_timer_fn(unsigned long __data); +void delayed_work_timer_fn(struct timer_list *t); /* * The first word is the work queue pointer and the flags rolled into @@ -175,8 +175,8 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ - .timer = __TIMER_INITIALIZER(delayed_work_timer_fn, \ - (unsigned long)&(n), \ + .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)delayed_work_timer_fn,\ + (TIMER_DATA_TYPE)&(n.timer), \ (tflags) | TIMER_IRQSAFE), \ } @@ -241,8 +241,9 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ - __setup_timer(&(_work)->timer, delayed_work_timer_fn, \ - (unsigned long)(_work), \ + __setup_timer(&(_work)->timer, \ + (TIMER_FUNC_TYPE)delayed_work_timer_fn, \ + (TIMER_DATA_TYPE)&(_work)->timer, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) @@ -250,8 +251,8 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ __setup_timer_on_stack(&(_work)->timer, \ - delayed_work_timer_fn, \ - (unsigned long)(_work), \ + (TIMER_FUNC_TYPE)delayed_work_timer_fn,\ + (TIMER_DATA_TYPE)&(_work)->timer,\ (_tflags) | TIMER_IRQSAFE); \ } while (0) -- cgit v1.2.3 From a060c2104ef83e62346b7e893947a940471c0d7c Mon Sep 17 00:00:00 2001 From: Marc Gonzalez Date: Tue, 26 Sep 2017 12:22:54 +0200 Subject: of/pci: Add of_pci_dma_range_parser_init() for dma-ranges parsing support Several host bridge drivers duplicate of_pci_range_parser_init() in order to parse their dma-ranges property. Provide of_pci_dma_range_parser_init() for that use case. Signed-off-by: Marc Gonzalez Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring Reviewed-by: Linus Walleij --- include/linux/of_address.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_address.h b/include/linux/of_address.h index 37864734ca50..8beed2de98e9 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -49,6 +49,8 @@ extern const __be32 *of_get_address(struct device_node *dev, int index, extern int of_pci_range_parser_init(struct of_pci_range_parser *parser, struct device_node *node); +extern int of_pci_dma_range_parser_init(struct of_pci_range_parser *parser, + struct device_node *node); extern struct of_pci_range *of_pci_range_parser_one( struct of_pci_range_parser *parser, struct of_pci_range *range); @@ -85,7 +87,13 @@ static inline const __be32 *of_get_address(struct device_node *dev, int index, static inline int of_pci_range_parser_init(struct of_pci_range_parser *parser, struct device_node *node) { - return -1; + return -ENOSYS; +} + +static inline int of_pci_dma_range_parser_init(struct of_pci_range_parser *parser, + struct device_node *node) +{ + return -ENOSYS; } static inline struct of_pci_range *of_pci_range_parser_one( -- cgit v1.2.3 From 753f612471819d3b3abba8c520eb3ce8f9d00fa5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Tue, 26 Sep 2017 12:53:23 -0500 Subject: PCI: Remove reset argument from pci_iov_{add,remove}_virtfn() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The "reset" argument passed to pci_iov_add_virtfn() and pci_iov_remove_virtfn() is always zero since 46cb7b1bd86f ("PCI: Remove unused SR-IOV VF Migration support") Remove the argument together with the associated code. Signed-off-by: Jan H. Schönherr Signed-off-by: Bjorn Helgaas Acked-by: Russell Currey --- include/linux/pci.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f4f8ee5a7362..75ac2af89e87 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1958,8 +1958,8 @@ int pci_iov_virtfn_devfn(struct pci_dev *dev, int id); int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn); void pci_disable_sriov(struct pci_dev *dev); -int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset); -void pci_iov_remove_virtfn(struct pci_dev *dev, int id, int reset); +int pci_iov_add_virtfn(struct pci_dev *dev, int id); +void pci_iov_remove_virtfn(struct pci_dev *dev, int id); int pci_num_vf(struct pci_dev *dev); int pci_vfs_assigned(struct pci_dev *dev); int pci_sriov_set_totalvfs(struct pci_dev *dev, u16 numvfs); @@ -1976,12 +1976,12 @@ static inline int pci_iov_virtfn_devfn(struct pci_dev *dev, int id) } static inline int pci_enable_sriov(struct pci_dev *dev, int nr_virtfn) { return -ENODEV; } -static inline int pci_iov_add_virtfn(struct pci_dev *dev, int id, int reset) +static inline int pci_iov_add_virtfn(struct pci_dev *dev, int id) { return -ENOSYS; } static inline void pci_iov_remove_virtfn(struct pci_dev *dev, - int id, int reset) { } + int id) { } static inline void pci_disable_sriov(struct pci_dev *dev) { } static inline int pci_num_vf(struct pci_dev *dev) { return 0; } static inline int pci_vfs_assigned(struct pci_dev *dev) -- cgit v1.2.3 From 79e699b648b93f76d0f6e692499d5c6a2295ef05 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Wed, 6 Sep 2017 01:21:23 +0200 Subject: PCI: Remove unused function __pci_reset_function() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The last caller of __pci_reset_function() has been removed. Remove the function as well. Signed-off-by: Jan H. Schönherr Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 75ac2af89e87..fb29d964a057 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1089,7 +1089,6 @@ int pcie_set_mps(struct pci_dev *dev, int mps); int pcie_get_minimum_link(struct pci_dev *dev, enum pci_bus_speed *speed, enum pcie_link_width *width); void pcie_flr(struct pci_dev *dev); -int __pci_reset_function(struct pci_dev *dev); int __pci_reset_function_locked(struct pci_dev *dev); int pci_reset_function(struct pci_dev *dev); int pci_reset_function_locked(struct pci_dev *dev); -- cgit v1.2.3 From 5fdee2127faa77c9c91862ad5e001dfab7013e92 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 5 Oct 2017 21:22:52 +0200 Subject: block: remove QUEUE_FLAG_STACKABLE We already have a queue_is_rq_based helper to check if a request_queue is request based, so we can remove the flag for it. Acked-by: Mike Snitzer Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 02fa42d24b52..9fb71fc7d0e8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -609,7 +609,6 @@ struct request_queue { #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 7 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 8 /* supports request stacking */ #define QUEUE_FLAG_NONROT 9 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 10 /* do IO stats */ @@ -633,12 +632,10 @@ struct request_queue { #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_ADD_RANDOM)) #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_POLL)) @@ -722,8 +719,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_stackable(q) \ - test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_queue_secure_erase(q) \ (test_bit(QUEUE_FLAG_SECERASE, &(q)->queue_flags)) -- cgit v1.2.3 From 3e234289f86b12985ef8909cd34525fcb66c4efb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 3 Mar 2017 18:00:22 -0500 Subject: ftrace: Allow module init functions to be traced Allow for module init sections to be traced as well as core kernel init sections. Now that filtering modules functions can be stored, for when they are loaded, it makes sense to be able to trace them. Cc: Jessica Yu Cc: Rusty Russell Signed-off-by: Steven Rostedt (VMware) --- include/linux/init.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init.h b/include/linux/init.h index 94769d687cf0..a779c1816437 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -39,7 +39,7 @@ /* These are for everybody (although not all archs will actually discard it in modules) */ -#define __init __section(.init.text) __cold __inittrace __latent_entropy +#define __init __section(.init.text) __cold __latent_entropy #define __initdata __section(.init.data) #define __initconst __section(.init.rodata) #define __exitdata __section(.exit.data) @@ -68,10 +68,8 @@ #ifdef MODULE #define __exitused -#define __inittrace notrace #else #define __exitused __used -#define __inittrace #endif #define __exit __section(.exit.text) __exitused __cold notrace -- cgit v1.2.3 From aba4b5c22cbac296f4081a0476d0c55828f135b4 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Fri, 1 Sep 2017 08:35:38 -0400 Subject: ftrace: Save module init functions kallsyms symbols for tracing If function tracing is active when the module init functions are freed, then store them to be referenced by kallsyms. As module init functions can now be traced on module load, they were useless: ># echo ':mod:snd_seq' > set_ftrace_filter ># echo function > current_tracer ># modprobe snd_seq ># cat trace # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | modprobe-2786 [000] .... 3189.037874: 0xffffffffa0860000 <-do_one_initcall modprobe-2786 [000] .... 3189.037876: 0xffffffffa086004d <-0xffffffffa086000f modprobe-2786 [000] .... 3189.037876: 0xffffffffa086010d <-0xffffffffa0860018 modprobe-2786 [000] .... 3189.037877: 0xffffffffa086011a <-0xffffffffa0860021 modprobe-2786 [000] .... 3189.037877: 0xffffffffa0860080 <-0xffffffffa086002a modprobe-2786 [000] .... 3189.039523: 0xffffffffa0860400 <-0xffffffffa0860033 modprobe-2786 [000] .... 3189.039523: 0xffffffffa086038a <-0xffffffffa086041c modprobe-2786 [000] .... 3189.039591: 0xffffffffa086038a <-0xffffffffa0860436 modprobe-2786 [000] .... 3189.039657: 0xffffffffa086038a <-0xffffffffa0860450 modprobe-2786 [000] .... 3189.039719: 0xffffffffa0860127 <-0xffffffffa086003c modprobe-2786 [000] .... 3189.039742: snd_seq_create_kernel_client <-0xffffffffa08601f6 When the output is shown, the kallsyms for the module init functions have already been freed, and the output of the trace can not convert them to their function names. Now this looks like this: # tracer: function # # _-----=> irqs-off # / _----=> need-resched # | / _---=> hardirq/softirq # || / _--=> preempt-depth # ||| / delay # TASK-PID CPU# |||| TIMESTAMP FUNCTION # | | | |||| | | modprobe-2463 [002] .... 174.243237: alsa_seq_init <-do_one_initcall modprobe-2463 [002] .... 174.243239: client_init_data <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_sequencer_memory_init <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_seq_queues_init <-alsa_seq_init modprobe-2463 [002] .... 174.243240: snd_sequencer_device_init <-alsa_seq_init modprobe-2463 [002] .... 174.244860: snd_seq_info_init <-alsa_seq_init modprobe-2463 [002] .... 174.244861: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.244936: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.245003: create_info_entry <-snd_seq_info_init modprobe-2463 [002] .... 174.245072: snd_seq_system_client_init <-alsa_seq_init modprobe-2463 [002] .... 174.245094: snd_seq_create_kernel_client <-snd_seq_system_client_init Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 47fc404ad233..202b40784c4e 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -51,6 +51,21 @@ static inline void early_trace_init(void) { } struct module; struct ftrace_hash; +#if defined(CONFIG_FUNCTION_TRACER) && defined(CONFIG_MODULES) && \ + defined(CONFIG_DYNAMIC_FTRACE) +const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym); +#else +static inline const char * +ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, + unsigned long *off, char **modname, char *sym) +{ + return NULL; +} +#endif + + #ifdef CONFIG_FUNCTION_TRACER extern int ftrace_enabled; @@ -151,10 +166,10 @@ struct ftrace_ops_hash { }; void ftrace_free_init_mem(void); -void ftrace_free_mem(void *start, void *end); +void ftrace_free_mem(struct module *mod, void *start, void *end); #else static inline void ftrace_free_init_mem(void) { } -static inline void ftrace_free_mem(void *start, void *end) { } +static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* @@ -272,6 +287,7 @@ static inline int ftrace_nr_registered_ops(void) static inline void clear_ftrace_function(void) { } static inline void ftrace_kill(void) { } static inline void ftrace_free_init_mem(void) { } +static inline void ftrace_free_mem(struct module *mod, void *start, void *end) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER -- cgit v1.2.3 From 6171a0310a06a7a0cb83713fa7068bdd4192de19 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Wed, 6 Sep 2017 08:40:41 -0400 Subject: ftrace/kallsyms: Have /proc/kallsyms show saved mod init functions If a module is loaded while tracing is enabled, then there's a possibility that the module init functions were traced. These functions have their name and address stored by ftrace such that it can translate the function address that is written into the buffer into a human readable function name. As userspace tools may be doing the same, they need a way to map function names to their address as well. This is done through reading /proc/kallsyms. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 202b40784c4e..346f8294e40a 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -56,6 +56,9 @@ struct ftrace_hash; const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, unsigned long *off, char **modname, char *sym); +int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported); #else static inline const char * ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, @@ -63,6 +66,12 @@ ftrace_mod_address_lookup(unsigned long addr, unsigned long *size, { return NULL; } +static inline int ftrace_mod_get_kallsym(unsigned int symnum, unsigned long *value, + char *type, char *name, + char *module_name, int *exported) +{ + return -1; +} #endif -- cgit v1.2.3 From e2080072ed2d98a55ae69d95dea60ff7a17cddd5 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Wed, 4 Oct 2017 12:59:58 -0700 Subject: tcp: new list for sent but unacked skbs for RACK recovery This patch adds a new queue (list) that tracks the sent but not yet acked or SACKed skbs for a TCP connection. The list is chronologically ordered by skb->skb_mstamp (the head is the oldest sent skb). This list will be used to optimize TCP Rack recovery, which checks an skb's timestamp to judge if it has been lost and needs to be retransmitted. Since TCP write queue is ordered by sequence instead of sent time, RACK has to scan over the write queue to catch all eligible packets to detect lost retransmission, and iterates through SACKed skbs repeatedly. Special cares for rare events: 1. TCP repair fakes skb transmission so the send queue needs adjusted 2. SACK reneging would require re-inserting SACKed skbs into the send queue. For now I believe it's not worth the complexity to make RACK work perfectly on SACK reneging, so we do nothing here. 3. Fast Open: currently for non-TFO, send-queue correctly queues the pure SYN packet. For TFO which queues a pure SYN and then a data packet, send-queue only queues the data packet but not the pure SYN due to the structure of TFO code. This is okay because the SYN receiver would never respond with a SACK on a missing SYN (i.e. SYN is never fast-retransmitted by SACK/RACK). In order to not grow sk_buff, we use an union for the new list and _skb_refdst/destructor fields. This is a bit complicated because we need to make sure _skb_refdst and destructor are properly zeroed before skb is cloned/copied at transmit, and before being freed. Signed-off-by: Eric Dumazet Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/skbuff.h | 11 +++++++++-- include/linux/tcp.h | 1 + 2 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ada821466e88..01a985937867 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -617,6 +617,7 @@ typedef unsigned char *sk_buff_data_t; * @nf_trace: netfilter packet trace flag * @protocol: Packet protocol from driver * @destructor: Destruct function + * @tcp_tsorted_anchor: list structure for TCP (tp->tsorted_sent_queue) * @_nfct: Associated connection, if any (with nfctinfo bits) * @nf_bridge: Saved data about a bridged frame - see br_netfilter.c * @skb_iif: ifindex of device we arrived on @@ -686,8 +687,14 @@ struct sk_buff { */ char cb[48] __aligned(8); - unsigned long _skb_refdst; - void (*destructor)(struct sk_buff *skb); + union { + struct { + unsigned long _skb_refdst; + void (*destructor)(struct sk_buff *skb); + }; + struct list_head tcp_tsorted_anchor; + }; + #ifdef CONFIG_XFRM struct sec_path *sp; #endif diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 4aa40ef02d32..1d2c44e09e31 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -191,6 +191,7 @@ struct tcp_sock { u32 tsoffset; /* timestamp offset */ struct list_head tsq_node; /* anchor in tsq_tasklet.head list */ + struct list_head tsorted_sent_queue; /* time-sorted sent but un-SACKed skbs */ u32 snd_wl1; /* Sequence for window update */ u32 snd_wnd; /* The window we expect to receive */ -- cgit v1.2.3 From ec154bf56b276a0bb36079a5d22a267b5f417801 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Fri, 6 Oct 2017 15:00:53 +0200 Subject: iommu/vt-d: Don't register bus-notifier under dmar_global_lock The notifier function will take the dmar_global_lock too, so lockdep complains about inverse locking order when the notifier is registered under the dmar_global_lock. Reported-by: Jan Kiszka Fixes: 59ce0515cdaf ('iommu/vt-d: Update DRHD/RMRR/ATSR device scope caches when PCI hotplug happens') Signed-off-by: Joerg Roedel --- include/linux/dmar.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dmar.h b/include/linux/dmar.h index e8ffba1052d3..e2433bc50210 100644 --- a/include/linux/dmar.h +++ b/include/linux/dmar.h @@ -112,6 +112,7 @@ static inline bool dmar_rcu_check(void) extern int dmar_table_init(void); extern int dmar_dev_scope_init(void); +extern void dmar_register_bus_notifier(void); extern int dmar_parse_dev_scope(void *start, void *end, int *cnt, struct dmar_dev_scope **devices, u16 segment); extern void *dmar_alloc_dev_scope(void *start, void *end, int *cnt); -- cgit v1.2.3 From 775d3a35dc3e13de55ec0e061c59e36faa7dd7f0 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 6 Oct 2017 08:15:15 -0600 Subject: backing-dev: kill unused pdflush_proc_obsolete() After commit b35bd0d9f8a8, pdflush_proc_obsolete() is no longer used. Kill the function and declaration. Reported-by: Rakesh Pandit Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 157e950a70dc..872afa41abc2 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -172,8 +172,6 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); -int pdflush_proc_obsolete(struct ctl_table *table, int write, - void __user *buffer, size_t *lenp, loff_t *ppos); static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) { -- cgit v1.2.3 From 878e832ade6fc315e6ea59d95824bbb0430c6e8d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 6 Oct 2017 11:19:24 -0400 Subject: acct.h: get rid of detritus unused forward declarations of structs Signed-off-by: Al Viro --- include/linux/acct.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acct.h b/include/linux/acct.h index dccc2d4fe7de..3912a45b8c89 100644 --- a/include/linux/acct.h +++ b/include/linux/acct.h @@ -19,9 +19,6 @@ #ifdef CONFIG_BSD_PROCESS_ACCT -struct vfsmount; -struct super_block; -struct pacct_struct; struct pid_namespace; extern int acct_parm[]; /* for sysctl */ extern void acct_collect(long exitcode, int group_dead); -- cgit v1.2.3 From 55edd1dad96b760c87f5b9e2c461ba551a625f44 Mon Sep 17 00:00:00 2001 From: David Lin Date: Wed, 13 Sep 2017 10:53:58 -0700 Subject: leds: Replace flags bit shift with BIT() macros This is for readability as well as to avoid checkpatch warnings when adding new bit flag information in the future. Signed-off-by: David Lin Signed-off-by: Jacek Anaszewski --- include/linux/leds.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index bf6db4fe895b..5579c64c8fd6 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -40,16 +40,16 @@ struct led_classdev { int flags; /* Lower 16 bits reflect status */ -#define LED_SUSPENDED (1 << 0) -#define LED_UNREGISTERING (1 << 1) +#define LED_SUSPENDED BIT(0) +#define LED_UNREGISTERING BIT(1) /* Upper 16 bits reflect control information */ -#define LED_CORE_SUSPENDRESUME (1 << 16) -#define LED_SYSFS_DISABLE (1 << 17) -#define LED_DEV_CAP_FLASH (1 << 18) -#define LED_HW_PLUGGABLE (1 << 19) -#define LED_PANIC_INDICATOR (1 << 20) -#define LED_BRIGHT_HW_CHANGED (1 << 21) -#define LED_RETAIN_AT_SHUTDOWN (1 << 22) +#define LED_CORE_SUSPENDRESUME BIT(16) +#define LED_SYSFS_DISABLE BIT(17) +#define LED_DEV_CAP_FLASH BIT(18) +#define LED_HW_PLUGGABLE BIT(19) +#define LED_PANIC_INDICATOR BIT(20) +#define LED_BRIGHT_HW_CHANGED BIT(21) +#define LED_RETAIN_AT_SHUTDOWN BIT(22) /* set_brightness_work / blink_timer flags, atomic, private. */ unsigned long work_flags; -- cgit v1.2.3 From 18a4c0eab2623cc95be98a1e6af1ad18e7695977 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Oct 2017 22:21:21 -0700 Subject: net: add rb_to_skb() and other rb tree helpers Geeralize private netem_rb_to_skb() TCP rtx queue will soon be converted to rb-tree, so we will need skb_rbtree_walk() helpers. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 01a985937867..03634ec2f918 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3158,6 +3158,12 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) return __skb_grow(skb, len); } +#define rb_to_skb(rb) rb_entry_safe(rb, struct sk_buff, rbnode) +#define skb_rb_first(root) rb_to_skb(rb_first(root)) +#define skb_rb_last(root) rb_to_skb(rb_last(root)) +#define skb_rb_next(skb) rb_to_skb(rb_next(&(skb)->rbnode)) +#define skb_rb_prev(skb) rb_to_skb(rb_prev(&(skb)->rbnode)) + #define skb_queue_walk(queue, skb) \ for (skb = (queue)->next; \ skb != (struct sk_buff *)(queue); \ @@ -3172,6 +3178,18 @@ static inline int __skb_grow_rcsum(struct sk_buff *skb, unsigned int len) for (; skb != (struct sk_buff *)(queue); \ skb = skb->next) +#define skb_rbtree_walk(skb, root) \ + for (skb = skb_rb_first(root); skb != NULL; \ + skb = skb_rb_next(skb)) + +#define skb_rbtree_walk_from(skb) \ + for (; skb != NULL; \ + skb = skb_rb_next(skb)) + +#define skb_rbtree_walk_from_safe(skb, tmp) \ + for (; tmp = skb ? skb_rb_next(skb) : NULL, (skb != NULL); \ + skb = tmp) + #define skb_queue_walk_from_safe(queue, skb, tmp) \ for (tmp = skb->next; \ skb != (struct sk_buff *)(queue); \ -- cgit v1.2.3 From cf5868c8a22dc2854b96e9569064bb92365549ca Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 8 Sep 2017 20:57:10 +0200 Subject: padata: ensure the reorder timer callback runs on the correct CPU The reorder timer function runs on the CPU where the timer interrupt was handled which is not necessarily one of the CPUs of the 'pcpu' CPU mask set. Ensure the padata_reorder() callback runs on the correct CPU, which is one in the 'pcpu' CPU mask set and, preferrably, the next expected one. Do so by comparing the current CPU with the expected target CPU. If they match, call padata_reorder() right away. If they differ, schedule a work item on the target CPU that does the padata_reorder() call for us. Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- include/linux/padata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 2f9c1f93b1ce..5c0175bbc179 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -85,6 +85,7 @@ struct padata_serial_queue { * @swork: work struct for serialization. * @pd: Backpointer to the internal control structure. * @work: work struct for parallelization. + * @reorder_work: work struct for reordering. * @num_obj: Number of objects that are processed by this cpu. * @cpu_index: Index of the cpu. */ @@ -93,6 +94,7 @@ struct padata_parallel_queue { struct padata_list reorder; struct parallel_data *pd; struct work_struct work; + struct work_struct reorder_work; atomic_t num_obj; int cpu_index; }; -- cgit v1.2.3 From 350ef88e7e922354f82a931897ad4a4ce6c686ff Mon Sep 17 00:00:00 2001 From: Mathias Krause Date: Fri, 8 Sep 2017 20:57:11 +0200 Subject: padata: ensure padata_do_serial() runs on the correct CPU If the algorithm we're parallelizing is asynchronous we might change CPUs between padata_do_parallel() and padata_do_serial(). However, we don't expect this to happen as we need to enqueue the padata object into the per-cpu reorder queue we took it from, i.e. the same-cpu's parallel queue. Ensure we're not switching CPUs for a given padata object by tracking the CPU within the padata object. If the serial callback gets called on the wrong CPU, defer invoking padata_reorder() via a kernel worker on the CPU we're expected to run on. Signed-off-by: Mathias Krause Signed-off-by: Herbert Xu --- include/linux/padata.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/padata.h b/include/linux/padata.h index 5c0175bbc179..5d13d25da2c8 100644 --- a/include/linux/padata.h +++ b/include/linux/padata.h @@ -37,6 +37,7 @@ * @list: List entry, to attach to the padata lists. * @pd: Pointer to the internal control structure. * @cb_cpu: Callback cpu for serializatioon. + * @cpu: Cpu for parallelization. * @seq_nr: Sequence number of the parallelized data object. * @info: Used to pass information from the parallel to the serial function. * @parallel: Parallel execution function. @@ -46,6 +47,7 @@ struct padata_priv { struct list_head list; struct parallel_data *pd; int cb_cpu; + int cpu; int info; void (*parallel)(struct padata_priv *padata); void (*serial)(struct padata_priv *padata); -- cgit v1.2.3 From 703321b60b605b1f381f5dcf0bca42703b912e3e Mon Sep 17 00:00:00 2001 From: Matthew Auld Date: Fri, 6 Oct 2017 23:18:13 +0100 Subject: mm/shmem: introduce shmem_file_setup_with_mnt We are planning to use our own tmpfs mnt in i915 in place of the shm_mnt, such that we can control the mount options, in particular huge=, which we require to support huge-gtt-pages. So rather than roll our own version of __shmem_file_setup, it would be preferred if we could just give shmem our mnt, and let it do the rest. Signed-off-by: Matthew Auld Cc: Joonas Lahtinen Cc: Chris Wilson Cc: Dave Hansen Cc: Kirill A. Shutemov Cc: Hugh Dickins Cc: linux-mm@kvack.org Acked-by: Andrew Morton Acked-by: Kirill A. Shutemov Reviewed-by: Joonas Lahtinen Link: https://patchwork.freedesktop.org/patch/msgid/20171006145041.21673-2-matthew.auld@intel.com Signed-off-by: Chris Wilson Link: https://patchwork.freedesktop.org/patch/msgid/20171006221833.32439-1-chris@chris-wilson.co.uk --- include/linux/shmem_fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/shmem_fs.h b/include/linux/shmem_fs.h index b6c3540e07bc..0937d9a7d8fb 100644 --- a/include/linux/shmem_fs.h +++ b/include/linux/shmem_fs.h @@ -53,6 +53,8 @@ extern struct file *shmem_file_setup(const char *name, loff_t size, unsigned long flags); extern struct file *shmem_kernel_file_setup(const char *name, loff_t size, unsigned long flags); +extern struct file *shmem_file_setup_with_mnt(struct vfsmount *mnt, + const char *name, loff_t size, unsigned long flags); extern int shmem_zero_setup(struct vm_area_struct *); extern unsigned long shmem_get_unmapped_area(struct file *, unsigned long addr, unsigned long len, unsigned long pgoff, unsigned long flags); -- cgit v1.2.3 From f3d0d8d938b4d71be1f7fb003ca2ac9250b3be4d Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 24 Sep 2017 19:39:12 +0200 Subject: mtd: nand: gpio: Convert to use GPIO descriptors There is exactly one board in the kernel that defines platform data for the GPIO NAND driver. Use the feature to provide a lookup table for the GPIOs in the board file so we can convert the driver as a whole to just use GPIO descriptors. After this we can cut the use of and use the GPIO descriptor management from alone to grab and use the GPIOs used in the driver. I also created a local struct device *dev in the probe() function because I was getting annoyed with all the &pdev->dev dereferencing. Cc: arm@kernel.org Cc: Mike Rapoport Cc: Frans Klaver Cc: Gerhard Sittig Cc: Jamie Iles Signed-off-by: Linus Walleij Reviewed-by: Marek Vasut Acked-by: Jamie Iles Acked-by: Olof Johansson Acked-by: Robert Jarzmik Signed-off-by: Boris Brezillon --- include/linux/mtd/nand-gpio.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand-gpio.h b/include/linux/mtd/nand-gpio.h index be4f45d89be2..98f71908212d 100644 --- a/include/linux/mtd/nand-gpio.h +++ b/include/linux/mtd/nand-gpio.h @@ -4,11 +4,6 @@ #include struct gpio_nand_platdata { - int gpio_nce; - int gpio_nwp; - int gpio_cle; - int gpio_ale; - int gpio_rdy; void (*adjust_parts)(struct gpio_nand_platdata *, size_t); struct mtd_partition *parts; unsigned int num_parts; -- cgit v1.2.3 From a1c4d24e02d093efd84cbde417051d2e767fa8fa Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2017 08:43:38 -0700 Subject: linux/log2.h: fix kernel-doc notation Fix kernel-doc: - Add kernel-doc notation to some functions. - Fix kernel-doc notation in function parameters. Signed-off-by: Randy Dunlap Signed-off-by: Jonathan Corbet --- include/linux/log2.h | 42 +++++++++++++++++++++++------------------- 1 file changed, 23 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index c373295f359f..41a1ae010993 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -37,19 +37,23 @@ int __ilog2_u64(u64 n) } #endif -/* - * Determine whether some value is a power of two, where zero is +/** + * is_power_of_2() - check if a value is a power of two + * @n: the value to check + * + * Determine whether some value is a power of two, where zero is * *not* considered a power of two. + * Return: true if @n is a power of 2, otherwise false. */ - static inline __attribute__((const)) bool is_power_of_2(unsigned long n) { return (n != 0 && ((n & (n - 1)) == 0)); } -/* - * round up to nearest power of two +/** + * __roundup_pow_of_two() - round up to nearest power of two + * @n: value to round up */ static inline __attribute__((const)) unsigned long __roundup_pow_of_two(unsigned long n) @@ -57,8 +61,9 @@ unsigned long __roundup_pow_of_two(unsigned long n) return 1UL << fls_long(n - 1); } -/* - * round down to nearest power of two +/** + * __rounddown_pow_of_two() - round down to nearest power of two + * @n: value to round down */ static inline __attribute__((const)) unsigned long __rounddown_pow_of_two(unsigned long n) @@ -67,12 +72,12 @@ unsigned long __rounddown_pow_of_two(unsigned long n) } /** - * ilog2 - log of base 2 of 32-bit or a 64-bit unsigned value - * @n - parameter + * ilog2 - log base 2 of 32-bit or a 64-bit unsigned value + * @n: parameter * * constant-capable log of base 2 calculation * - this can be used to initialise global variables from constant data, hence - * the massive ternary operator construction + * the massive ternary operator construction * * selects the appropriately-sized optimised version depending on sizeof(n) */ @@ -150,7 +155,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) /** * roundup_pow_of_two - round the given value up to nearest power of two - * @n - parameter + * @n: parameter * * round the given value up to the nearest power of two * - the result is undefined when n == 0 @@ -167,7 +172,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) /** * rounddown_pow_of_two - round the given value down to nearest power of two - * @n - parameter + * @n: parameter * * round the given value down to the nearest power of two * - the result is undefined when n == 0 @@ -180,6 +185,12 @@ unsigned long __rounddown_pow_of_two(unsigned long n) __rounddown_pow_of_two(n) \ ) +static inline __attribute_const__ +int __order_base_2(unsigned long n) +{ + return n > 1 ? ilog2(n - 1) + 1 : 0; +} + /** * order_base_2 - calculate the (rounded up) base 2 order of the argument * @n: parameter @@ -193,13 +204,6 @@ unsigned long __rounddown_pow_of_two(unsigned long n) * ob2(5) = 3 * ... and so on. */ - -static inline __attribute_const__ -int __order_base_2(unsigned long n) -{ - return n > 1 ? ilog2(n - 1) + 1 : 0; -} - #define order_base_2(n) \ ( \ __builtin_constant_p(n) ? ( \ -- cgit v1.2.3 From 078843f75d234123e654b992d97e8ae8a744ba23 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Sat, 30 Sep 2017 08:43:45 -0700 Subject: math64: add missing kernel-doc notation Add missing kernel-doc notation (function parameters) for several div() functions. Signed-off-by: Randy Dunlap Signed-off-by: Jonathan Corbet --- include/linux/math64.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 80690c96c734..8dd2488bf289 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -11,6 +11,11 @@ /** * div_u64_rem - unsigned 64bit divide with 32bit divisor with remainder + * @dividend: unsigned 64bit dividend + * @divisor: unsigned 32bit divisor + * @remainder: pointer to unsigned 32bit remainder + * + * Return: sets ``*remainder``, then returns dividend / divisor * * This is commonly provided by 32bit archs to provide an optimized 64bit * divide. @@ -23,6 +28,11 @@ static inline u64 div_u64_rem(u64 dividend, u32 divisor, u32 *remainder) /** * div_s64_rem - signed 64bit divide with 32bit divisor with remainder + * @dividend: signed 64bit dividend + * @divisor: signed 32bit divisor + * @remainder: pointer to signed 32bit remainder + * + * Return: sets ``*remainder``, then returns dividend / divisor */ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) { @@ -32,6 +42,11 @@ static inline s64 div_s64_rem(s64 dividend, s32 divisor, s32 *remainder) /** * div64_u64_rem - unsigned 64bit divide with 64bit divisor and remainder + * @dividend: unsigned 64bit dividend + * @divisor: unsigned 64bit divisor + * @remainder: pointer to unsigned 64bit remainder + * + * Return: sets ``*remainder``, then returns dividend / divisor */ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) { @@ -41,6 +56,10 @@ static inline u64 div64_u64_rem(u64 dividend, u64 divisor, u64 *remainder) /** * div64_u64 - unsigned 64bit divide with 64bit divisor + * @dividend: unsigned 64bit dividend + * @divisor: unsigned 64bit divisor + * + * Return: dividend / divisor */ static inline u64 div64_u64(u64 dividend, u64 divisor) { @@ -49,6 +68,10 @@ static inline u64 div64_u64(u64 dividend, u64 divisor) /** * div64_s64 - signed 64bit divide with 64bit divisor + * @dividend: signed 64bit dividend + * @divisor: signed 64bit divisor + * + * Return: dividend / divisor */ static inline s64 div64_s64(s64 dividend, s64 divisor) { @@ -88,6 +111,8 @@ extern s64 div64_s64(s64 dividend, s64 divisor); /** * div_u64 - unsigned 64bit divide with 32bit divisor + * @dividend: unsigned 64bit dividend + * @divisor: unsigned 32bit divisor * * This is the most common 64bit divide and should be used if possible, * as many 32bit archs can optimize this variant better than a full 64bit @@ -103,6 +128,8 @@ static inline u64 div_u64(u64 dividend, u32 divisor) /** * div_s64 - signed 64bit divide with 32bit divisor + * @dividend: signed 64bit dividend + * @divisor: signed 32bit divisor */ #ifndef div_s64 static inline s64 div_s64(s64 dividend, s32 divisor) -- cgit v1.2.3 From 97562633bcbac4a07d605ae628d7655fa71caaf5 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:19 -0700 Subject: bpf: perf event change needed for subsequent bpf helpers This patch does not impact existing functionalities. It contains the changes in perf event area needed for subsequent bpf_perf_event_read_value and bpf_perf_prog_read_value helpers. Signed-off-by: Yonghong Song Acked-by: Peter Zijlstra (Intel) Signed-off-by: David S. Miller --- include/linux/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..79b18a20cf5d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -806,6 +806,7 @@ struct perf_output_handle { struct bpf_perf_event_data_kern { struct pt_regs *regs; struct perf_sample_data *data; + struct perf_event *event; }; #ifdef CONFIG_CGROUP_PERF @@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value); +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event * { return ERR_PTR(-EINVAL); } -static inline int perf_event_read_local(struct perf_event *event, u64 *value) +static inline int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { return -EINVAL; } -- cgit v1.2.3 From 64237470ddf97b63155fbd272c9e743e01d5f514 Mon Sep 17 00:00:00 2001 From: Lin Zhang Date: Fri, 6 Oct 2017 01:37:29 +0800 Subject: net: phonet: mark header_ops as const Signed-off-by: Lin Zhang Signed-off-by: David S. Miller --- include/linux/if_phonet.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/if_phonet.h b/include/linux/if_phonet.h index bbcdb0a767d8..a118ee4a8428 100644 --- a/include/linux/if_phonet.h +++ b/include/linux/if_phonet.h @@ -10,5 +10,5 @@ #include -extern struct header_ops phonet_header_ops; +extern const struct header_ops phonet_header_ops; #endif -- cgit v1.2.3 From 067cae47771c864604969fd902efe10916e0d79c Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Thu, 5 Oct 2017 21:52:12 -0700 Subject: bpf: Use char in prog and map name Instead of u8, use char for prog and map name. It can avoid the userspace tool getting compiler's signess warning. The bpf_prog_aux, bpf_map, bpf_attr, bpf_prog_info and bpf_map_info are changed. Signed-off-by: Martin KaFai Lau Cc: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index a67daea731ab..bc7da2ddfcaf 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -56,7 +56,7 @@ struct bpf_map { struct work_struct work; atomic_t usercnt; struct bpf_map *inner_map_meta; - u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; }; /* function argument constraints */ @@ -189,7 +189,7 @@ struct bpf_prog_aux { struct bpf_prog *prog; struct user_struct *user; u64 load_time; /* ns since boottime */ - u8 name[BPF_OBJ_NAME_LEN]; + char name[BPF_OBJ_NAME_LEN]; union { struct work_struct work; struct rcu_head rcu; -- cgit v1.2.3 From 821f1b21cabb46827ce39ddf82e2789680b5042a Mon Sep 17 00:00:00 2001 From: Roopa Prabhu Date: Fri, 6 Oct 2017 22:12:37 -0700 Subject: bridge: add new BR_NEIGH_SUPPRESS port flag to suppress arp and nd flood This patch adds a new bridge port flag BR_NEIGH_SUPPRESS to suppress arp and nd flood on bridge ports. It implements rfc7432, section 10. https://tools.ietf.org/html/rfc7432#section-10 for ethernet VPN deployments. It is similar to the existing BR_PROXYARP* flags but has a few semantic differences to conform to EVPN standard. Unlike the existing flags, this new flag suppresses flood of all neigh discovery packets (arp and nd) to tunnel ports. Supports both vlan filtering and non-vlan filtering bridges. In case of EVPN, it is mainly used to avoid flooding of arp and nd packets to tunnel ports like vxlan. This patch adds netlink and sysfs support to set this bridge port flag. Signed-off-by: Roopa Prabhu Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 3cd18ac0697f..316ee113a220 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -49,6 +49,7 @@ struct br_ip_list { #define BR_MULTICAST_TO_UNICAST BIT(12) #define BR_VLAN_TUNNEL BIT(13) #define BR_BCAST_FLOOD BIT(14) +#define BR_NEIGH_SUPPRESS BIT(15) #define BR_DEFAULT_AGEING_TIME (300 * HZ) -- cgit v1.2.3 From f8e0731db4a02c8c5624e1dce6dc983210a51f64 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Fri, 15 Sep 2017 11:53:07 +0200 Subject: dma-fence: fix dma_fence_get_rcu_safe v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When dma_fence_get_rcu() fails to acquire a reference it doesn't necessary mean that there is no fence at all. It usually mean that the fence was replaced by a new one and in this situation we certainly want to have the new one as result and *NOT* NULL. v2: Keep extra check after dma_fence_get_rcu(). Signed-off-by: Christian König Cc: Chris Wilson Cc: Daniel Vetter Cc: Sumit Semwal Cc: linux-media@vger.kernel.org Cc: dri-devel@lists.freedesktop.org Cc: linaro-mm-sig@lists.linaro.org Reviewed-by: Maarten Lankhorst Signed-off-by: Alex Deucher Link: https://patchwork.freedesktop.org/patch/msgid/1505469187-3565-1-git-send-email-deathsimple@vodafone.de --- include/linux/dma-fence.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index 171895072435..ca974224d92e 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -248,9 +248,12 @@ dma_fence_get_rcu_safe(struct dma_fence * __rcu *fencep) struct dma_fence *fence; fence = rcu_dereference(*fencep); - if (!fence || !dma_fence_get_rcu(fence)) + if (!fence) return NULL; + if (!dma_fence_get_rcu(fence)) + continue; + /* The atomic_inc_not_zero() inside dma_fence_get_rcu() * provides a full memory barrier upon success (such as now). * This is paired with the write barrier from assigning -- cgit v1.2.3 From 0912bda436388a02c72164b4b490b578e64c012e Mon Sep 17 00:00:00 2001 From: Yotam Gigi Date: Mon, 9 Oct 2017 11:15:32 +0200 Subject: net: bridge: Export bridge multicast router state Add an access function that, given a bridge netdevice, returns whether the bridge device is currently an mrouter or not. The function uses the already existing br_multicast_is_router function to check that. This function is needed in order to allow ports that join an already existing bridge to know the current mrouter state of the bridge device. Together with the bridge device mrouter ports switchdev notifications, it is possible to have full offloading of the semantics of the bridge device mcast router state. Due to the fact that the bridge multicast router status can change in packet RX path, take the multicast_router bridge spinlock to protect the read. Signed-off-by: Yotam Gigi Reviewed-by: Nogah Frankel Reviewed-by: Nikolay Aleksandrov Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index 316ee113a220..02639ebea2f0 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -64,6 +64,7 @@ int br_multicast_list_adjacent(struct net_device *dev, bool br_multicast_has_querier_anywhere(struct net_device *dev, int proto); bool br_multicast_has_querier_adjacent(struct net_device *dev, int proto); bool br_multicast_enabled(const struct net_device *dev); +bool br_multicast_router(const struct net_device *dev); #else static inline int br_multicast_list_adjacent(struct net_device *dev, struct list_head *br_ip_list) @@ -84,6 +85,10 @@ static inline bool br_multicast_enabled(const struct net_device *dev) { return false; } +static inline bool br_multicast_router(const struct net_device *dev) +{ + return false; +} #endif #if IS_ENABLED(CONFIG_BRIDGE) && IS_ENABLED(CONFIG_BRIDGE_VLAN_FILTERING) -- cgit v1.2.3 From ed468ebee04ffba0231a8f50616bdb250752a891 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:44 +0300 Subject: qed: Add ll2 ability of opening a secondary queue When more than one ll2 queue is opened ( that is not an OOO queue ) ll2 code does not have enough information to determine whether the queue is the main one or not, so a new field is added to the acquire input data to expose the control of determining whether the queue is the main queue or a secondary queue. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 89fa0bbd54f3..d7cca590b743 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -171,6 +171,7 @@ struct qed_ll2_acquire_data_inputs { enum qed_ll2_tx_dest tx_dest; enum qed_ll2_error_handle ai_err_packet_too_big; enum qed_ll2_error_handle ai_err_no_buf; + bool secondary_queue; u8 gsi_enable; }; -- cgit v1.2.3 From 77caa792f5d8e4ecc88eb1cf4b9c478c07e0ec57 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:45 +0300 Subject: qed: Add ll2 option for dropping a tx packet The option of sending a packet on the ll2 and dropping it exists in hardware and was not used until now, thus not exposed. The iWARP unaligned MPA flow requires this functionality for flushing the tx queue. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index d7cca590b743..95fdf02a3bbe 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -64,6 +64,7 @@ enum qed_ll2_roce_flavor_type { enum qed_ll2_tx_dest { QED_LL2_TX_DEST_NW, /* Light L2 TX Destination to the Network */ QED_LL2_TX_DEST_LB, /* Light L2 TX Destination to the Loopback */ + QED_LL2_TX_DEST_DROP, /* Light L2 Drop the TX packet */ QED_LL2_TX_DEST_MAX }; -- cgit v1.2.3 From 6f34a284f36399501fcc034dc4522a2d8d9fa6c9 Mon Sep 17 00:00:00 2001 From: Michal Kalderon Date: Mon, 9 Oct 2017 12:37:48 +0300 Subject: qed: Add LL2 slowpath handling For iWARP unaligned MPA flow, a slowpath event of flushing an MPA connection that entered an unaligned state is required. The flush ramrod is received on the ll2 queue, and a pre-registered callback function is called to handle the flush event. Signed-off-by: Michal Kalderon Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_ll2_if.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_ll2_if.h b/include/linux/qed/qed_ll2_if.h index 95fdf02a3bbe..e755954d85fd 100644 --- a/include/linux/qed/qed_ll2_if.h +++ b/include/linux/qed/qed_ll2_if.h @@ -151,11 +151,16 @@ void (*qed_ll2_release_tx_packet_cb)(void *cxt, dma_addr_t first_frag_addr, bool b_last_fragment, bool b_last_packet); +typedef +void (*qed_ll2_slowpath_cb)(void *cxt, u8 connection_handle, + u32 opaque_data_0, u32 opaque_data_1); + struct qed_ll2_cbs { qed_ll2_complete_rx_packet_cb rx_comp_cb; qed_ll2_release_rx_packet_cb rx_release_cb; qed_ll2_complete_tx_packet_cb tx_comp_cb; qed_ll2_release_tx_packet_cb tx_release_cb; + qed_ll2_slowpath_cb slowpath_cb; void *cookie; }; -- cgit v1.2.3 From e72a060151e5bb673af24993665e270fc4f674a7 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 30 Aug 2017 13:50:39 +0200 Subject: iio: st_sensors: add register mask for status register Introduce register mask for data-ready status register since pressure sensors (e.g. LPS22HB) export just two channels (BIT(0) and BIT(1)) and BIT(2) is marked reserved while in st_sensors_new_samples_available() value read from status register is masked using 0x7. Moreover do not mask status register using active_scan_mask since now status value is properly masked and if the result is not zero the interrupt has to be consumed by the driver. This fix an issue on LPS25H and LPS331AP where channel definition is swapped respect to status register. Furthermore that change allows to properly support new devices (e.g LIS2DW12) that report just ZYXDA (data-ready) field in status register to figure out if the interrupt has been generated by the device. Fixes: 97865fe41322 (iio: st_sensors: verify interrupt event to status) Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 7b0fa8b5c120..ce0ef1c0a30a 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -139,7 +139,7 @@ struct st_sensor_das { * @mask_ihl: mask to enable/disable active low on the INT lines. * @addr_od: address to enable/disable Open Drain on the INT lines. * @mask_od: mask to enable/disable Open Drain on the INT lines. - * @addr_stat_drdy: address to read status of DRDY (data ready) interrupt + * struct stat_drdy - status register of DRDY (data ready) interrupt. * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. * @en_mask: mask to write the on/off value for enable. @@ -152,7 +152,10 @@ struct st_sensor_data_ready_irq { u8 mask_ihl; u8 addr_od; u8 mask_od; - u8 addr_stat_drdy; + struct { + u8 addr; + u8 mask; + } stat_drdy; struct { u8 en_addr; u8 en_mask; -- cgit v1.2.3 From 75d4c6d2e15d4455dfd5995c27e6e6ad6f214e39 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Wed, 30 Aug 2017 13:50:40 +0200 Subject: iio: st_sensors: decouple irq1 configuration parameters from the irq2 ones Separate data-ready configuration parameters for INT1 and INT2 pins in st_sensor_data_ready_irq data structure. That change will be use to properly support LIS2DW12 accel sensor. Signed-off-by: Lorenzo Bianconi Reviewed-by: Linus Walleij Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 16 ++++++++++------ 1 file changed, 10 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index ce0ef1c0a30a..e6c646d5d6d4 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -132,9 +132,8 @@ struct st_sensor_das { /** * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt - * @addr: address of the register. - * @mask_int1: mask to enable/disable IRQ on INT1 pin. - * @mask_int2: mask to enable/disable IRQ on INT2 pin. + * struct int1 - data-ready configuration register for INT1 pin. + * struct int2 - data-ready configuration register for INT2 pin. * @addr_ihl: address to enable/disable active low on the INT lines. * @mask_ihl: mask to enable/disable active low on the INT lines. * @addr_od: address to enable/disable Open Drain on the INT lines. @@ -145,9 +144,14 @@ struct st_sensor_das { * @en_mask: mask to write the on/off value for enable. */ struct st_sensor_data_ready_irq { - u8 addr; - u8 mask_int1; - u8 mask_int2; + struct { + u8 addr; + u8 mask; + } int1; + struct { + u8 addr; + u8 mask; + } int2; u8 addr_ihl; u8 mask_ihl; u8 addr_od; -- cgit v1.2.3 From 6733bab7bc09b67668028dab562caea1b4ff3c69 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Aug 2017 10:59:16 -0700 Subject: irq_work: Map irq_work_on_queue() to irq_work_on() in !SMP Commit 478850160636 ("irq_work: Implement remote queueing") provides irq_work_on_queue() only for SMP builds. However, providing it simplifies code that submits irq_work to lists of CPUs, eliminating the !SMP special cases. This commit therefore maps irq_work_on_queue() to irq_work_on() in !SMP builds, but validating the specified CPU. Signed-off-by: Paul E. McKenney --- include/linux/irq_work.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq_work.h b/include/linux/irq_work.h index 47b9ebd4a74f..d8c9876d5da4 100644 --- a/include/linux/irq_work.h +++ b/include/linux/irq_work.h @@ -33,10 +33,7 @@ void init_irq_work(struct irq_work *work, void (*func)(struct irq_work *)) #define DEFINE_IRQ_WORK(name, _f) struct irq_work name = { .func = (_f), } bool irq_work_queue(struct irq_work *work); - -#ifdef CONFIG_SMP bool irq_work_queue_on(struct irq_work *work, int cpu); -#endif void irq_work_tick(void); void irq_work_sync(struct irq_work *work); -- cgit v1.2.3 From 8055af0a4fddb45a8cd925fb9bc71f4b52628c9a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 6 Oct 2017 09:08:34 +0200 Subject: ACPI / PM: Remove stale function header acpi_dev_pm_get_node() isn't used or implemented, so remove it. Signed-off-by: Ulf Hansson Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 502af53ec012..3b89b4fe6812 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -868,17 +868,12 @@ int acpi_dev_runtime_suspend(struct device *dev); int acpi_dev_runtime_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); -struct acpi_device *acpi_dev_pm_get_node(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); #else static inline int acpi_dev_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_dev_runtime_resume(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_runtime_resume(struct device *dev) { return 0; } -static inline struct acpi_device *acpi_dev_pm_get_node(struct device *dev) -{ - return NULL; -} static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) { return -ENODEV; -- cgit v1.2.3 From cf4c950b87ee2f547ad3abd3aca6ae3f3eb3443f Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 14:30:52 -0700 Subject: once: switch to new jump label API Switch the DO_ONCE() macro from the deprecated jump label API to the new one. The new one is more readable, and for DO_ONCE() it also makes the generated code more icache-friendly: now the one-time initialization code is placed out-of-line at the jump target, rather than at the inline fallthrough case. Acked-by: Hannes Frederic Sowa Signed-off-by: Eric Biggers Signed-off-by: David S. Miller --- include/linux/once.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once.h b/include/linux/once.h index 9c98aaa87cbc..724724918e8b 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -5,7 +5,7 @@ #include bool __do_once_start(bool *done, unsigned long *flags); -void __do_once_done(bool *done, struct static_key *once_key, +void __do_once_done(bool *done, struct static_key_true *once_key, unsigned long *flags); /* Call a function exactly once. The idea of DO_ONCE() is to perform @@ -38,8 +38,8 @@ void __do_once_done(bool *done, struct static_key *once_key, ({ \ bool ___ret = false; \ static bool ___done = false; \ - static struct static_key ___once_key = STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) { \ + static DEFINE_STATIC_KEY_TRUE(___once_key); \ + if (static_branch_unlikely(&___once_key)) { \ unsigned long ___flags; \ ___ret = __do_once_start(&___done, &___flags); \ if (unlikely(___ret)) { \ -- cgit v1.2.3 From 1d48b080bcce0a5e7d7aa2dbcdb35deefc188c3f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Sep 2017 13:50:16 +0200 Subject: sched/debug: Rename task-state printing helpers Steve requested better names for the new task-state helper functions. So introduce the concept of task-state index for the printing and rename __get_task_state() to task_state_index() and __task_state_to_char() to task_index_to_char(). Requested-by: Steven Rostedt Signed-off-by: Peter Zijlstra (Intel) Acked-by: Steven Rostedt Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20170929115016.pzlqc7ss3ccystyg@hirez.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bdd6ad6fcce1..33a01f4deb00 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1248,7 +1248,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) #define TASK_REPORT_IDLE (TASK_REPORT + 1) #define TASK_REPORT_MAX (TASK_REPORT_IDLE << 1) -static inline unsigned int __get_task_state(struct task_struct *tsk) +static inline unsigned int task_state_index(struct task_struct *tsk) { unsigned int tsk_state = READ_ONCE(tsk->state); unsigned int state = (tsk_state | tsk->exit_state) & TASK_REPORT; @@ -1261,7 +1261,7 @@ static inline unsigned int __get_task_state(struct task_struct *tsk) return fls(state); } -static inline char __task_state_to_char(unsigned int state) +static inline char task_index_to_char(unsigned int state) { static const char state_char[] = "RSDTtXZPI"; @@ -1272,7 +1272,7 @@ static inline char __task_state_to_char(unsigned int state) static inline char task_state_to_char(struct task_struct *tsk) { - return __task_state_to_char(__get_task_state(tsk)); + return task_index_to_char(task_state_index(tsk)); } /** -- cgit v1.2.3 From 799ba82de01e7543f6b2042e1a739f3a20255f23 Mon Sep 17 00:00:00 2001 From: luca abeni Date: Thu, 7 Sep 2017 12:09:31 +0200 Subject: sched/deadline: Use C bitfields for the state flags Ask the compiler to use a single bit for storing true / false values, instead of wasting the size of a whole int value. Tested with gcc 5.4.0 on x86_64, and the compiler produces the expected Assembly (similar to the Assembly code generated when explicitly accessing the bits with bitmasks, "&" and "|"). Signed-off-by: luca abeni Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Daniel Bristot de Oliveira Cc: Juri Lelli Cc: Linus Torvalds Cc: Mathieu Poirier Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1504778971-13573-5-git-send-email-luca.abeni@santannapisa.it Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 33a01f4deb00..0f897dfc195e 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -474,10 +474,10 @@ struct sched_dl_entity { * conditions between the inactive timer handler and the wakeup * code. */ - int dl_throttled; - int dl_boosted; - int dl_yielded; - int dl_non_contending; + int dl_throttled : 1; + int dl_boosted : 1; + int dl_yielded : 1; + int dl_non_contending : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From ff0d4a9dc16b1f4c954f6407c233ab848bdfe8b0 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 17:49:00 +0200 Subject: sched/rt: Add a helper to test for a RT task This helper returns true if a task has elevated priority which is true for RT tasks (SCHED_RR and SCHED_FIFO) and also for SCHED_DEADLINE. A task which runs at RT priority due to PI-boosting is not considered as one with elevated priority. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171004154901.26904-1-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/sched/rt.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/rt.h b/include/linux/sched/rt.h index f93329aba31a..133001627ba1 100644 --- a/include/linux/sched/rt.h +++ b/include/linux/sched/rt.h @@ -17,6 +17,17 @@ static inline int rt_task(struct task_struct *p) return rt_prio(p->prio); } +static inline bool task_is_realtime(struct task_struct *tsk) +{ + int policy = tsk->policy; + + if (policy == SCHED_FIFO || policy == SCHED_RR) + return true; + if (policy == SCHED_DEADLINE) + return true; + return false; +} + #ifdef CONFIG_RT_MUTEXES /* * Must hold either p->pi_lock or task_rq(p)->lock. -- cgit v1.2.3 From 36436440cd19f59f5be12a1b181d299af2725140 Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Wed, 4 Oct 2017 17:49:01 +0200 Subject: block/ioprio: Use a helper to check for RT prio A side-effect to the old code is that now SCHED_DEADLINE is also recognized. Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Jens Axboe Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171004154901.26904-2-bigeasy@linutronix.de Signed-off-by: Ingo Molnar --- include/linux/ioprio.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 8c1239020d79..2f19aab84a4a 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -2,6 +2,7 @@ #define IOPRIO_H #include +#include #include /* @@ -62,7 +63,7 @@ static inline int task_nice_ioclass(struct task_struct *task) { if (task->policy == SCHED_IDLE) return IOPRIO_CLASS_IDLE; - else if (task->policy == SCHED_FIFO || task->policy == SCHED_RR) + else if (task_is_realtime(task)) return IOPRIO_CLASS_RT; else return IOPRIO_CLASS_BE; -- cgit v1.2.3 From 76f8507f7a6442215df19de74f07eabca2462f1e Mon Sep 17 00:00:00 2001 From: Kirill Tkhai Date: Fri, 29 Sep 2017 19:06:38 +0300 Subject: locking/rwsem: Add down_read_killable() Similar to down_read() and down_write_killable(), add killable version of down_read(), based on __down_read_killable() function, added in previous patches. Signed-off-by: Kirill Tkhai Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: arnd@arndb.de Cc: avagin@virtuozzo.com Cc: davem@davemloft.net Cc: fenghua.yu@intel.com Cc: gorcunov@virtuozzo.com Cc: heiko.carstens@de.ibm.com Cc: hpa@zytor.com Cc: ink@jurassic.park.msu.ru Cc: mattst88@gmail.com Cc: rientjes@google.com Cc: rth@twiddle.net Cc: schwidefsky@de.ibm.com Cc: tony.luck@intel.com Cc: viro@zeniv.linux.org.uk Link: http://lkml.kernel.org/r/150670119884.23930.2585570605960763239.stgit@localhost.localdomain Signed-off-by: Ingo Molnar --- include/linux/rwsem.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rwsem.h b/include/linux/rwsem.h index 0ad7318ff299..6ac8ee5f15dd 100644 --- a/include/linux/rwsem.h +++ b/include/linux/rwsem.h @@ -111,6 +111,7 @@ static inline int rwsem_is_contended(struct rw_semaphore *sem) * lock for reading */ extern void down_read(struct rw_semaphore *sem); +extern int __must_check down_read_killable(struct rw_semaphore *sem); /* * trylock for reading -- returns 1 if successful, 0 if contention -- cgit v1.2.3 From a8a217c22116eff6c120d753c9934089fb229af0 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 3 Oct 2017 19:25:27 +0100 Subject: locking/core: Remove {read,spin,write}_can_lock() Outside of the locking code itself, {read,spin,write}_can_lock() have no users in tree. Apparmor (the last remaining user of write_can_lock()) got moved over to lockdep by the previous patch. This patch removes the use of {read,spin,write}_can_lock() from the BUILD_LOCK_OPS macro, deferring to the trylock operation for testing the lock status, and subsequently removes the unused macros altogether. They aren't guaranteed to work in a concurrent environment and can give incorrect results in the case of qrwlock. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1507055129-12300-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/rwlock.h | 3 --- include/linux/spinlock.h | 11 ----------- include/linux/spinlock_up.h | 3 --- 3 files changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index bc2994ed66e1..766c5ca5cbd1 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -50,9 +50,6 @@ do { \ # define do_raw_write_unlock(rwlock) do {arch_write_unlock(&(rwlock)->raw_lock); __release(lock); } while (0) #endif -#define read_can_lock(rwlock) arch_read_can_lock(&(rwlock)->raw_lock) -#define write_can_lock(rwlock) arch_write_can_lock(&(rwlock)->raw_lock) - /* * Define the various rw_lock methods. Note we define these * regardless of whether CONFIG_SMP or CONFIG_PREEMPT are set. The various diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 69e079c5ff98..1e3e48041800 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -278,12 +278,6 @@ static inline void do_raw_spin_unlock(raw_spinlock_t *lock) __releases(lock) 1 : ({ local_irq_restore(flags); 0; }); \ }) -/** - * raw_spin_can_lock - would raw_spin_trylock() succeed? - * @lock: the spinlock in question. - */ -#define raw_spin_can_lock(lock) (!raw_spin_is_locked(lock)) - /* Include rwlock functions */ #include @@ -396,11 +390,6 @@ static __always_inline int spin_is_contended(spinlock_t *lock) return raw_spin_is_contended(&lock->rlock); } -static __always_inline int spin_can_lock(spinlock_t *lock) -{ - return raw_spin_can_lock(&lock->rlock); -} - #define assert_spin_locked(lock) assert_raw_spin_locked(&(lock)->rlock) /* diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 612fb530af41..901cf8f44388 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -77,7 +77,4 @@ static inline void arch_spin_unlock(arch_spinlock_t *lock) #define arch_spin_is_contended(lock) (((void)(lock), 0)) -#define arch_read_can_lock(lock) (((void)(lock), 1)) -#define arch_write_can_lock(lock) (((void)(lock), 1)) - #endif /* __LINUX_SPINLOCK_UP_H */ -- cgit v1.2.3 From a4c1887d4c1462b0ec5a8989f8ba3cdd9057a299 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 3 Oct 2017 19:25:29 +0100 Subject: locking/arch: Remove dummy arch_{read,spin,write}_lock_flags() implementations The arch_{read,spin,write}_lock_flags() macros are simply mapped to the non-flags versions by the majority of architectures, so do this in core code and remove the dummy implementations. Also remove the implementation in spinlock_up.h, since all callers of do_raw_spin_lock_flags() call local_irq_save(flags) anyway. Signed-off-by: Will Deacon Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: paulmck@linux.vnet.ibm.com Link: http://lkml.kernel.org/r/1507055129-12300-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/rwlock.h | 9 +++++++++ include/linux/spinlock.h | 4 ++++ include/linux/spinlock_up.h | 8 -------- 3 files changed, 13 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rwlock.h b/include/linux/rwlock.h index 766c5ca5cbd1..3dcd617e65ae 100644 --- a/include/linux/rwlock.h +++ b/include/linux/rwlock.h @@ -38,6 +38,15 @@ do { \ extern int do_raw_write_trylock(rwlock_t *lock); extern void do_raw_write_unlock(rwlock_t *lock) __releases(lock); #else + +#ifndef arch_read_lock_flags +# define arch_read_lock_flags(lock, flags) arch_read_lock(lock) +#endif + +#ifndef arch_write_lock_flags +# define arch_write_lock_flags(lock, flags) arch_write_lock(lock) +#endif + # define do_raw_read_lock(rwlock) do {__acquire(lock); arch_read_lock(&(rwlock)->raw_lock); } while (0) # define do_raw_read_lock_flags(lock, flags) \ do {__acquire(lock); arch_read_lock_flags(&(lock)->raw_lock, *(flags)); } while (0) diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index 1e3e48041800..4e202b00dd66 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -165,6 +165,10 @@ static inline void do_raw_spin_lock(raw_spinlock_t *lock) __acquires(lock) arch_spin_lock(&lock->raw_lock); } +#ifndef arch_spin_lock_flags +#define arch_spin_lock_flags(lock, flags) arch_spin_lock(lock) +#endif + static inline void do_raw_spin_lock_flags(raw_spinlock_t *lock, unsigned long *flags) __acquires(lock) { diff --git a/include/linux/spinlock_up.h b/include/linux/spinlock_up.h index 901cf8f44388..0ac9112c1bbe 100644 --- a/include/linux/spinlock_up.h +++ b/include/linux/spinlock_up.h @@ -32,14 +32,6 @@ static inline void arch_spin_lock(arch_spinlock_t *lock) barrier(); } -static inline void -arch_spin_lock_flags(arch_spinlock_t *lock, unsigned long flags) -{ - local_irq_save(flags); - lock->slock = 0; - barrier(); -} - static inline int arch_spin_trylock(arch_spinlock_t *lock) { char oldval = lock->slock; -- cgit v1.2.3 From de8cd83e91bc3ee212b3e6ec6e4283af9e4ab269 Mon Sep 17 00:00:00 2001 From: Steve Grubb Date: Mon, 2 Oct 2017 20:21:39 -0400 Subject: audit: Record fanotify access control decisions The fanotify interface allows user space daemons to make access control decisions. Under common criteria requirements, we need to optionally record decisions based on policy. This patch adds a bit mask, FAN_AUDIT, that a user space daemon can 'or' into the response decision which will tell the kernel that it made a decision and record it. It would be used something like this in user space code: response.response = FAN_DENY | FAN_AUDIT; write(fd, &response, sizeof(struct fanotify_response)); When the syscall ends, the audit system will record the decision as a AUDIT_FANOTIFY auxiliary record to denote that the reason this event occurred is the result of an access control decision from fanotify rather than DAC or MAC policy. A sample event looks like this: type=PATH msg=audit(1504310584.332:290): item=0 name="./evil-ls" inode=1319561 dev=fc:03 mode=0100755 ouid=1000 ogid=1000 rdev=00:00 obj=unconfined_u:object_r:user_home_t:s0 nametype=NORMAL type=CWD msg=audit(1504310584.332:290): cwd="/home/sgrubb" type=SYSCALL msg=audit(1504310584.332:290): arch=c000003e syscall=2 success=no exit=-1 a0=32cb3fca90 a1=0 a2=43 a3=8 items=1 ppid=901 pid=959 auid=1000 uid=1000 gid=1000 euid=1000 suid=1000 fsuid=1000 egid=1000 sgid=1000 fsgid=1000 tty=pts1 ses=3 comm="bash" exe="/usr/bin/bash" subj=unconfined_u:unconfined_r:unconfined_t: s0-s0:c0.c1023 key=(null) type=FANOTIFY msg=audit(1504310584.332:290): resp=2 Prior to using the audit flag, the developer needs to call fanotify_init or'ing in FAN_ENABLE_AUDIT to ensure that the kernel supports auditing. The calling process must also have the CAP_AUDIT_WRITE capability. Signed-off-by: sgrubb Reviewed-by: Amir Goldstein Signed-off-by: Jan Kara --- include/linux/audit.h | 10 ++++++++++ include/linux/fsnotify_backend.h | 1 + 2 files changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index cb708eb8accc..d66220dac364 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -356,6 +356,7 @@ extern int __audit_log_bprm_fcaps(struct linux_binprm *bprm, extern void __audit_log_capset(const struct cred *new, const struct cred *old); extern void __audit_mmap_fd(int fd, int flags); extern void __audit_log_kern_module(char *name); +extern void __audit_fanotify(unsigned int response); static inline void audit_ipc_obj(struct kern_ipc_perm *ipcp) { @@ -452,6 +453,12 @@ static inline void audit_log_kern_module(char *name) __audit_log_kern_module(name); } +static inline void audit_fanotify(unsigned int response) +{ + if (!audit_dummy_context()) + __audit_fanotify(response); +} + extern int audit_n_rules; extern int audit_signals; #else /* CONFIG_AUDITSYSCALL */ @@ -568,6 +575,9 @@ static inline void audit_log_kern_module(char *name) { } +static inline void audit_fanotify(unsigned int response) +{ } + static inline void audit_ptrace(struct task_struct *t) { } #define audit_n_rules 0 diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c6c69318752b..4a474f972910 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -190,6 +190,7 @@ struct fsnotify_group { int f_flags; unsigned int max_marks; struct user_struct *user; + bool audit; } fanotify_data; #endif /* CONFIG_FANOTIFY */ }; -- cgit v1.2.3 From 8264c3214f28b52b399d9e03bfa7feec275a0d71 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Mon, 9 Oct 2017 13:34:41 +0300 Subject: writeback: merge try_to_writeback_inodes_sb_nr() into caller Since commit 925a6efb8ff0c ("Btrfs: stop using try_to_writeback_inodes_sb_nr to flush delalloc") this function hasn't been used outside so stop exporting it. In addition we merge it into try_to_writeback_inodes_sb() which is the only caller. Also change return type of try_to_writeback_inodes_sb to void as the only user ext4 doesn't care. Reviewed-by: Jan Kara Signed-off-by: Rakesh Pandit Signed-off-by: Jens Axboe --- include/linux/writeback.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index dd1d2c23f743..e15ec14085ad 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -163,9 +163,7 @@ struct bdi_writeback; void writeback_inodes_sb(struct super_block *, enum wb_reason reason); void writeback_inodes_sb_nr(struct super_block *, unsigned long nr, enum wb_reason reason); -bool try_to_writeback_inodes_sb(struct super_block *, enum wb_reason reason); -bool try_to_writeback_inodes_sb_nr(struct super_block *, unsigned long nr, - enum wb_reason reason); +void try_to_writeback_inodes_sb(struct super_block *sb, enum wb_reason reason); void sync_inodes_sb(struct super_block *); void wakeup_flusher_threads(enum wb_reason reason); void wakeup_flusher_threads_bdi(struct backing_dev_info *bdi, -- cgit v1.2.3 From 46dde01f6bab35d99af111fcc02ca3ee1146050f Mon Sep 17 00:00:00 2001 From: Kamal Dasu Date: Tue, 22 Aug 2017 16:45:21 -0400 Subject: mtd: spi-nor: add spi_nor_init() function This patch extracts some chunks from spi_nor_init_params and spi_nor_scan() and moves them into a new spi_nor_init() function. Indeed, spi_nor_init() regroups all the required SPI flash commands to be sent to the SPI flash memory before performing any runtime operations (Fast Read, Page Program, Sector Erase, ...). Hence spi_nor_init(): 1) removes the flash protection if applicable for certain vendors. 2) sets the Quad Enable bit, if needed, before using Quad SPI protocols. 3) makes the memory enter its (stateful) 4-byte address mode, if needed, for SPI flash memory > 128Mbits not supporting the 4-byte address instruction set. spi_nor_scan() now ends by calling spi_nor_init() once the probe phase has completed. Further patches could also use spi_nor_init() to implement the mtd->_resume() handler for the spi-nor framework. Signed-off-by: Kamal Dasu Signed-off-by: Cyrille Pitchen --- include/linux/mtd/spi-nor.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 1f0a7fc7772f..d0c66a0975cf 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -231,11 +231,18 @@ enum spi_nor_option_flags { SNOR_F_USE_CLSR = BIT(5), }; +/** + * struct flash_info - Forward declaration of a structure used internally by + * spi_nor_scan() + */ +struct flash_info; + /** * struct spi_nor - Structure for defining a the SPI NOR layer * @mtd: point to a mtd_info structure * @lock: the lock for the read/write/erase/lock/unlock operations * @dev: point to a spi device, or a spi nor controller device. + * @info: spi-nor part JDEC MFR id and other info * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector @@ -262,6 +269,7 @@ enum spi_nor_option_flags { * @flash_lock: [FLASH-SPECIFIC] lock a region of the SPI NOR * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is + * @quad_enable: [FLASH-SPECIFIC] enables SPI NOR quad mode * completely locked * @priv: the private data */ @@ -269,6 +277,7 @@ struct spi_nor { struct mtd_info mtd; struct mutex lock; struct device *dev; + const struct flash_info *info; u32 page_size; u8 addr_width; u8 erase_opcode; @@ -296,6 +305,7 @@ struct spi_nor { int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); + int (*quad_enable)(struct spi_nor *nor); void *priv; }; -- cgit v1.2.3 From e7bf8249e8f1bac64885eeccb55bcf6111901a81 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:10 -0700 Subject: bpf: encapsulate verifier log state into a structure Put the loose log_* variables into a structure. This will make it simpler to remove the global verifier state in following patches. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b8d200f60a40..163541ba70d9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,6 +115,19 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +struct bpf_verifer_log { + u32 level; + char *kbuf; + char __user *ubuf; + u32 len_used; + u32 len_total; +}; + +static inline bool bpf_verifier_log_full(const struct bpf_verifer_log *log) +{ + return log->len_used >= log->len_total - 1; +} + struct bpf_verifier_env; struct bpf_ext_analyzer_ops { int (*insn_hook)(struct bpf_verifier_env *env, -- cgit v1.2.3 From 61bd5218eef349fcacc4976a251bc83a4748b4af Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:11 -0700 Subject: bpf: move global verifier log into verifier environment The biggest piece of global state protected by the verifier lock is the verifier_log. Move that log to struct bpf_verifier_env. struct bpf_verifier_env has to be passed now to all invocations of verbose(). Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 163541ba70d9..5ddb9a626a51 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -152,6 +152,8 @@ struct bpf_verifier_env { bool allow_ptr_leaks; bool seen_direct_write; struct bpf_insn_aux_data *insn_aux_data; /* array of per-insn state */ + + struct bpf_verifer_log log; }; int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, -- cgit v1.2.3 From a2a7d5701052542cd2260e7659b12443e0a74733 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 9 Oct 2017 10:30:15 -0700 Subject: bpf: write back the verifier log buffer as it gets filled Verifier log buffer can be quite large (up to 16MB currently). As Eric Dumazet points out if we allow multiple verification requests to proceed simultaneously, malicious user may use the verifier as a way of allocating large amounts of unswappable memory to OOM the host. Switch to a strategy of allocating a smaller buffer (1024B) and writing it out into the user buffer after every print. While at it remove the old BUG_ON(). This is in preparation of the global verifier lock removal. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 5ddb9a626a51..f00ef751c1c5 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,9 +115,11 @@ struct bpf_insn_aux_data { #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ +#define BPF_VERIFIER_TMP_LOG_SIZE 1024 + struct bpf_verifer_log { u32 level; - char *kbuf; + char kbuf[BPF_VERIFIER_TMP_LOG_SIZE]; char __user *ubuf; u32 len_used; u32 len_total; -- cgit v1.2.3 From a542f9a04d30570d367770e34f2c5d0c1d313337 Mon Sep 17 00:00:00 2001 From: Lorenzo Bianconi Date: Sun, 17 Sep 2017 18:17:09 +0200 Subject: iio: st_sensors: split open-drain parameters for irq1 and irq2 Define st_sensor_int_drdy structure in st_sensor_data_ready_irq in order to contain irq line parameters of the device. Moreover separate data-ready open-drain configuration parameters for INT1 and INT2 pins in st_sensor_data_ready_irq data structure. That change will be used to properly support LIS3DHH accel sensor. Signed-off-by: Lorenzo Bianconi Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 28 ++++++++++++++++------------ 1 file changed, 16 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index e6c646d5d6d4..f9bd6e8ab138 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -130,32 +130,36 @@ struct st_sensor_das { u8 mask; }; +/** + * struct st_sensor_int_drdy - ST sensor device drdy line parameters + * @addr: address of INT drdy register. + * @mask: mask to enable drdy line. + * @addr_od: address to enable/disable Open Drain on the INT line. + * @mask_od: mask to enable/disable Open Drain on the INT line. + */ +struct st_sensor_int_drdy { + u8 addr; + u8 mask; + u8 addr_od; + u8 mask_od; +}; + /** * struct st_sensor_data_ready_irq - ST sensor device data-ready interrupt * struct int1 - data-ready configuration register for INT1 pin. * struct int2 - data-ready configuration register for INT2 pin. * @addr_ihl: address to enable/disable active low on the INT lines. * @mask_ihl: mask to enable/disable active low on the INT lines. - * @addr_od: address to enable/disable Open Drain on the INT lines. - * @mask_od: mask to enable/disable Open Drain on the INT lines. * struct stat_drdy - status register of DRDY (data ready) interrupt. * struct ig1 - represents the Interrupt Generator 1 of sensors. * @en_addr: address of the enable ig1 register. * @en_mask: mask to write the on/off value for enable. */ struct st_sensor_data_ready_irq { - struct { - u8 addr; - u8 mask; - } int1; - struct { - u8 addr; - u8 mask; - } int2; + struct st_sensor_int_drdy int1; + struct st_sensor_int_drdy int2; u8 addr_ihl; u8 mask_ihl; - u8 addr_od; - u8 mask_od; struct { u8 addr; u8 mask; -- cgit v1.2.3 From eca8b53a6769e60d6d8240d71202d73b0af81901 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 6 Oct 2017 17:55:59 -0700 Subject: blk-stat: delete useless code Fix two issues: - the per-cpu stat flush is unnecessary, nobody uses per-cpu stat except sum it to global stat. We can do the calculation there. The flush just wastes cpu time. - some fields are signed int/s64. I don't see the point. Reviewed-by: Omar Sandoval Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a2d2aa709cef..3385c89f402e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -329,11 +329,10 @@ static inline bool blk_qc_t_is_internal(blk_qc_t cookie) } struct blk_rq_stat { - s64 mean; + u64 mean; u64 min; u64 max; - s32 nr_samples; - s32 nr_batch; + u32 nr_samples; u64 batch; }; -- cgit v1.2.3 From d59158162e032917a428704160a2063a02405ec6 Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Tue, 10 Oct 2017 15:51:37 -0700 Subject: tracing: Add support for preempt and irq enable/disable events Preempt and irq trace events can be used for tracing the start and end of an atomic section which can be used by a trace viewer like systrace to graphically view the start and end of an atomic section and correlate them with latencies and scheduling issues. This also serves as a prelude to using synthetic events or probes to rewrite the preempt and irqsoff tracers, along with numerous benefits of using trace events features for these events. Link: http://lkml.kernel.org/r/20171006005432.14244-3-joelaf@google.com Link: http://lkml.kernel.org/r/20171010225137.17370-1-joelaf@google.com Cc: Peter Zilstra Cc: kernel-team@android.com Signed-off-by: Joel Fernandes Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 346f8294e40a..1f8545caa691 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -769,7 +769,8 @@ static inline unsigned long get_lock_parent_ip(void) static inline void time_hardirqs_off(unsigned long a0, unsigned long a1) { } #endif -#ifdef CONFIG_PREEMPT_TRACER +#if defined(CONFIG_PREEMPT_TRACER) || \ + (defined(CONFIG_DEBUG_PREEMPT) && defined(CONFIG_PREEMPTIRQ_EVENTS)) extern void trace_preempt_on(unsigned long a0, unsigned long a1); extern void trace_preempt_off(unsigned long a0, unsigned long a1); #else -- cgit v1.2.3 From 604a7aeb4325b8ecb23df163c89fc12248302a4e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 5 Oct 2017 17:26:21 +0530 Subject: PM / OPP: Rename dev_pm_opp_register_put_opp_helper() The routine is named incorrectly since the first attempt as there is nothing like a put_opp() helper. We wanted to unregister the set_opp() helper here and so it should rather be named as dev_pm_opp_unregister_set_opp_helper(). Signed-off-by: Viresh Kumar Reviewed-by: Stephen Boyd Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 51ec727b4824..849d21dc4ca7 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -124,7 +124,7 @@ void dev_pm_opp_put_regulators(struct opp_table *opp_table); struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); -void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table); +void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -243,7 +243,7 @@ static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device return ERR_PTR(-ENOTSUPP); } -static inline void dev_pm_opp_register_put_opp_helper(struct opp_table *opp_table) {} +static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { -- cgit v1.2.3 From e901b9873876ca30a09253731bd3a6b00c44b5b0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Wed, 4 Oct 2017 16:15:59 +0200 Subject: usb: core: Add a helper function to check the validity of EP type in URB This patch adds a new helper function to perform a sanity check of the given URB to see whether it contains a valid endpoint. It's a light- weight version of what usb_submit_urb() does, but without the kernel warning followed by the stack trace, just returns an error code. Especially for a driver that doesn't parse the descriptor but fills the URB with the fixed endpoint (e.g. some quirks for non-compliant devices), this kind of check is preferable at the probe phase before actually submitting the urb. Tested-by: Andrey Konovalov Acked-by: Greg Kroah-Hartman Signed-off-by: Takashi Iwai --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index cb9fbd54386e..2b861804fffa 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1728,6 +1728,8 @@ static inline int usb_urb_dir_out(struct urb *urb) return (urb->transfer_flags & URB_DIR_MASK) == URB_DIR_OUT; } +int usb_urb_ep_type_check(const struct urb *urb); + void *usb_alloc_coherent(struct usb_device *dev, size_t size, gfp_t mem_flags, dma_addr_t *dma); void usb_free_coherent(struct usb_device *dev, size_t size, -- cgit v1.2.3 From 63705c406a8adbd6f26691148b09d466dd4d8d2f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 10 Oct 2017 18:49:22 +0200 Subject: ACPI / PM: Combine two identical device resume routines Notice that acpi_dev_runtime_resume() and acpi_dev_resume_early() are actually literally identical after some more-or-less recent changes, so rename acpi_dev_runtime_resume() to acpi_dev_resume(), use it everywhere instead of acpi_dev_resume_early() and drop the latter. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- include/linux/acpi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 3b89b4fe6812..d18c92d4ba19 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -865,7 +865,7 @@ static inline void arch_reserve_mem_area(acpi_physical_address addr, #if defined(CONFIG_ACPI) && defined(CONFIG_PM) int acpi_dev_runtime_suspend(struct device *dev); -int acpi_dev_runtime_resume(struct device *dev); +int acpi_dev_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); int acpi_dev_pm_attach(struct device *dev, bool power_on); @@ -882,7 +882,6 @@ static inline int acpi_dev_pm_attach(struct device *dev, bool power_on) #if defined(CONFIG_ACPI) && defined(CONFIG_PM_SLEEP) int acpi_dev_suspend_late(struct device *dev); -int acpi_dev_resume_early(struct device *dev); int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); -- cgit v1.2.3 From eeb2d80d502af28e5660ff4bbe00f90ceb82c2db Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Thu, 5 Oct 2017 16:24:03 -0700 Subject: ACPI / LPIT: Add Low Power Idle Table (LPIT) support Add functionality to read LPIT table, which provides: - Sysfs interface to read residency counters via /sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us /sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us Here the count "low_power_idle_cpu_residency_us" shows the time spent by CPU package in low power state. This is read via MSR interface, which points to MSR for PKG C10. Here the count "low_power_idle_system_residency_us" show the count the system was in low power state. This is read via MMIO interface. This is mapped to SLP_S0 residency on modern Intel systems. This residency is achieved only when CPU is in PKG C10 and all functional blocks are in low power state. It is possible that none of the above counters present or anyone of the counter present or all counters present. For example: On my Kabylake system both of the above counters present. After suspend to idle these counts updated and prints: 6916179 6998564 This counter can be read by tools like turbostat to display. Or it can be used to debug, if modern systems are reaching desired low power state. - Provides an interface to read residency counter memory address This address can be used to get the base address of PMC memory mapped IO. This is utilized by intel_pmc_core driver to print more debug information. In addition, to avoid code duplication to read iomem, removed the read of iomem from acpi_os_read_memory() in osl.c and made a common function acpi_os_read_iomem(). This new function is used for reading iomem in in both osl.c and acpi_lpit.c. Link: http://www.uefi.org/sites/default/files/resources/Intel_ACPI_Low_Power_S0_Idle.pdf Signed-off-by: Srinivas Pandruvada Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index d18c92d4ba19..2b1738f840ab 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -1248,4 +1248,13 @@ int acpi_irq_get(acpi_handle handle, unsigned int index, struct resource *res) } #endif +#ifdef CONFIG_ACPI_LPIT +int lpit_read_residency_count_address(u64 *address); +#else +static inline int lpit_read_residency_count_address(u64 *address) +{ + return -EINVAL; +} +#endif + #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 0e708fc602531b8355b5de6ea7c98f09129b223f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 3 Oct 2017 09:11:07 +0200 Subject: PM / sleep: Remove pm_complete_with_resume_check() According to recent changes for ACPI, the are longer any users of pm_complete_with_resume_check(), thus let's drop it. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 47ded8aa8a5d..a0ceeccf2846 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -736,7 +736,6 @@ extern int pm_generic_poweroff_noirq(struct device *dev); extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); -extern void pm_complete_with_resume_check(struct device *dev); #else /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From e81cef5d3001501350b4e596b4bd6dfd26187afa Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 09:25:39 -0400 Subject: blk_rq_map_user_iov(): move iov_iter_advance() down ... into bio_{map,copy}_user_iov() Signed-off-by: Al Viro --- include/linux/bio.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 275c91c99516..6050c0caa4e1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -462,7 +462,7 @@ extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, - const struct iov_iter *, gfp_t); + struct iov_iter *, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -494,7 +494,7 @@ extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, struct rq_map_data *, - const struct iov_iter *, + struct iov_iter *, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); -- cgit v1.2.3 From faea13297ea739f94913d56d7b865134b4fc8726 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 24 Sep 2017 14:03:29 -0400 Subject: kill iov_shorten() Signed-off-by: Al Viro --- include/linux/uio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 8a642cda641c..5885daeae721 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -80,8 +80,6 @@ static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) ((iov = iov_iter_iovec(&(iter))), 1); \ iov_iter_advance(&(iter), (iov).iov_len)) -unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); - size_t iov_iter_copy_from_user_atomic(struct page *page, struct iov_iter *i, unsigned long offset, size_t bytes); void iov_iter_advance(struct iov_iter *i, size_t bytes); -- cgit v1.2.3 From 09cf698a594276139b7dfafb232af3fe4fbc4438 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 18 Feb 2017 01:44:03 -0500 Subject: new primitive: iov_iter_for_each_range() For kvec and bvec: feeds segments to given callback as long as it returns 0. For iovec and pipe: fails. Signed-off-by: Al Viro --- include/linux/uio.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 5885daeae721..e67e12adb136 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -244,4 +244,8 @@ int compat_import_iovec(int type, const struct compat_iovec __user * uvector, int import_single_range(int type, void __user *buf, size_t len, struct iovec *iov, struct iov_iter *i); +int iov_iter_for_each_range(struct iov_iter *i, size_t bytes, + int (*f)(struct kvec *vec, void *context), + void *context); + #endif -- cgit v1.2.3 From 4e659dbe2d02a56ca0df25c77e099760252a329c Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Mon, 14 Aug 2017 15:46:17 -0700 Subject: firmware: qcom: scm: Expose secure IO service The secure IO service provides operations for reading and writing secure memory from non-secure mode, expose this API through SCM. Reviewed-by: Stephen Boyd Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index e5380471c2cd..e8357f570695 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -43,6 +43,8 @@ extern int qcom_scm_set_remote_state(u32 state, u32 id); extern int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare); extern int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size); extern int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare); +extern int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val); +extern int qcom_scm_io_writel(phys_addr_t addr, unsigned int val); #else static inline int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus) @@ -73,5 +75,7 @@ qcom_scm_set_remote_state(u32 state,u32 id) { return -ENODEV; } static inline int qcom_scm_restore_sec_cfg(u32 device_id, u32 spare) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_size(u32 spare, size_t *size) { return -ENODEV; } static inline int qcom_scm_iommu_secure_ptbl_init(u64 addr, u32 size, u32 spare) { return -ENODEV; } +static inline int qcom_scm_io_readl(phys_addr_t addr, unsigned int *val) { return -ENODEV; } +static inline int qcom_scm_io_writel(phys_addr_t addr, unsigned int val) { return -ENODEV; } #endif #endif -- cgit v1.2.3 From 538d5b333216c3daa7a5821307164f10af73ec8c Mon Sep 17 00:00:00 2001 From: Tomasz Nowicki Date: Wed, 20 Sep 2017 10:52:02 +0200 Subject: iommu/iova: Make rcache flush optional on IOVA allocation failure Since IOVA allocation failure is not unusual case we need to flush CPUs' rcache in hope we will succeed in next round. However, it is useful to decide whether we need rcache flush step because of two reasons: - Not scalability. On large system with ~100 CPUs iterating and flushing rcache for each CPU becomes serious bottleneck so we may want to defer it. - free_cpu_cached_iovas() does not care about max PFN we are interested in. Thus we may flush our rcaches and still get no new IOVA like in the commonly used scenario: if (dma_limit > DMA_BIT_MASK(32) && dev_is_pci(dev)) iova = alloc_iova_fast(iovad, iova_len, DMA_BIT_MASK(32) >> shift); if (!iova) iova = alloc_iova_fast(iovad, iova_len, dma_limit >> shift); 1. First alloc_iova_fast() call is limited to DMA_BIT_MASK(32) to get PCI devices a SAC address 2. alloc_iova() fails due to full 32-bit space 3. rcaches contain PFNs out of 32-bit space so free_cpu_cached_iovas() throws entries away for nothing and alloc_iova() fails again 4. Next alloc_iova_fast() call cannot take advantage of rcache since we have just defeated caches. In this case we pick the slowest option to proceed. This patch reworks flushed_rcache local flag to be additional function argument instead and control rcache flush step. Also, it updates all users to do the flush as the last chance. Signed-off-by: Tomasz Nowicki Reviewed-by: Robin Murphy Tested-by: Nate Watterson Signed-off-by: Joerg Roedel --- include/linux/iova.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index c696ee81054e..928442dda565 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -150,7 +150,7 @@ void queue_iova(struct iova_domain *iovad, unsigned long pfn, unsigned long pages, unsigned long data); unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn); + unsigned long limit_pfn, bool flush_rcache); struct iova *reserve_iova(struct iova_domain *iovad, unsigned long pfn_lo, unsigned long pfn_hi); void copy_reserved_iova(struct iova_domain *from, struct iova_domain *to); @@ -212,7 +212,8 @@ static inline void queue_iova(struct iova_domain *iovad, static inline unsigned long alloc_iova_fast(struct iova_domain *iovad, unsigned long size, - unsigned long limit_pfn) + unsigned long limit_pfn, + bool flush_rcache) { return 0; } -- cgit v1.2.3 From 24d654fadd70884a719c45ffabb0fe6033996f92 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Sat, 23 Sep 2017 12:08:16 +0200 Subject: kfifo: Fix comments Fix some typo. Signed-off-by: Christophe JAILLET Signed-off-by: Jiri Kosina --- include/linux/kfifo.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h index 41eb6fdf87a8..7b45959ebd92 100644 --- a/include/linux/kfifo.h +++ b/include/linux/kfifo.h @@ -325,7 +325,7 @@ __kfifo_uint_must_check_helper( \ * * This macro dynamically allocates a new fifo buffer. * - * The numer of elements will be rounded-up to a power of 2. + * The number of elements will be rounded-up to a power of 2. * The fifo will be release with kfifo_free(). * Return 0 if no error, otherwise an error code. */ @@ -358,9 +358,9 @@ __kfifo_int_must_check_helper( \ * @buffer: the preallocated buffer to be used * @size: the size of the internal buffer, this have to be a power of 2 * - * This macro initialize a fifo using a preallocated buffer. + * This macro initializes a fifo using a preallocated buffer. * - * The numer of elements will be rounded-up to a power of 2. + * The number of elements will be rounded-up to a power of 2. * Return 0 if no error, otherwise an error code. */ #define kfifo_init(fifo, buffer, size) \ -- cgit v1.2.3 From 7f66721a7d5bf6251f9c49aada9200c61ddcecc5 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Thu, 12 Oct 2017 19:58:10 +0300 Subject: fs/block_dev: remove vfs_msg() interface Replaced by pr_err usage in commit ef51042472f5 ("block, dax: move "select DAX" from BLOCK to FS_DAX") Signed-off-by: Rakesh Pandit Acked-by: Ross Zwisler Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9fb71fc7d0e8..72637028f3c9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -917,17 +917,6 @@ static inline void rq_flush_dcache_pages(struct request *rq) } #endif -#ifdef CONFIG_PRINTK -#define vfs_msg(sb, level, fmt, ...) \ - __vfs_msg(sb, level, fmt, ##__VA_ARGS__) -#else -#define vfs_msg(sb, level, fmt, ...) \ -do { \ - no_printk(fmt, ##__VA_ARGS__); \ - __vfs_msg(sb, "", " "); \ -} while (0) -#endif - extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); -- cgit v1.2.3 From 2355a6546a053b1c16ebefd6ce1f0cccc00e1da5 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Rafa=C5=82=20Mi=C5=82ecki?= Date: Thu, 12 Oct 2017 10:21:25 +0200 Subject: net: phy: broadcom: support new device flag for setting master mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some of Broadcom's PHYs run by default in slave mode with Automatic Slave/Master configuration disabled. It stops them from working properly with some devices. So far it has been verified for BCM54210E and BCM50212E which don't work well with Intel's I217-LM and I218-LM: http://ark.intel.com/products/60019/Intel-Ethernet-Connection-I217-LM http://ark.intel.com/products/71307/Intel-Ethernet-Connection-I218-LM I was told there is massive ping loss. This commit adds support for a new flag which can be set by an ethernet driver to fixup PHY setup. Signed-off-by: Rafał Miłecki Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index abcda9b458ab..9ac9e3e3d1e5 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -63,6 +63,7 @@ #define PHY_BRCM_EXT_IBND_TX_ENABLE 0x00002000 #define PHY_BRCM_CLEAR_RGMII_MODE 0x00004000 #define PHY_BRCM_DIS_TXCRXC_NOENRGY 0x00008000 +#define PHY_BRCM_EN_MASTER_MODE 0x00010000 /* Broadcom BCM7xxx specific workarounds */ #define PHY_BRCM_7XXX_REV(x) (((x) >> 8) & 0xff) -- cgit v1.2.3 From 511cb17448d95e4277451cdee882e72b6a9a3099 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Wed, 30 Aug 2017 14:59:10 +0530 Subject: mfd: tps65217: Introduce dependency on CONFIG_OF Currently the driver boots only via device tree hence add a dependency on CONFIG_OF. This leaves with a bunch of unused code so clean that up. This patch also makes use of probe_new function in place of the probe function so as to avoid passing i2c_device_id. Signed-off-by: Keerthy Reviewed-by: Javier Martinez Canillas Signed-off-by: Lee Jones --- include/linux/mfd/tps65217.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65217.h b/include/linux/mfd/tps65217.h index eac285756b37..b5dd108421c8 100644 --- a/include/linux/mfd/tps65217.h +++ b/include/linux/mfd/tps65217.h @@ -263,7 +263,6 @@ struct tps65217_board { struct tps65217 { struct device *dev; struct tps65217_board *pdata; - unsigned long id; struct regulator_desc desc[TPS65217_NUM_REGULATOR]; struct regmap *regmap; u8 *strobes; @@ -278,11 +277,6 @@ static inline struct tps65217 *dev_to_tps65217(struct device *dev) return dev_get_drvdata(dev); } -static inline unsigned long tps65217_chip_id(struct tps65217 *tps65217) -{ - return tps65217->id; -} - int tps65217_reg_read(struct tps65217 *tps, unsigned int reg, unsigned int *val); int tps65217_reg_write(struct tps65217 *tps, unsigned int reg, -- cgit v1.2.3 From 8275b77a15131673f955f738f0704e1d40a8edae Mon Sep 17 00:00:00 2001 From: Rui Feng Date: Thu, 7 Sep 2017 16:26:39 +0800 Subject: mfd: rts5249: Add support for RTS5250S power saving Enable power saving for RTS5250S as following steps: 1.Set 0xFE58 to enable clock power management. 2.Check cfg space whether support L1SS or not. 3.If support L1SS, set 0xFF03 to free clkreq. 4.When entering idle status, enable aspm and set parameters for L1SS and LTR. 5.Wnen entering run status, disable aspm and set parameters for L1SS and LTR. If entering L1SS mode successfully, electric current will be below 2mA. Signed-off-by: Rui Feng Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 84 ++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 116816fb9110..57e23ad1c2bc 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -573,6 +573,12 @@ #define MSGTXDATA3 0xFE47 #define MSGTXCTL 0xFE48 #define LTR_CTL 0xFE4A +#define LTR_TX_EN_MASK BIT(7) +#define LTR_TX_EN_1 BIT(7) +#define LTR_TX_EN_0 0 +#define LTR_LATENCY_MODE_MASK BIT(6) +#define LTR_LATENCY_MODE_HW 0 +#define LTR_LATENCY_MODE_SW BIT(6) #define OBFF_CFG 0xFE4C #define CDRESUMECTL 0xFE52 @@ -616,11 +622,15 @@ #define L1SUB_CONFIG2 0xFE8E #define L1SUB_AUTO_CFG 0x02 #define L1SUB_CONFIG3 0xFE8F +#define L1OFF_MBIAS2_EN_5250 BIT(7) #define DUMMY_REG_RESET_0 0xFE90 #define AUTOLOAD_CFG_BASE 0xFF00 #define PETXCFG 0xFF03 +#define FORCE_CLKREQ_DELINK_MASK BIT(7) +#define FORCE_CLKREQ_LOW 0x80 +#define FORCE_CLKREQ_HIGH 0x00 #define PM_CTRL1 0xFF44 #define CD_RESUME_EN_MASK 0xF0 @@ -844,6 +854,9 @@ #define PHY_DIG1E_RX_EN_KEEP 0x0001 #define PHY_DUM_REG 0x1F +#define PCR_ASPM_SETTING_REG1 0x160 +#define PCR_ASPM_SETTING_REG2 0x168 + #define PCR_SETTING_REG1 0x724 #define PCR_SETTING_REG2 0x814 #define PCR_SETTING_REG3 0x747 @@ -876,14 +889,79 @@ struct pcr_ops { int (*conv_clk_and_div_n)(int clk, int dir); void (*fetch_vendor_settings)(struct rtsx_pcr *pcr); void (*force_power_down)(struct rtsx_pcr *pcr, u8 pm_state); + + void (*set_aspm)(struct rtsx_pcr *pcr, bool enable); + int (*set_ltr_latency)(struct rtsx_pcr *pcr, u32 latency); + int (*set_l1off_sub)(struct rtsx_pcr *pcr, u8 val); + void (*set_l1off_cfg_sub_d0)(struct rtsx_pcr *pcr, int active); + void (*full_on)(struct rtsx_pcr *pcr); + void (*power_saving)(struct rtsx_pcr *pcr); }; enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; +#define ASPM_L1_1_EN_MASK BIT(3) +#define ASPM_L1_2_EN_MASK BIT(2) +#define PM_L1_1_EN_MASK BIT(1) +#define PM_L1_2_EN_MASK BIT(0) + +#define ASPM_L1_1_EN BIT(0) +#define ASPM_L1_2_EN BIT(1) +#define PM_L1_1_EN BIT(2) +#define PM_L1_2_EN BIT(3) +#define LTR_L1SS_PWR_GATE_EN BIT(4) +#define L1_SNOOZE_TEST_EN BIT(5) +#define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) + +enum dev_aspm_mode { + DEV_ASPM_DISABLE = 0, + DEV_ASPM_DYNAMIC, + DEV_ASPM_BACKDOOR, + DEV_ASPM_STATIC, +}; + +/* + * struct rtsx_cr_option - card reader option + * @dev_flags: device flags + * @force_clkreq_0: force clock request + * @ltr_en: enable ltr mode flag + * @ltr_enabled: ltr mode in configure space flag + * @ltr_active: ltr mode status + * @ltr_active_latency: ltr mode active latency + * @ltr_idle_latency: ltr mode idle latency + * @ltr_l1off_latency: ltr mode l1off latency + * @dev_aspm_mode: device aspm mode + * @l1_snooze_delay: l1 snooze delay + * @ltr_l1off_sspwrgate: ltr l1off sspwrgate + * @ltr_l1off_snooze_sspwrgate: ltr l1off snooze sspwrgate + */ +struct rtsx_cr_option { + u32 dev_flags; + bool force_clkreq_0; + bool ltr_en; + bool ltr_enabled; + bool ltr_active; + u32 ltr_active_latency; + u32 ltr_idle_latency; + u32 ltr_l1off_latency; + enum dev_aspm_mode dev_aspm_mode; + u32 l1_snooze_delay; + u8 ltr_l1off_sspwrgate; + u8 ltr_l1off_snooze_sspwrgate; +}; + +#define rtsx_set_dev_flag(cr, flag) \ + ((cr)->option.dev_flags |= (flag)) +#define rtsx_clear_dev_flag(cr, flag) \ + ((cr)->option.dev_flags &= ~(flag)) +#define rtsx_check_dev_flag(cr, flag) \ + ((cr)->option.dev_flags & (flag)) + struct rtsx_pcr { struct pci_dev *pci; unsigned int id; int pcie_cap; + struct rtsx_cr_option option; /* pci resources */ unsigned long addr; @@ -940,6 +1018,7 @@ struct rtsx_pcr { u8 card_drive_sel; #define ASPM_L1_EN 0x02 u8 aspm_en; + bool aspm_enabled; #define PCR_MS_PMOS (1 << 0) #define PCR_REVERSE_SOCKET (1 << 1) @@ -964,6 +1043,11 @@ struct rtsx_pcr { u8 dma_error_count; }; +#define PID_524A 0x524A +#define PID_5249 0x5249 +#define PID_5250 0x5250 +#define PID_525A 0x525A + #define CHK_PCI_PID(pcr, pid) ((pcr)->pci->device == (pid)) #define PCI_VID(pcr) ((pcr)->pci->vendor) #define PCI_PID(pcr) ((pcr)->pci->device) -- cgit v1.2.3 From 900148296b78c61aa8c443dc594c0da968c3be53 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:45:50 +0200 Subject: lightnvm: prevent target type module removal when in use MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If target type module e.g. pblk here is unloaded (rmmod) while module is in use (after creating target) system crashes. We fix this by using module API refcnt. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7dfa56ebbc6d..7b80ac911d26 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -460,6 +460,7 @@ struct nvm_tgt_type { /* For internal use */ struct list_head list; + struct module *owner; }; extern struct nvm_tgt_type *nvm_find_target_type(const char *, int); -- cgit v1.2.3 From ef56b9ce562753cacf518f081a4ff3227efdab25 Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:30 +0200 Subject: lightnvm: remove unused argument from nvm_set_tgt_bb_tbl MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vblk isn't being used anyway and if we ever have a usecase we can introduce this again. This makes the logic easier and removes unnecessary checks. Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7b80ac911d26..32ec35b5e18b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -481,7 +481,7 @@ extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *, - const struct ppa_addr *, int, int); + const struct ppa_addr *, int); extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); -- cgit v1.2.3 From eb6f168f97438bf1cac8b9b1301c662eace9e39f Mon Sep 17 00:00:00 2001 From: Rakesh Pandit Date: Fri, 13 Oct 2017 14:46:31 +0200 Subject: lightnvm: remove stale extern and unused exported symbols MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Not all exported symbols are being used outside core and there were some stale entries in lightnvm.h Signed-off-by: Rakesh Pandit Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 32ec35b5e18b..4f0e4a0fd204 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -463,8 +463,6 @@ struct nvm_tgt_type { struct module *owner; }; -extern struct nvm_tgt_type *nvm_find_target_type(const char *, int); - extern int nvm_register_tgt_type(struct nvm_tgt_type *); extern void nvm_unregister_tgt_type(struct nvm_tgt_type *); @@ -480,9 +478,6 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); -extern int nvm_set_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *, - const struct ppa_addr *, int); -extern void nvm_free_rqd_ppalist(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); extern int nvm_get_area(struct nvm_tgt_dev *, sector_t *, sector_t); @@ -491,8 +486,6 @@ extern void nvm_end_io(struct nvm_rq *); extern int nvm_bb_tbl_fold(struct nvm_dev *, u8 *, int); extern int nvm_get_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr, u8 *); -extern int nvm_dev_factory(struct nvm_dev *, int flags); - extern void nvm_part_to_tgt(struct nvm_dev *, sector_t *, int); #else /* CONFIG_NVM */ -- cgit v1.2.3 From 1a94b2d484677dc559c96251dd0e7c7b8811c378 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Javier=20Gonz=C3=A1lez?= Date: Fri, 13 Oct 2017 14:46:47 +0200 Subject: lightnvm: implement generic path for sync I/O MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Implement a generic path for sending sync I/O on LightNVM. This allows to reuse the standard synchronous path trough blk_execute_rq(), instead of implementing a wait_for_completion on the target side (e.g., pblk). Signed-off-by: Javier González Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 4f0e4a0fd204..b7f111ff4d3b 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -56,6 +56,7 @@ typedef int (nvm_get_l2p_tbl_fn)(struct nvm_dev *, u64, u32, typedef int (nvm_op_bb_tbl_fn)(struct nvm_dev *, struct ppa_addr, u8 *); typedef int (nvm_op_set_bb_fn)(struct nvm_dev *, struct ppa_addr *, int, int); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, @@ -69,6 +70,7 @@ struct nvm_dev_ops { nvm_op_set_bb_fn *set_bb_tbl; nvm_submit_io_fn *submit_io; + nvm_submit_io_sync_fn *submit_io_sync; nvm_create_dma_pool_fn *create_dma_pool; nvm_destroy_dma_pool_fn *destroy_dma_pool; @@ -477,6 +479,7 @@ extern int nvm_set_tgt_bb_tbl(struct nvm_tgt_dev *, struct ppa_addr *, int, int); extern int nvm_max_phys_sects(struct nvm_tgt_dev *); extern int nvm_submit_io(struct nvm_tgt_dev *, struct nvm_rq *); +extern int nvm_submit_io_sync(struct nvm_tgt_dev *, struct nvm_rq *); extern int nvm_erase_sync(struct nvm_tgt_dev *, struct ppa_addr *, int); extern int nvm_get_l2p_tbl(struct nvm_tgt_dev *, u64, u32, nvm_l2p_update_fn *, void *); -- cgit v1.2.3 From 1b0c22e45508ffbd645430e049c52c46bdaad3b4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 13 Oct 2017 14:12:45 +0200 Subject: regmap: avoid -Wint-in-bool-context warning When we pass the result of a multiplication as the timeout or the delay, we can get a warning from gcc-7: drivers/mmc/host/bcm2835.c:596:149: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/mfd/arizona-core.c:247:195: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c:49:27: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] The warning is a bit questionable inside of a macro, but this is intentional on the side of the gcc developers. It is also an indication of another problem: we evaluate the timeout and sleep arguments multiple times, which can have undesired side-effects when those are complex expressions. This changes the two regmap variants to use local variables for storing copies of the timeouts. This adds some more type safety, and avoids both the double-evaluation and the gcc warning. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81484 Link: http://lkml.kernel.org/r/20170726133756.2161367-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Signed-off-by: Mark Brown --- include/linux/regmap.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 1e369f0e921d..edad98890b9b 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -120,22 +120,24 @@ struct reg_sequence { */ #define regmap_read_poll_timeout(map, addr, val, cond, sleep_us, timeout_us) \ ({ \ - ktime_t __timeout = ktime_add_us(ktime_get(), timeout_us); \ + u64 __timeout_us = (timeout_us); \ + unsigned long __sleep_us = (sleep_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ int __ret; \ - might_sleep_if(sleep_us); \ + might_sleep_if(__sleep_us); \ for (;;) { \ __ret = regmap_read((map), (addr), &(val)); \ if (__ret) \ break; \ if (cond) \ break; \ - if ((timeout_us) && \ + if ((__timeout_us) && \ ktime_compare(ktime_get(), __timeout) > 0) { \ __ret = regmap_read((map), (addr), &(val)); \ break; \ } \ - if (sleep_us) \ - usleep_range(((sleep_us) >> 2) + 1, sleep_us); \ + if (__sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ } \ __ret ?: ((cond) ? 0 : -ETIMEDOUT); \ }) @@ -160,21 +162,23 @@ struct reg_sequence { */ #define regmap_field_read_poll_timeout(field, val, cond, sleep_us, timeout_us) \ ({ \ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + u64 __timeout_us = (timeout_us); \ + unsigned long __sleep_us = (sleep_us); \ + ktime_t timeout = ktime_add_us(ktime_get(), __timeout_us); \ int pollret; \ - might_sleep_if(sleep_us); \ + might_sleep_if(__sleep_us); \ for (;;) { \ pollret = regmap_field_read((field), &(val)); \ if (pollret) \ break; \ if (cond) \ break; \ - if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + if (__timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ pollret = regmap_field_read((field), &(val)); \ break; \ } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + if (__sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ } \ pollret ?: ((cond) ? 0 : -ETIMEDOUT); \ }) -- cgit v1.2.3 From 17a9422de78c3a59b490b400f555635c477f1476 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Wed, 11 Oct 2017 14:49:43 -0700 Subject: i40e/i40evf: don't trust VF to reset itself When using 'ethtool -L' on a VF to change number of requested queues from PF, we shouldn't trust the VF to reset itself after making the request. Doing it that way opens the door for a potentially malicious VF to do nasty things to the PF which should never be the case. This makes it such that after VF makes a successful request, PF will then reset the VF to institute required changes. Only if the request fails will PF send a message back to VF letting it know the request was unsuccessful. Testing-hints: There should be no real functional changes. This is simply hardening against a potentially malicious VF. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/avf/virtchnl.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/avf/virtchnl.h b/include/linux/avf/virtchnl.h index 60e5d90cb18a..3ce61342fa31 100644 --- a/include/linux/avf/virtchnl.h +++ b/include/linux/avf/virtchnl.h @@ -333,8 +333,8 @@ struct virtchnl_vsi_queue_config_info { * additional queues must be negotiated. This is a best effort request as it * is possible the PF does not have enough queues left to support the request. * If the PF cannot support the number requested it will respond with the - * maximum number it is able to support; otherwise it will respond with the - * number requested. + * maximum number it is able to support. If the request is successful, PF will + * then reset the VF to institute required changes. */ /* VF resource request */ -- cgit v1.2.3 From 42f6284ae602469762ee721ec31ddfc6170e00bc Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 12 Oct 2017 15:07:23 +0530 Subject: PM / Domains: Add support to select performance-state of domains Some platforms have the capability to configure the performance state of PM domains. This patch enhances the genpd core to support such platforms. The performance levels (within the genpd core) are identified by positive integer values, a lower value represents lower performance state. This patch adds a new genpd API, which is called by user drivers (like OPP framework): - int dev_pm_genpd_set_performance_state(struct device *dev, unsigned int state); This updates the performance state constraint of the device on its PM domain. On success, the genpd will have its performance state set to a value which is >= "state" passed to this routine. The genpd core calls the genpd->set_performance_state() callback, if implemented, else -ENODEV is returned to the caller. The PM domain drivers need to implement the following callback if they want to support performance states. - int (*set_performance_state)(struct generic_pm_domain *genpd, unsigned int state); This is called internally by the genpd core on several occasions. The genpd core passes the genpd pointer and the aggregate of the performance states of the devices supported by that genpd to this callback. This callback must update the performance state of the genpd (in a platform dependent way). The power domains can avoid supplying above callback, if they don't support setting performance-states. Currently we aren't propagating performance state changes of a subdomain to its masters as we don't have hardware that needs it right now. Over that, the performance states of subdomain and its masters may not have one-to-one mapping and would require additional information. We can get back to this once we have hardware that needs it. Tested-by: Rajendra Nayak Signed-off-by: Viresh Kumar Acked-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 84f423d5633e..9af0356bd69c 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -64,8 +64,11 @@ struct generic_pm_domain { unsigned int device_count; /* Number of devices */ unsigned int suspended_count; /* System suspend device counter */ unsigned int prepared_count; /* Suspend counter of prepared devices */ + unsigned int performance_state; /* Aggregated max performance state */ int (*power_off)(struct generic_pm_domain *domain); int (*power_on)(struct generic_pm_domain *domain); + int (*set_performance_state)(struct generic_pm_domain *genpd, + unsigned int state); struct gpd_dev_ops dev_ops; s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; @@ -121,6 +124,7 @@ struct generic_pm_domain_data { struct pm_domain_data base; struct gpd_timing_data td; struct notifier_block nb; + unsigned int performance_state; void *data; }; @@ -148,6 +152,8 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, extern int pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_remove(struct generic_pm_domain *genpd); +extern int dev_pm_genpd_set_performance_state(struct device *dev, + unsigned int state); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -188,6 +194,12 @@ static inline int pm_genpd_remove(struct generic_pm_domain *genpd) return -ENOTSUPP; } +static inline int dev_pm_genpd_set_performance_state(struct device *dev, + unsigned int state) +{ + return -ENOTSUPP; +} + #define simple_qos_governor (*(struct dev_power_governor *)(NULL)) #define pm_domain_always_on_gov (*(struct dev_power_governor *)(NULL)) #endif -- cgit v1.2.3 From b6aa98364f842f943495408895627702ad7ad44b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Wed, 11 Oct 2017 12:54:15 +0530 Subject: PM / OPP: Add dev_pm_opp_{un}register_get_pstate_helper() This adds the dev_pm_opp_{un}register_get_pstate_helper() helper routines which will be used to set the get_pstate() callback for a device. This callback will be later called internally by the OPP core to get performance state corresponding to an OPP. This is required temporarily until the time we have proper DT bindings to include the performance state information. Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 849d21dc4ca7..6c2d2e88f066 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -125,6 +125,8 @@ struct opp_table *dev_pm_opp_set_clkname(struct device *dev, const char * name); void dev_pm_opp_put_clkname(struct opp_table *opp_table); struct opp_table *dev_pm_opp_register_set_opp_helper(struct device *dev, int (*set_opp)(struct dev_pm_set_opp_data *data)); void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table); +struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, int (*get_pstate)(struct device *dev, unsigned long rate)); +void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table); int dev_pm_opp_set_rate(struct device *dev, unsigned long target_freq); int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, const struct cpumask *cpumask); int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask); @@ -245,6 +247,14 @@ static inline struct opp_table *dev_pm_opp_register_set_opp_helper(struct device static inline void dev_pm_opp_unregister_set_opp_helper(struct opp_table *opp_table) {} +static inline struct opp_table *dev_pm_opp_register_get_pstate_helper(struct device *dev, + int (*get_pstate)(struct device *dev, unsigned long rate)) +{ + return ERR_PTR(-ENOTSUPP); +} + +static inline void dev_pm_opp_unregister_get_pstate_helper(struct opp_table *opp_table) {} + static inline struct opp_table *dev_pm_opp_set_prop_name(struct device *dev, const char *name) { return ERR_PTR(-ENOTSUPP); -- cgit v1.2.3 From 20f97caf1120bd02e8ff4adbad3b44b63626feb5 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 13 Oct 2017 15:27:24 +0200 Subject: PM / QoS: Drop PM_QOS_FLAG_REMOTE_WAKEUP The PM QoS flag PM_QOS_FLAG_REMOTE_WAKEUP is not used consistently and the vast majority of code simply assumes that remote wakeup should be enabled for devices in runtime suspend if they can generate wakeup signals, so drop it. Signed-off-by: Rafael J. Wysocki Acked-by: Ulf Hansson Reviewed-by: Mika Westerberg --- include/linux/pm_qos.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 032b55909145..51f0d7e0b15f 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -39,7 +39,6 @@ enum pm_qos_flags_status { #define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1)) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) -#define PM_QOS_FLAG_REMOTE_WAKEUP (1 << 1) struct pm_qos_request { struct plist_node node; -- cgit v1.2.3 From 086c321ec57bfda5b15f3553e7def33302955852 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Tue, 10 Oct 2017 14:38:07 +0200 Subject: mtd: nand: omap2: Remove omap_nand_platform_data As driver is now configured using DT, omap_nand_platform_data structure is no longer needed. Signed-off-by: Ladislav Michl Acked-by: Roger Quadros Signed-off-by: Boris Brezillon --- include/linux/platform_data/mtd-nand-omap2.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 17d57a18bac5..8b8b124e8aea 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -66,21 +66,4 @@ struct gpmc_nand_regs { /* Deprecated. Do not use */ void __iomem *gpmc_status; }; - -struct omap_nand_platform_data { - int cs; - struct mtd_partition *parts; - int nr_parts; - bool flash_bbt; - enum nand_io xfer_type; - int devsize; - enum omap_ecc ecc_opt; - - struct device_node *elm_of_node; - - /* deprecated */ - struct gpmc_nand_regs reg; - struct device_node *of_node; - bool dev_ready; -}; #endif -- cgit v1.2.3 From e0005bd93ab5f7a80b08a7bdff9b2ade436f9482 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 10 Oct 2017 18:15:34 +0200 Subject: iio: Drop duplicate forward declaration Commit 5f420b42079c ("staging:iio: Add extended IIO channel info") added a forward declaration for struct iio_dev to but forgot to remove an existing forward declaration further down originating from commit 7ae8cf627558 ("staging: iio: chrdev.h rationalization"). Cc: Lars-Peter Clausen Signed-off-by: Lukas Wunner Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 50cae8504256..20b61347ea58 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -365,7 +365,6 @@ unsigned int iio_get_time_res(const struct iio_dev *indio_dev); #define INDIO_MAX_RAW_ELEMENTS 4 struct iio_trigger; /* forward declaration */ -struct iio_dev; /** * struct iio_info - constant information about device -- cgit v1.2.3 From 7c39afb394c79e72c3795b4a42d55155b34ee073 Mon Sep 17 00:00:00 2001 From: Feras Daoud Date: Tue, 15 Aug 2017 13:46:04 +0300 Subject: net/mlx5: PTP code migration to driver core section PTP code is moved to core section of mlx5 driver in order to share it between ethernet and infiniband. This movement involves the following changes: - Change mlx5e_ prefix to be mlx5_ - Add clock structs to Core - Add clock object to mlx5_core_dev - Call Init/Uninit clock from core init/cleanup - Rename mlx5e_tstamp to be mlx5_clock Signed-off-by: Feras Daoud Signed-off-by: Eitan Rabin Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 401c8972cc3a..08c77b7e59cb 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -49,6 +49,8 @@ #include #include #include +#include +#include enum { MLX5_BOARD_ID_LEN = 64, @@ -760,6 +762,27 @@ struct mlx5_rsvd_gids { struct ida ida; }; +#define MAX_PIN_NUM 8 +struct mlx5_pps { + u8 pin_caps[MAX_PIN_NUM]; + struct work_struct out_work; + u64 start[MAX_PIN_NUM]; + u8 enabled; +}; + +struct mlx5_clock { + rwlock_t lock; + struct cyclecounter cycles; + struct timecounter tc; + struct hwtstamp_config hwtstamp_config; + u32 nominal_c_mult; + unsigned long overflow_period; + struct delayed_work overflow_work; + struct ptp_clock *ptp; + struct ptp_clock_info ptp_info; + struct mlx5_pps pps_info; +}; + struct mlx5_core_dev { struct pci_dev *pdev; /* sync pci state */ @@ -800,6 +823,7 @@ struct mlx5_core_dev { #ifdef CONFIG_RFS_ACCEL struct cpu_rmap *rmap; #endif + struct mlx5_clock clock; }; struct mlx5_db { -- cgit v1.2.3 From ad7532cefd11d11a0814a75fb814c205ee3d9d4c Mon Sep 17 00:00:00 2001 From: Srinivas Pandruvada Date: Wed, 11 Oct 2017 09:35:01 -0700 Subject: iio: hid-sensor-trigger: Don't touch sensors unless user space requests One of the user complained that on his system Thinkpad Yoga S1, with commit f1664eaacec3 ("iio: hid-sensor-trigger: Fix the race with user space powering up sensors") causes the system to resume immediately on suspend (S3 operation). On this system the sensor hub is on USB and is a wake up device from S3. So if any sensor sends data on motion, the system will wake up. This can be a legitimate use case to wake up device motion, but that needs proper user space support to set right thresholds. In fact the above commit didn't cause this regression, but any operation which cause sensors to wake up would have caused the same issue. So if user reads the raw sensor data, same issue occurs, with or without this commit. Only difference is that the above commit by default will trigger a power up and power down of sensors as part of runtime pm enable (runtime enable will cause a runtime resume callback followed by runtime_suspend callback). Previously user has to do some action on sensors. On investigation it was observed that the current driver correctly changing the state of all sensors to power off but then also some sensor will still send some data. Only option is to never power up any sensor. Only good option is to: - Using sysfs interface disable USB as a wakeup device (This will not need any driver change) Since some user don't care about sensors. So for those users this change brings back old functionality. As long as they don't cause any operation to power up sensors (like raw read or start iio-sensor-proxy service), the sensors will not be to touched. This is done by delaying run time enable till user space does some operation with sensors. Link: https://bugzilla.kernel.org/show_bug.cgi?id=196853 Signed-off-by: Srinivas Pandruvada Signed-off-by: Jonathan Cameron --- include/linux/hid-sensor-hub.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hid-sensor-hub.h b/include/linux/hid-sensor-hub.h index fc7aae64dcde..331dc377c275 100644 --- a/include/linux/hid-sensor-hub.h +++ b/include/linux/hid-sensor-hub.h @@ -231,6 +231,7 @@ struct hid_sensor_common { unsigned usage_id; atomic_t data_ready; atomic_t user_requested_state; + atomic_t runtime_pm_enable; int poll_interval; int raw_hystersis; int latency_ms; -- cgit v1.2.3 From 53fd88ab61948f711147204c1c5017c7301979e9 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 14 Oct 2017 23:00:54 -0400 Subject: make vfs_ustat() static Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..89323e03e648 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2169,7 +2169,6 @@ extern int iterate_mounts(int (*)(struct vfsmount *, void *), void *, extern int vfs_statfs(const struct path *, struct kstatfs *); extern int user_statfs(const char __user *, struct kstatfs *); extern int fd_statfs(int, struct kstatfs *); -extern int vfs_ustat(dev_t, struct kstatfs *); extern int freeze_super(struct super_block *super); extern int thaw_super(struct super_block *super); extern bool our_mnt(struct vfsmount *mnt); -- cgit v1.2.3 From f175f307dd0bd1ca3825d244f9b870ff12981d3c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 15 Oct 2017 00:38:00 -0400 Subject: stubs for mount_bdev() and kill_block_super() in !CONFIG_BLOCK case Signed-off-by: Al Viro --- include/linux/fs.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 89323e03e648..31f8b2ea358c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2094,9 +2094,18 @@ struct file_system_type { extern struct dentry *mount_ns(struct file_system_type *fs_type, int flags, void *data, void *ns, struct user_namespace *user_ns, int (*fill_super)(struct super_block *, void *, int)); +#ifdef CONFIG_BLOCK extern struct dentry *mount_bdev(struct file_system_type *fs_type, int flags, const char *dev_name, void *data, int (*fill_super)(struct super_block *, void *, int)); +#else +static inline struct dentry *mount_bdev(struct file_system_type *fs_type, + int flags, const char *dev_name, void *data, + int (*fill_super)(struct super_block *, void *, int)) +{ + return ERR_PTR(-ENODEV); +} +#endif extern struct dentry *mount_single(struct file_system_type *fs_type, int flags, void *data, int (*fill_super)(struct super_block *, void *, int)); @@ -2105,7 +2114,14 @@ extern struct dentry *mount_nodev(struct file_system_type *fs_type, int (*fill_super)(struct super_block *, void *, int)); extern struct dentry *mount_subtree(struct vfsmount *mnt, const char *path); void generic_shutdown_super(struct super_block *sb); +#ifdef CONFIG_BLOCK void kill_block_super(struct super_block *sb); +#else +static inline void kill_block_super(struct super_block *sb) +{ + BUG(); +} +#endif void kill_anon_super(struct super_block *sb); void kill_litter_super(struct super_block *sb); void deactivate_super(struct super_block *sb); -- cgit v1.2.3 From e3d4939267925ab66f39123744ffb4bc74a13149 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Thu, 28 Sep 2017 14:03:33 +0100 Subject: ACPI/IORT: Improve functions return type/storage class specifier indentation Some functions definition indentations are using a style that is frowned upon with return value type/storage class specifier in a separate line. Reindent the function definitions to fix them. Signed-off-by: Lorenzo Pieralisi Acked-by: Hanjun Guo Cc: Hanjun Guo Cc: Sudeep Holla --- include/linux/acpi_iort.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi_iort.h b/include/linux/acpi_iort.h index 8d3f0bf80379..2f7a29242b87 100644 --- a/include/linux/acpi_iort.h +++ b/include/linux/acpi_iort.h @@ -49,8 +49,8 @@ static inline void acpi_configure_pmsi_domain(struct device *dev) { } /* IOMMU interface */ static inline void iort_dma_setup(struct device *dev, u64 *dma_addr, u64 *size) { } -static inline -const struct iommu_ops *iort_iommu_configure(struct device *dev) +static inline const struct iommu_ops *iort_iommu_configure( + struct device *dev) { return NULL; } #endif -- cgit v1.2.3 From 3c1818275cc65c6d53e0adfde0c989bfe89ab8d7 Mon Sep 17 00:00:00 2001 From: Anna Schumaker Date: Wed, 26 Jul 2017 10:14:55 -0400 Subject: NFS: Create NFS_ACCESS_* flags Passing the NFS v4 flags into the v3 code seems weird to me, even if they are defined to the same values. This patch adds in generic flags to help me feel better Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index a0282ceaa48b..453f491a5fda 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -183,6 +183,16 @@ struct nfs_inode { struct inode vfs_inode; }; +/* + * Access bit flags + */ +#define NFS_ACCESS_READ 0x0001 +#define NFS_ACCESS_LOOKUP 0x0002 +#define NFS_ACCESS_MODIFY 0x0004 +#define NFS_ACCESS_EXTEND 0x0008 +#define NFS_ACCESS_DELETE 0x0010 +#define NFS_ACCESS_EXECUTE 0x0020 + /* * Cache validity bit flags */ -- cgit v1.2.3 From 36689ecd2c065a8879035e5bf1b4a0f4d5b65160 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Fri, 29 Sep 2017 20:08:28 -0500 Subject: of: remove struct property.unique_id for FDT Only Sparc uses unique_id, so remove it for FDT builds and shrink struct property a bit making the unflattened DT less of a memory hog. Tested-by: Nicolas Pitre Reviewed-by: Frank Rowand Acked-by: Grant Likely Signed-off-by: Rob Herring --- include/linux/of.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index cfc34117fc92..8f9e96752837 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -38,7 +38,9 @@ struct property { void *value; struct property *next; unsigned long _flags; +#if defined(CONFIG_OF_PROMTREE) unsigned int unique_id; +#endif struct bin_attribute attr; }; -- cgit v1.2.3 From 16bba30eab137aaa37538349c0a7496720e90c66 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 4 Oct 2017 14:30:02 -0500 Subject: of: make struct property _flags field configurable Only Sparc and CONFIG_OF_DYNAMIC use the struct property._flags field, so make it conditional shrinking struct property a bit. Tested-by: Nicolas Pitre Reviewed-by: Frank Rowand Acked-by: Grant Likely Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 8f9e96752837..7eb94b7fbcf3 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -37,7 +37,9 @@ struct property { int length; void *value; struct property *next; +#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC) unsigned long _flags; +#endif #if defined(CONFIG_OF_PROMTREE) unsigned int unique_id; #endif @@ -205,6 +207,7 @@ static inline void of_node_clear_flag(struct device_node *n, unsigned long flag) clear_bit(flag, &n->_flags); } +#if defined(CONFIG_OF_DYNAMIC) || defined(CONFIG_SPARC) static inline int of_property_check_flag(struct property *p, unsigned long flag) { return test_bit(flag, &p->_flags); @@ -219,6 +222,7 @@ static inline void of_property_clear_flag(struct property *p, unsigned long flag { clear_bit(flag, &p->_flags); } +#endif extern struct device_node *__of_find_all_nodes(struct device_node *prev); extern struct device_node *of_find_all_nodes(struct device_node *prev); -- cgit v1.2.3 From 0c3c234b95fa7f1dfa19e1456a47ebafc300dd6b Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 4 Oct 2017 14:04:01 -0500 Subject: of: wrap accesses to device_node kobject In preparation to make kobject element in struct device_node optional, provide and use a macro to return the kobject pointer. The only user outside the DT core is the driver core. Acked-by: Greg Kroah-Hartman Tested-by: Nicolas Pitre Reviewed-by: Frank Rowand Acked-by: Grant Likely Signed-off-by: Rob Herring --- include/linux/of.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 7eb94b7fbcf3..2d685e769409 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -111,6 +111,8 @@ static inline void of_node_init(struct device_node *node) node->fwnode.ops = &of_fwnode_ops; } +#define of_node_kobj(n) (&(n)->kobj) + /* true when node is initialized */ static inline int of_node_is_initialized(struct device_node *node) { -- cgit v1.2.3 From b56b5528f5b3c3d47e7c0ca67318c45e980d93f0 Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Wed, 4 Oct 2017 14:09:40 -0500 Subject: of: make kobject and bin_attribute support configurable Having device_nodes be kobjects is only needed if sysfs or OF_DYNAMIC is enabled. Otherwise, having a kobject in struct device_node is unnecessary bloat in minimal kernel configurations. Likewise, bin_attribute is only needed in struct property when sysfs is enabled, so we can make it configurable too. Tested-by: Nicolas Pitre Reviewed-by: Frank Rowand Acked-by: Grant Likely Signed-off-by: Rob Herring --- include/linux/of.h | 22 ++++++++++------------ 1 file changed, 10 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 2d685e769409..7b0f17be7830 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -43,7 +43,9 @@ struct property { #if defined(CONFIG_OF_PROMTREE) unsigned int unique_id; #endif +#if defined(CONFIG_OF_KOBJ) struct bin_attribute attr; +#endif }; #if defined(CONFIG_SPARC) @@ -62,7 +64,9 @@ struct device_node { struct device_node *parent; struct device_node *child; struct device_node *sibling; +#if defined(CONFIG_OF_KOBJ) struct kobject kobj; +#endif unsigned long _flags; void *data; #if defined(CONFIG_SPARC) @@ -107,23 +111,17 @@ extern struct kobj_type of_node_ktype; extern const struct fwnode_operations of_fwnode_ops; static inline void of_node_init(struct device_node *node) { +#if defined(CONFIG_OF_KOBJ) kobject_init(&node->kobj, &of_node_ktype); +#endif node->fwnode.ops = &of_fwnode_ops; } +#if defined(CONFIG_OF_KOBJ) #define of_node_kobj(n) (&(n)->kobj) - -/* true when node is initialized */ -static inline int of_node_is_initialized(struct device_node *node) -{ - return node && node->kobj.state_initialized; -} - -/* true when node is attached (i.e. present on sysfs) */ -static inline int of_node_is_attached(struct device_node *node) -{ - return node && node->kobj.state_in_sysfs; -} +#else +#define of_node_kobj(n) NULL +#endif #ifdef CONFIG_OF_DYNAMIC extern struct device_node *of_node_get(struct device_node *node); -- cgit v1.2.3 From 62518c02f75ff9b19c07b53b8e13ed878211b795 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Mon, 16 Oct 2017 18:04:22 +0200 Subject: irqchip/irq-omap-intc: Remove omap3_init_irq() All mach-omap2 variants are device tree only now, so this function is dead code. Remove it. Signed-off-by: Ladislav Michl Signed-off-by: Thomas Gleixner Acked-by: Tony Lindgren Cc: Marc Zyngier Cc: linux-omap@vger.kernel.org Cc: Jason Cooper Link: https://lkml.kernel.org/r/20171016160422.uu2i7vvrgy7cc4aw@lenoch --- include/linux/irqchip/irq-omap-intc.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/irq-omap-intc.h b/include/linux/irqchip/irq-omap-intc.h index 2e3d1afeb674..f19ccee7749f 100644 --- a/include/linux/irqchip/irq-omap-intc.h +++ b/include/linux/irqchip/irq-omap-intc.h @@ -18,8 +18,6 @@ #ifndef __INCLUDE_LINUX_IRQCHIP_IRQ_OMAP_INTC_H #define __INCLUDE_LINUX_IRQCHIP_IRQ_OMAP_INTC_H -void omap3_init_irq(void); - int omap_irq_pending(void); void omap_intc_save_context(void); void omap_intc_restore_context(void); -- cgit v1.2.3 From 8fd0fbbe8888f295eb34172a7e47bf7d3a0a4687 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:29 +0200 Subject: perf/ftrace: Revert ("perf/ftrace: Fix double traces of perf on ftrace:function") Revert commit: 75e8387685f6 ("perf/ftrace: Fix double traces of perf on ftrace:function") The reason I instantly stumbled on that patch is that it only addresses the ftrace situation and doesn't mention the other _5_ places that use this interface. It doesn't explain why those don't have the problem and if not, why their solution doesn't work for ftrace. It doesn't, but this is just putting more duct tape on. Link: http://lkml.kernel.org/r/20171011080224.200565770@infradead.org Cc: Zhou Chengming Cc: Jiri Olsa Cc: Ingo Molnar Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/perf_event.h | 2 +- include/linux/trace_events.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..569d1b54e201 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -1184,7 +1184,7 @@ extern void perf_event_init(void); extern void perf_tp_event(u16 event_type, u64 count, void *record, int entry_size, struct pt_regs *regs, struct hlist_head *head, int rctx, - struct task_struct *task, struct perf_event *event); + struct task_struct *task); extern void perf_bp_event(struct perf_event *event, void *data); #ifndef perf_misc_flags diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..a6349b76fd39 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -507,9 +507,9 @@ void perf_trace_run_bpf_submit(void *raw_data, int size, int rctx, static inline void perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, u64 count, struct pt_regs *regs, void *head, - struct task_struct *task, struct perf_event *event) + struct task_struct *task) { - perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); + perf_tp_event(type, count, raw_data, size, regs, head, rctx, task); } #endif -- cgit v1.2.3 From 466c81c45b650deca213fda3d0ec4761667379a9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 10 Oct 2017 17:15:47 +0200 Subject: perf/ftrace: Fix function trace events The function-trace <-> perf interface is a tad messed up. Where all the other trace <-> perf interfaces use a single trace hook registration and use per-cpu RCU based hlist to iterate the events, function-trace actually needs multiple hook registrations in order to minimize function entry patching when filters are present. The end result is that we iterate events both on the trace hook and on the hlist, which results in reporting events multiple times. Since function-trace cannot use the regular scheme, fix it the other way around, use singleton hlists. Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace_events.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index a6349b76fd39..ca4e67e466a7 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -173,6 +173,11 @@ enum trace_reg { TRACE_REG_PERF_UNREGISTER, TRACE_REG_PERF_OPEN, TRACE_REG_PERF_CLOSE, + /* + * These (ADD/DEL) use a 'boolean' return value, where 1 (true) means a + * custom action was taken and the default action is not to be + * performed. + */ TRACE_REG_PERF_ADD, TRACE_REG_PERF_DEL, #endif -- cgit v1.2.3 From b3a88803ac5b4bda26017b485c8722a8487fefb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 11 Oct 2017 09:45:32 +0200 Subject: ftrace: Kill FTRACE_OPS_FL_PER_CPU The one and only user of FTRACE_OPS_FL_PER_CPU is gone, remove the lot. Link: http://lkml.kernel.org/r/20171011080224.372422809@infradead.org Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 83 +++++++++----------------------------------------- 1 file changed, 14 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 1f8545caa691..252e334e7b5f 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -102,10 +102,6 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); * ENABLED - set/unset when ftrace_ops is registered/unregistered * DYNAMIC - set when ftrace_ops is registered to denote dynamically * allocated ftrace_ops which need special care - * PER_CPU - set manualy by ftrace_ops user to denote the ftrace_ops - * could be controlled by following calls: - * ftrace_function_local_enable - * ftrace_function_local_disable * SAVE_REGS - The ftrace_ops wants regs saved at each function called * and passed to the callback. If this flag is set, but the * architecture does not support passing regs @@ -149,21 +145,20 @@ ftrace_func_t ftrace_ops_get_func(struct ftrace_ops *ops); enum { FTRACE_OPS_FL_ENABLED = 1 << 0, FTRACE_OPS_FL_DYNAMIC = 1 << 1, - FTRACE_OPS_FL_PER_CPU = 1 << 2, - FTRACE_OPS_FL_SAVE_REGS = 1 << 3, - FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 4, - FTRACE_OPS_FL_RECURSION_SAFE = 1 << 5, - FTRACE_OPS_FL_STUB = 1 << 6, - FTRACE_OPS_FL_INITIALIZED = 1 << 7, - FTRACE_OPS_FL_DELETED = 1 << 8, - FTRACE_OPS_FL_ADDING = 1 << 9, - FTRACE_OPS_FL_REMOVING = 1 << 10, - FTRACE_OPS_FL_MODIFYING = 1 << 11, - FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 12, - FTRACE_OPS_FL_IPMODIFY = 1 << 13, - FTRACE_OPS_FL_PID = 1 << 14, - FTRACE_OPS_FL_RCU = 1 << 15, - FTRACE_OPS_FL_TRACE_ARRAY = 1 << 16, + FTRACE_OPS_FL_SAVE_REGS = 1 << 2, + FTRACE_OPS_FL_SAVE_REGS_IF_SUPPORTED = 1 << 3, + FTRACE_OPS_FL_RECURSION_SAFE = 1 << 4, + FTRACE_OPS_FL_STUB = 1 << 5, + FTRACE_OPS_FL_INITIALIZED = 1 << 6, + FTRACE_OPS_FL_DELETED = 1 << 7, + FTRACE_OPS_FL_ADDING = 1 << 8, + FTRACE_OPS_FL_REMOVING = 1 << 9, + FTRACE_OPS_FL_MODIFYING = 1 << 10, + FTRACE_OPS_FL_ALLOC_TRAMP = 1 << 11, + FTRACE_OPS_FL_IPMODIFY = 1 << 12, + FTRACE_OPS_FL_PID = 1 << 13, + FTRACE_OPS_FL_RCU = 1 << 14, + FTRACE_OPS_FL_TRACE_ARRAY = 1 << 15, }; #ifdef CONFIG_DYNAMIC_FTRACE @@ -198,7 +193,6 @@ struct ftrace_ops { unsigned long flags; void *private; ftrace_func_t saved_func; - int __percpu *disabled; #ifdef CONFIG_DYNAMIC_FTRACE struct ftrace_ops_hash local_hash; struct ftrace_ops_hash *func_hash; @@ -230,55 +224,6 @@ int register_ftrace_function(struct ftrace_ops *ops); int unregister_ftrace_function(struct ftrace_ops *ops); void clear_ftrace_function(void); -/** - * ftrace_function_local_enable - enable ftrace_ops on current cpu - * - * This function enables tracing on current cpu by decreasing - * the per cpu control variable. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline void ftrace_function_local_enable(struct ftrace_ops *ops) -{ - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return; - - (*this_cpu_ptr(ops->disabled))--; -} - -/** - * ftrace_function_local_disable - disable ftrace_ops on current cpu - * - * This function disables tracing on current cpu by increasing - * the per cpu control variable. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline void ftrace_function_local_disable(struct ftrace_ops *ops) -{ - if (WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU))) - return; - - (*this_cpu_ptr(ops->disabled))++; -} - -/** - * ftrace_function_local_disabled - returns ftrace_ops disabled value - * on current cpu - * - * This function returns value of ftrace_ops::disabled on current cpu. - * It must be called with preemption disabled and only on ftrace_ops - * registered with FTRACE_OPS_FL_PER_CPU. If called without preemption - * disabled, this_cpu_ptr will complain when CONFIG_DEBUG_PREEMPT is enabled. - */ -static inline int ftrace_function_local_disabled(struct ftrace_ops *ops) -{ - WARN_ON_ONCE(!(ops->flags & FTRACE_OPS_FL_PER_CPU)); - return *this_cpu_ptr(ops->disabled); -} - extern void ftrace_stub(unsigned long a0, unsigned long a1, struct ftrace_ops *op, struct pt_regs *regs); -- cgit v1.2.3 From cbe25ce37d6c2623b5ac09128987e98848a54c6c Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Sat, 14 Oct 2017 17:43:15 +0200 Subject: ACPI / PM: Combine device suspend routines On top of a previous change getting rid of the PM QoS flag PM_QOS_FLAG_REMOTE_WAKEUP, combine two ACPI device suspend routines, acpi_dev_runtime_suspend() and acpi_dev_suspend_late(), into one, acpi_dev_suspend(), to eliminate some code duplication. It also avoids enabling wakeup for devices handled by the ACPI LPSS middle layer on driver removal. Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson --- include/linux/acpi.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 2b1738f840ab..0ada2a948b44 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -864,7 +864,7 @@ static inline void arch_reserve_mem_area(acpi_physical_address addr, #endif #if defined(CONFIG_ACPI) && defined(CONFIG_PM) -int acpi_dev_runtime_suspend(struct device *dev); +int acpi_dev_suspend(struct device *dev, bool wakeup); int acpi_dev_resume(struct device *dev); int acpi_subsys_runtime_suspend(struct device *dev); int acpi_subsys_runtime_resume(struct device *dev); @@ -889,7 +889,6 @@ int acpi_subsys_resume_early(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); #else -static inline int acpi_dev_suspend_late(struct device *dev) { return 0; } static inline int acpi_dev_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} -- cgit v1.2.3 From 023b7b07ccb57317bee4d971be546cc4469f4e7e Mon Sep 17 00:00:00 2001 From: Arvind Yadav Date: Thu, 31 Aug 2017 11:30:45 +0530 Subject: thermal : Remove const to make same prototype Here, prototype of thermal_zone_device_register is not matching with static inline thermal_zone_device_register. One is using const thermal_zone_params. Other is using non-const. Signed-off-by: Arvind Yadav Signed-off-by: Zhang Rui --- include/linux/thermal.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index fd5b959c753c..8c5302374eaa 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -488,7 +488,7 @@ static inline int power_actor_set_power(struct thermal_cooling_device *cdev, static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, + struct thermal_zone_params *tzp, int passive_delay, int polling_delay) { return ERR_PTR(-ENODEV); } static inline void thermal_zone_device_unregister( -- cgit v1.2.3 From 0d96a4f6a03797b73bee465cada39133b7972e8d Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Fri, 6 Oct 2017 13:45:53 +0200 Subject: memory: omap-gpmc: Drop gpmc_status This field is no longer used, drop it. Signed-off-by: Ladislav Michl Signed-off-by: Roger Quadros --- include/linux/platform_data/mtd-nand-omap2.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-omap2.h b/include/linux/platform_data/mtd-nand-omap2.h index 17d57a18bac5..25e267f1970c 100644 --- a/include/linux/platform_data/mtd-nand-omap2.h +++ b/include/linux/platform_data/mtd-nand-omap2.h @@ -63,8 +63,6 @@ struct gpmc_nand_regs { void __iomem *gpmc_bch_result4[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result5[GPMC_BCH_NUM_REMAINDER]; void __iomem *gpmc_bch_result6[GPMC_BCH_NUM_REMAINDER]; - /* Deprecated. Do not use */ - void __iomem *gpmc_status; }; struct omap_nand_platform_data { -- cgit v1.2.3 From 5a6cd6de76ae78b651e7c36eba8b1da465d65f06 Mon Sep 17 00:00:00 2001 From: Alan Brady Date: Thu, 5 Oct 2017 14:53:40 -0700 Subject: ethtool: add ethtool_intersect_link_masks This function provides a way to intersect two link masks together to find the common ground between them. For example in i40e, the driver first generates link masks for what is supported by the PHY type. The driver then gets the link masks for what the NVM supports. The resulting intersection between them yields what can truly be supported. Signed-off-by: Alan Brady Tested-by: Andrew Bowers Signed-off-by: Jeff Kirsher --- include/linux/ethtool.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ethtool.h b/include/linux/ethtool.h index 4587a4c36923..c77fa3529e15 100644 --- a/include/linux/ethtool.h +++ b/include/linux/ethtool.h @@ -163,6 +163,16 @@ extern int __ethtool_get_link_ksettings(struct net_device *dev, struct ethtool_link_ksettings *link_ksettings); +/** + * ethtool_intersect_link_masks - Given two link masks, AND them together + * @dst: first mask and where result is stored + * @src: second mask to intersect with + * + * Given two link mode masks, AND them together and save the result in dst. + */ +void ethtool_intersect_link_masks(struct ethtool_link_ksettings *dst, + struct ethtool_link_ksettings *src); + void ethtool_convert_legacy_u32_to_link_mode(unsigned long *dst, u32 legacy_u32); -- cgit v1.2.3 From 0290c4ca2536a35e55c53cfb9058465b1f987b17 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Tue, 17 Oct 2017 16:36:23 -0700 Subject: of: overlay: rename identifiers to more reflect what they do This patch is aimed primarily at drivers/of/overlay.c, but those changes also have a small impact in a few other files. overlay.c is difficult to read and maintain. Improve readability: - Rename functions, types and variables to better reflect what they do and to be consistent with names in other places, such as the device tree overlay FDT (flattened device tree), and make the algorithms more clear - Use the same names consistently throughout the file - Update comments for name changes - Fix incorrect comments This patch is intended to not introduce any functional change. Signed-off-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 7b0f17be7830..7569e9cc45de 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1312,26 +1312,26 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY /* ID based overlays; the API for external users */ -int of_overlay_create(struct device_node *tree); -int of_overlay_destroy(int id); -int of_overlay_destroy_all(void); +int of_overlay_apply(struct device_node *tree); +int of_overlay_remove(int id); +int of_overlay_remove_all(void); int of_overlay_notifier_register(struct notifier_block *nb); int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_create(struct device_node *tree) +static inline int of_overlay_apply(struct device_node *tree) { return -ENOTSUPP; } -static inline int of_overlay_destroy(int id) +static inline int of_overlay_remove(int id) { return -ENOTSUPP; } -static inline int of_overlay_destroy_all(void) +static inline int of_overlay_remove_all(void) { return -ENOTSUPP; } -- cgit v1.2.3 From 24789c5ce5a373dd55640f9cd79117fcc3ccc46d Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Tue, 17 Oct 2017 16:36:26 -0700 Subject: of: overlay: detect cases where device tree may become corrupt When an attempt to apply an overlay changeset fails, an effort is made to revert any partial application of the changeset. When an attempt to remove an overlay changeset fails, an effort is made to re-apply any partial reversion of the changeset. The existing code does not check for failure to recover a failed overlay changeset application or overlay changeset revert. Add the missing checks and flag the devicetree as corrupt if the state of the devicetree can not be determined. Improve and expand the returned errors to more fully reflect the result of the effort to undo the partial effects of a failed attempt to apply or remove an overlay changeset. If the device tree might be corrupt, do not allow further attempts to apply or remove an overlay changeset. When creating an overlay changeset from an overlay device tree, add some additional warnings if the state of the overlay device tree is not as expected. Signed-off-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 7569e9cc45de..96edda95c6b0 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1298,7 +1298,7 @@ static inline bool of_device_is_system_power_controller(const struct device_node */ enum of_overlay_notify_action { - OF_OVERLAY_PRE_APPLY, + OF_OVERLAY_PRE_APPLY = 0, OF_OVERLAY_POST_APPLY, OF_OVERLAY_PRE_REMOVE, OF_OVERLAY_POST_REMOVE, @@ -1312,8 +1312,8 @@ struct of_overlay_notify_data { #ifdef CONFIG_OF_OVERLAY /* ID based overlays; the API for external users */ -int of_overlay_apply(struct device_node *tree); -int of_overlay_remove(int id); +int of_overlay_apply(struct device_node *tree, int *ovcs_id); +int of_overlay_remove(int *ovcs_id); int of_overlay_remove_all(void); int of_overlay_notifier_register(struct notifier_block *nb); @@ -1321,12 +1321,12 @@ int of_overlay_notifier_unregister(struct notifier_block *nb); #else -static inline int of_overlay_apply(struct device_node *tree) +static inline int of_overlay_apply(struct device_node *tree, int *ovcs_id) { return -ENOTSUPP; } -static inline int of_overlay_remove(int id) +static inline int of_overlay_remove(int *ovcs_id) { return -ENOTSUPP; } -- cgit v1.2.3 From f948d6d8b792bb90041edc12eac35faf83030994 Mon Sep 17 00:00:00 2001 From: Frank Rowand Date: Tue, 17 Oct 2017 16:36:29 -0700 Subject: of: overlay: avoid race condition between applying multiple overlays The process of applying an overlay consists of: - unflatten an overlay FDT (flattened device tree) into an EDT (expanded device tree) - fixup the phandle values in the overlay EDT to fit in a range above the phandle values in the live device tree - create the overlay changeset to reflect the contents of the overlay EDT - apply the overlay changeset, to modify the live device tree, potentially changing the maximum phandle value in the live device tree There is currently no protection against two overlay applies concurrently determining what range of phandle values are in use in the live device tree, and subsequently changing that range. Add a mutex to prevent multiple overlay applies from occurring simultaneously. Move of_resolve_phandles() into of_overlay_apply() so that it does not have to be duplicated by each caller of of_overlay_apply(). The test in of_resolve_phandles() that the overlay tree is detached is temporarily disabled so that old style overlay unittests do not fail. Signed-off-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 96edda95c6b0..ef4c9ff5955a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -1279,9 +1279,6 @@ static inline int of_reconfig_get_state_change(unsigned long action, } #endif /* CONFIG_OF_DYNAMIC */ -/* CONFIG_OF_RESOLVE api */ -extern int of_resolve_phandles(struct device_node *tree); - /** * of_device_is_system_power_controller - Tells if system-power-controller is found for device_node * @np: Pointer to the given device_node -- cgit v1.2.3 From 6710e1126934d8b4372b4d2f9ae1646cd3f151bf Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:28 +0200 Subject: bpf: introduce new bpf cpu map type BPF_MAP_TYPE_CPUMAP The 'cpumap' is primarily used as a backend map for XDP BPF helper call bpf_redirect_map() and XDP_REDIRECT action, like 'devmap'. This patch implement the main part of the map. It is not connected to the XDP redirect system yet, and no SKB allocation are done yet. The main concern in this patch is to ensure the datapath can run without any locking. This adds complexity to the setup and tear-down procedure, which assumptions are extra carefully documented in the code comments. V2: - make sure array isn't larger than NR_CPUS - make sure CPUs added is a valid possible CPU V3: fix nitpicks from Jakub Kicinski V5: - Restrict map allocation to root / CAP_SYS_ADMIN - WARN_ON_ONCE if queue is not empty on tear-down - Return -EPERM on memlock limit instead of -ENOMEM - Error code in __cpu_map_entry_alloc() also handle ptr_ring_cleanup() - Moved cpu_map_enqueue() to next patch V6: all notice by Daniel Borkmann - Fix err return code in cpu_map_alloc() introduced in V5 - Move cpu_possible() check after max_entries boundary check - Forbid usage initially in check_map_func_compatibility() V7: - Fix alloc error path spotted by Daniel Borkmann - Did stress test adding+removing CPUs from the map concurrently - Fixed refcnt issue on cpu_map_entry, kthread started too soon - Make sure packets are flushed during tear-down, involved use of rcu_barrier() and kthread_run only exit after queue is empty - Fix alloc error path in __cpu_map_entry_alloc() for ptr_ring V8: - Nitpicking comments and gramma by Edward Cree - Fix missing semi-colon introduced in V7 due to rebasing - Move struct bpf_cpu_map_entry members cpu+map_id to tracepoint patch Signed-off-by: Jesper Dangaard Brouer Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 6f1a567667b8..814c1081a4a9 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -41,4 +41,5 @@ BPF_MAP_TYPE(BPF_MAP_TYPE_DEVMAP, dev_map_ops) #ifdef CONFIG_STREAM_PARSER BPF_MAP_TYPE(BPF_MAP_TYPE_SOCKMAP, sock_map_ops) #endif +BPF_MAP_TYPE(BPF_MAP_TYPE_CPUMAP, cpu_map_ops) #endif -- cgit v1.2.3 From 9c270af37bb62e708e3e4415d653ce73e713df02 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:34 +0200 Subject: bpf: XDP_REDIRECT enable use of cpumap This patch connects cpumap to the xdp_do_redirect_map infrastructure. Still no SKB allocation are done yet. The XDP frames are transferred to the other CPU, but they are simply refcnt decremented on the remote CPU. This served as a good benchmark for measuring the overhead of remote refcnt decrement. If driver page recycle cache is not efficient then this, exposes a bottleneck in the page allocator. A shout-out to MST's ptr_ring, which is the secret behind is being so efficient to transfer memory pointers between CPUs, without constantly bouncing cache-lines between CPUs. V3: Handle !CONFIG_BPF_SYSCALL pointed out by kbuild test robot. V4: Make Generic-XDP aware of cpumap type, but don't allow redirect yet, as implementation require a separate upstream discussion. V5: - Fix a maybe-uninitialized pointed out by kbuild test robot. - Restrict bpf-prog side access to cpumap, open when use-cases appear - Implement cpu_map_enqueue() as a more simple void pointer enqueue V6: - Allow cpumap type for usage in helper bpf_redirect_map, general bpf-prog side restriction moved to earlier patch. Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/bpf.h | 31 ++++++++++++++++++++++++++++++- 1 file changed, 30 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 4373125de1f3..6d4dd844828a 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -355,6 +355,13 @@ struct net_device *__dev_map_lookup_elem(struct bpf_map *map, u32 key); void __dev_map_insert_ctx(struct bpf_map *map, u32 index); void __dev_map_flush(struct bpf_map *map); +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key); +void __cpu_map_insert_ctx(struct bpf_map *map, u32 index); +void __cpu_map_flush(struct bpf_map *map); +struct xdp_buff; +int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, struct xdp_buff *xdp, + struct net_device *dev_rx); + /* Return map's numa specified by userspace */ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) { @@ -362,7 +369,7 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } -#else +#else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { return ERR_PTR(-EOPNOTSUPP); @@ -425,6 +432,28 @@ static inline void __dev_map_insert_ctx(struct bpf_map *map, u32 index) static inline void __dev_map_flush(struct bpf_map *map) { } + +static inline +struct bpf_cpu_map_entry *__cpu_map_lookup_elem(struct bpf_map *map, u32 key) +{ + return NULL; +} + +static inline void __cpu_map_insert_ctx(struct bpf_map *map, u32 index) +{ +} + +static inline void __cpu_map_flush(struct bpf_map *map) +{ +} + +struct xdp_buff; +static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, + struct xdp_buff *xdp, + struct net_device *dev_rx) +{ + return 0; +} #endif /* CONFIG_BPF_SYSCALL */ #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) -- cgit v1.2.3 From 1c601d829ab0d7ac3ac44853f83db2206afe67fc Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Mon, 16 Oct 2017 12:19:39 +0200 Subject: bpf: cpumap xdp_buff to skb conversion and allocation This patch makes cpumap functional, by adding SKB allocation and invoking the network stack on the dequeuing CPU. For constructing the SKB on the remote CPU, the xdp_buff in converted into a struct xdp_pkt, and it mapped into the top headroom of the packet, to avoid allocating separate mem. For now, struct xdp_pkt is just a cpumap internal data structure, with info carried between enqueue to dequeue. If a driver doesn't have enough headroom it is simply dropped, with return code -EOVERFLOW. This will be picked up the xdp tracepoint infrastructure, to allow users to catch this. V2: take into account xdp->data_meta V4: - Drop busypoll tricks, keeping it more simple. - Skip RPS and Generic-XDP-recursive-reinjection, suggested by Alexei V5: correct RCU read protection around __netif_receive_skb_core. V6: Setting TASK_RUNNING vs TASK_INTERRUPTIBLE based on talk with Rik van Riel Signed-off-by: Jesper Dangaard Brouer Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 31bb3010c69b..bf014afcb914 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3260,6 +3260,7 @@ int do_xdp_generic(struct bpf_prog *xdp_prog, struct sk_buff *skb); int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); int netif_receive_skb(struct sk_buff *skb); +int netif_receive_skb_core(struct sk_buff *skb); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -- cgit v1.2.3 From 7de16e3a35578f4f5accc6f5f23970310483d0a2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:53 -0700 Subject: bpf: split verifier and program ops struct bpf_verifier_ops contains both verifier ops and operations used later during program's lifetime (test_run). Split the runtime ops into a different structure. BPF_PROG_TYPE() will now append ## _prog_ops or ## _verifier_ops to the names. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 15 ++++++++++----- include/linux/bpf_types.h | 28 ++++++++++++++-------------- 2 files changed, 24 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 6d4dd844828a..e1fba5504ca5 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -157,6 +157,11 @@ bpf_ctx_record_field_size(struct bpf_insn_access_aux *aux, u32 size) aux->ctx_field_size = size; } +struct bpf_prog_ops { + int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, + union bpf_attr __user *uattr); +}; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -172,8 +177,6 @@ struct bpf_verifier_ops { const struct bpf_insn *src, struct bpf_insn *dst, struct bpf_prog *prog, u32 *target_size); - int (*test_run)(struct bpf_prog *prog, const union bpf_attr *kattr, - union bpf_attr __user *uattr); }; struct bpf_prog_aux { @@ -184,7 +187,8 @@ struct bpf_prog_aux { u32 id; struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; - const struct bpf_verifier_ops *ops; + const struct bpf_prog_ops *ops; + const struct bpf_verifier_ops *vops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; @@ -279,8 +283,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); -#define BPF_PROG_TYPE(_id, _ops) \ - extern const struct bpf_verifier_ops _ops; +#define BPF_PROG_TYPE(_id, _name) \ + extern const struct bpf_prog_ops _name ## _prog_ops; \ + extern const struct bpf_verifier_ops _name ## _verifier_ops; #define BPF_MAP_TYPE(_id, _ops) \ extern const struct bpf_map_ops _ops; #include diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 814c1081a4a9..36418ad43245 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -1,22 +1,22 @@ /* internal file - do not include directly */ #ifdef CONFIG_NET -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCKET_FILTER, sk_filter) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_CLS, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_SCHED_ACT, tc_cls_act) +BPF_PROG_TYPE(BPF_PROG_TYPE_XDP, xdp) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SKB, cg_skb) +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_SOCK, cg_sock) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_IN, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_OUT, lwt_inout) +BPF_PROG_TYPE(BPF_PROG_TYPE_LWT_XMIT, lwt_xmit) +BPF_PROG_TYPE(BPF_PROG_TYPE_SOCK_OPS, sock_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_SK_SKB, sk_skb) #endif #ifdef CONFIG_BPF_EVENTS -BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint_prog_ops) -BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event_prog_ops) +BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) +BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) +BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) -- cgit v1.2.3 From 00176a34d9e27ab1e77db75fe13abc005cffe0ca Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:54 -0700 Subject: bpf: remove the verifier ops from program structure Since the verifier ops don't have to be associated with the program for its entire lifetime we can move it to verifier's struct bpf_verifier_env. Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 - include/linux/bpf_verifier.h | 1 + 2 files changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e1fba5504ca5..cf91977e8719 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -188,7 +188,6 @@ struct bpf_prog_aux { struct latch_tree_node ksym_tnode; struct list_head ksym_lnode; const struct bpf_prog_ops *ops; - const struct bpf_verifier_ops *vops; struct bpf_map **used_maps; struct bpf_prog *prog; struct user_struct *user; diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index f00ef751c1c5..feeaea93d959 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -141,6 +141,7 @@ struct bpf_ext_analyzer_ops { */ struct bpf_verifier_env { struct bpf_prog *prog; /* eBPF program being verified */ + const struct bpf_verifier_ops *ops; struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ -- cgit v1.2.3 From 4f9218aaf8a463f76cac40aa08d859d065f8cc9e Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 16 Oct 2017 16:40:55 -0700 Subject: bpf: move knowledge about post-translation offsets out of verifier Use the fact that verifier ops are now separate from program ops to define a separate set of callbacks for verification of already translated programs. Since we expect the analyzer ops to be defined only for a small subset of all program types initialize their array by hand (don't use linux/bpf_types.h). Signed-off-by: Jakub Kicinski Acked-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index cf91977e8719..d67ccdc0099f 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -291,6 +291,9 @@ DECLARE_PER_CPU(int, bpf_prog_active); #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; +extern const struct bpf_verifier_ops xdp_analyzer_ops; + struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); -- cgit v1.2.3 From 448956d619a35538e71e8a2d8f1e4bc274037767 Mon Sep 17 00:00:00 2001 From: Frank Binns Date: Wed, 18 Oct 2017 14:30:23 +0100 Subject: dma-fence: remove duplicate word in comment Signed-off-by: Frank Binns Reviewed-by: Chris Wilson Signed-off-by: Daniel Vetter Link: https://patchwork.freedesktop.org/patch/msgid/1508333423-5394-1-git-send-email-frank.binns@imgtec.com --- include/linux/dma-fence.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dma-fence.h b/include/linux/dma-fence.h index ca974224d92e..efdabbb64e3c 100644 --- a/include/linux/dma-fence.h +++ b/include/linux/dma-fence.h @@ -128,7 +128,7 @@ struct dma_fence_cb { * implementation know that there is another driver waiting on * the signal (ie. hw->sw case). * - * This function can be called called from atomic context, but not + * This function can be called from atomic context, but not * from irq context, so normal spinlocks can be used. * * A return value of false indicates the fence already passed, -- cgit v1.2.3 From 149e10f8ff71439014dff97987e90e87e2684a16 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 12:53:06 +0300 Subject: block: introduce blk_mq_tagset_iter Iterator helper to apply a function on all the tags in a given tagset. export it as it will be used outside the block layer later on. Reviewed-by: Bart Van Assche Reviewed-by: Jens Axboe Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 50c6485cb04f..6bc29f73a9aa 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -259,6 +259,8 @@ void blk_freeze_queue_start(struct request_queue *q); void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); +int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, + int (reinit_request)(void *, struct request *)); int blk_mq_reinit_tagset(struct blk_mq_tag_set *set, int (reinit_request)(void *, struct request *)); -- cgit v1.2.3 From dab7487bdf63c6f2d70aac604494d023b189a9fd Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Wed, 11 Oct 2017 12:53:08 +0300 Subject: block: remove blk_mq_reinit_tagset No callers left. Reviewed-by: Jens Axboe Reviewed-by: Bart Van Assche Reviewed-by: Max Gurtovoy Reviewed-by: Johannes Thumshirn Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6bc29f73a9aa..cfd64e500d82 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -261,8 +261,6 @@ int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); int blk_mq_tagset_iter(struct blk_mq_tag_set *set, void *data, int (reinit_request)(void *, struct request *)); -int blk_mq_reinit_tagset(struct blk_mq_tag_set *set, - int (reinit_request)(void *, struct request *)); int blk_mq_map_queues(struct blk_mq_tag_set *set); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); -- cgit v1.2.3 From 734f0d241d2b4e47383bd0d16e21e06f6cb8d2c3 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Mon, 9 Oct 2017 12:15:34 -0700 Subject: fscrypt: clean up include file mess Filesystems have to include different header files based on whether they are compiled with encryption support or not. That's nasty and messy. Instead, rationalise the headers so we have a single include fscrypt.h and let it decide what internal implementation to include based on the __FS_HAS_ENCRYPTION define. Filesystems set __FS_HAS_ENCRYPTION to 1 before including linux/fscrypt.h if they are built with encryption support. Otherwise, they must set __FS_HAS_ENCRYPTION to 0. Add guards to prevent fscrypt_supp.h and fscrypt_notsupp.h from being directly included by filesystems. Signed-off-by: Dave Chinner [EB: use 1 and 0 rather than defined/undefined] Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 156 ++++++++++++++++++++++++++++++++++++++++ include/linux/fscrypt_common.h | 141 ------------------------------------ include/linux/fscrypt_notsupp.h | 7 +- include/linux/fscrypt_supp.h | 7 +- 4 files changed, 165 insertions(+), 146 deletions(-) create mode 100644 include/linux/fscrypt.h delete mode 100644 include/linux/fscrypt_common.h (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h new file mode 100644 index 000000000000..bfc962e62078 --- /dev/null +++ b/include/linux/fscrypt.h @@ -0,0 +1,156 @@ +/* + * fscrypt.h: declarations for per-file encryption + * + * Filesystems that implement per-file encryption include this header + * file with the __FS_HAS_ENCRYPTION set according to whether that filesystem + * is being built with encryption support or not. + * + * Copyright (C) 2015, Google, Inc. + * + * Written by Michael Halcrow, 2015. + * Modified by Jaegeuk Kim, 2015. + */ +#ifndef _LINUX_FSCRYPT_H +#define _LINUX_FSCRYPT_H + +#include +#include +#include +#include +#include +#include +#include + +#define FS_CRYPTO_BLOCK_SIZE 16 + +struct fscrypt_info; + +struct fscrypt_ctx { + union { + struct { + struct page *bounce_page; /* Ciphertext page */ + struct page *control_page; /* Original page */ + } w; + struct { + struct bio *bio; + struct work_struct work; + } r; + struct list_head free_list; /* Free list */ + }; + u8 flags; /* Flags */ +}; + +/** + * For encrypted symlinks, the ciphertext length is stored at the beginning + * of the string in little-endian format. + */ +struct fscrypt_symlink_data { + __le16 len; + char encrypted_path[1]; +} __packed; + +struct fscrypt_str { + unsigned char *name; + u32 len; +}; + +struct fscrypt_name { + const struct qstr *usr_fname; + struct fscrypt_str disk_name; + u32 hash; + u32 minor_hash; + struct fscrypt_str crypto_buf; +}; + +#define FSTR_INIT(n, l) { .name = n, .len = l } +#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) +#define fname_name(p) ((p)->disk_name.name) +#define fname_len(p) ((p)->disk_name.len) + +/* + * fscrypt superblock flags + */ +#define FS_CFLG_OWN_PAGES (1U << 1) + +/* + * crypto opertions for filesystems + */ +struct fscrypt_operations { + unsigned int flags; + const char *key_prefix; + int (*get_context)(struct inode *, void *, size_t); + int (*set_context)(struct inode *, const void *, size_t, void *); + bool (*dummy_context)(struct inode *); + bool (*is_encrypted)(struct inode *); + bool (*empty_dir)(struct inode *); + unsigned (*max_namelen)(struct inode *); +}; + +/* Maximum value for the third parameter of fscrypt_operations.set_context(). */ +#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 + +static inline bool fscrypt_dummy_context_enabled(struct inode *inode) +{ + if (inode->i_sb->s_cop->dummy_context && + inode->i_sb->s_cop->dummy_context(inode)) + return true; + return false; +} + +static inline bool fscrypt_valid_enc_modes(u32 contents_mode, + u32 filenames_mode) +{ + if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && + filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) + return true; + + if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && + filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) + return true; + + return false; +} + +static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) +{ + if (str->len == 1 && str->name[0] == '.') + return true; + + if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') + return true; + + return false; +} + +#if __FS_HAS_ENCRYPTION + +static inline struct page *fscrypt_control_page(struct page *page) +{ + return ((struct fscrypt_ctx *)page_private(page))->w.control_page; +} + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return (inode->i_crypt_info != NULL); +} + +#include + +#else /* !__FS_HAS_ENCRYPTION */ + +static inline struct page *fscrypt_control_page(struct page *page) +{ + WARN_ON_ONCE(1); + return ERR_PTR(-EINVAL); +} + +static inline bool fscrypt_has_encryption_key(const struct inode *inode) +{ + return 0; +} + +#include +#endif /* __FS_HAS_ENCRYPTION */ + + +#endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_common.h b/include/linux/fscrypt_common.h deleted file mode 100644 index 97f738628b36..000000000000 --- a/include/linux/fscrypt_common.h +++ /dev/null @@ -1,141 +0,0 @@ -/* - * fscrypt_common.h: common declarations for per-file encryption - * - * Copyright (C) 2015, Google, Inc. - * - * Written by Michael Halcrow, 2015. - * Modified by Jaegeuk Kim, 2015. - */ - -#ifndef _LINUX_FSCRYPT_COMMON_H -#define _LINUX_FSCRYPT_COMMON_H - -#include -#include -#include -#include -#include -#include -#include - -#define FS_CRYPTO_BLOCK_SIZE 16 - -struct fscrypt_info; - -struct fscrypt_ctx { - union { - struct { - struct page *bounce_page; /* Ciphertext page */ - struct page *control_page; /* Original page */ - } w; - struct { - struct bio *bio; - struct work_struct work; - } r; - struct list_head free_list; /* Free list */ - }; - u8 flags; /* Flags */ -}; - -/** - * For encrypted symlinks, the ciphertext length is stored at the beginning - * of the string in little-endian format. - */ -struct fscrypt_symlink_data { - __le16 len; - char encrypted_path[1]; -} __packed; - -struct fscrypt_str { - unsigned char *name; - u32 len; -}; - -struct fscrypt_name { - const struct qstr *usr_fname; - struct fscrypt_str disk_name; - u32 hash; - u32 minor_hash; - struct fscrypt_str crypto_buf; -}; - -#define FSTR_INIT(n, l) { .name = n, .len = l } -#define FSTR_TO_QSTR(f) QSTR_INIT((f)->name, (f)->len) -#define fname_name(p) ((p)->disk_name.name) -#define fname_len(p) ((p)->disk_name.len) - -/* - * fscrypt superblock flags - */ -#define FS_CFLG_OWN_PAGES (1U << 1) - -/* - * crypto opertions for filesystems - */ -struct fscrypt_operations { - unsigned int flags; - const char *key_prefix; - int (*get_context)(struct inode *, void *, size_t); - int (*set_context)(struct inode *, const void *, size_t, void *); - bool (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); - bool (*empty_dir)(struct inode *); - unsigned (*max_namelen)(struct inode *); -}; - -/* Maximum value for the third parameter of fscrypt_operations.set_context(). */ -#define FSCRYPT_SET_CONTEXT_MAX_SIZE 28 - -static inline bool fscrypt_dummy_context_enabled(struct inode *inode) -{ - if (inode->i_sb->s_cop->dummy_context && - inode->i_sb->s_cop->dummy_context(inode)) - return true; - return false; -} - -static inline bool fscrypt_valid_enc_modes(u32 contents_mode, - u32 filenames_mode) -{ - if (contents_mode == FS_ENCRYPTION_MODE_AES_128_CBC && - filenames_mode == FS_ENCRYPTION_MODE_AES_128_CTS) - return true; - - if (contents_mode == FS_ENCRYPTION_MODE_AES_256_XTS && - filenames_mode == FS_ENCRYPTION_MODE_AES_256_CTS) - return true; - - return false; -} - -static inline bool fscrypt_is_dot_dotdot(const struct qstr *str) -{ - if (str->len == 1 && str->name[0] == '.') - return true; - - if (str->len == 2 && str->name[0] == '.' && str->name[1] == '.') - return true; - - return false; -} - -static inline struct page *fscrypt_control_page(struct page *page) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return ((struct fscrypt_ctx *)page_private(page))->w.control_page; -#else - WARN_ON_ONCE(1); - return ERR_PTR(-EINVAL); -#endif -} - -static inline int fscrypt_has_encryption_key(const struct inode *inode) -{ -#if IS_ENABLED(CONFIG_FS_ENCRYPTION) - return (inode->i_crypt_info != NULL); -#else - return 0; -#endif -} - -#endif /* _LINUX_FSCRYPT_COMMON_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index ec406aed2f2f..2d0b6960831e 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -3,13 +3,16 @@ * * This stubs out the fscrypt functions for filesystems configured without * encryption support. + * + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_notsupp.h!" +#endif #ifndef _LINUX_FSCRYPT_NOTSUPP_H #define _LINUX_FSCRYPT_NOTSUPP_H -#include - /* crypto.c */ static inline struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *inode, gfp_t gfp_flags) diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 32e2fcf13b01..5a90e5ef4687 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -1,14 +1,15 @@ /* * fscrypt_supp.h * - * This is included by filesystems configured with encryption support. + * Do not include this file directly. Use fscrypt.h instead! */ +#ifndef _LINUX_FSCRYPT_H +#error "Incorrect include of linux/fscrypt_supp.h!" +#endif #ifndef _LINUX_FSCRYPT_SUPP_H #define _LINUX_FSCRYPT_SUPP_H -#include - /* crypto.c */ extern struct kmem_cache *fscrypt_info_cachep; extern struct fscrypt_ctx *fscrypt_get_ctx(const struct inode *, gfp_t); -- cgit v1.2.3 From 2ee6a576be56427209d370d8a511d49340c84139 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:35 -0700 Subject: fs, fscrypt: add an S_ENCRYPTED inode flag Introduce a flag S_ENCRYPTED which can be set in ->i_flags to indicate that the inode is encrypted using the fscrypt (fs/crypto/) mechanism. Checking this flag will give the same information that inode->i_sb->s_cop->is_encrypted(inode) currently does, but will be more efficient. This will be useful for adding higher-level helper functions for filesystems to use. For example we'll be able to replace this: if (ext4_encrypted_inode(inode)) { ret = fscrypt_get_encryption_info(inode); if (ret) return ret; if (!fscrypt_has_encryption_key(inode)) return -ENOKEY; } with this: ret = fscrypt_require_key(inode); if (ret) return ret; ... since we'll be able to retain the fast path for unencrypted files as a single flag check, using an inline function. This wasn't possible before because we'd have had to frequently call through the ->i_sb->s_cop->is_encrypted function pointer, even when the encryption support was disabled or not being used. Note: we don't define S_ENCRYPTED to 0 if CONFIG_FS_ENCRYPTION is disabled because we want to continue to return an error if an encrypted file is accessed without encryption support, rather than pretending that it is unencrypted. Reviewed-by: Chao Yu Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..055d2fbf8eca 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1853,6 +1853,7 @@ struct super_operations { #else #define S_DAX 0 /* Make all the DAX code disappear */ #endif +#define S_ENCRYPTED 16384 /* Encrypted file (using fs/crypto/) */ /* * Note that nosuid etc flags are inode-specific: setting some file-system @@ -1892,6 +1893,7 @@ static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags #define IS_AUTOMOUNT(inode) ((inode)->i_flags & S_AUTOMOUNT) #define IS_NOSEC(inode) ((inode)->i_flags & S_NOSEC) #define IS_DAX(inode) ((inode)->i_flags & S_DAX) +#define IS_ENCRYPTED(inode) ((inode)->i_flags & S_ENCRYPTED) #define IS_WHITEOUT(inode) (S_ISCHR(inode->i_mode) && \ (inode)->i_rdev == WHITEOUT_DEV) -- cgit v1.2.3 From e0428a266d5a29a3c2eec287fcd49be9e8e2468e Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:36 -0700 Subject: fscrypt: switch from ->is_encrypted() to IS_ENCRYPTED() IS_ENCRYPTED() now gives the same information as i_sb->s_cop->is_encrypted() but is more efficient, since IS_ENCRYPTED() is just a simple flag check. Prepare to remove ->is_encrypted() by switching all callers to IS_ENCRYPTED(). Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 2d0b6960831e..7b390e356f7f 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -100,7 +100,7 @@ static inline int fscrypt_setup_filename(struct inode *dir, const struct qstr *iname, int lookup, struct fscrypt_name *fname) { - if (dir->i_sb->s_cop->is_encrypted(dir)) + if (IS_ENCRYPTED(dir)) return -EOPNOTSUPP; memset(fname, 0, sizeof(struct fscrypt_name)); -- cgit v1.2.3 From f7293e48bb1d0c482cd706deb1256a6be718f4f5 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:37 -0700 Subject: fscrypt: remove ->is_encrypted() Now that all callers of fscrypt_operations.is_encrypted() have been switched to IS_ENCRYPTED(), remove ->is_encrypted(). Reviewed-by: Chao Yu Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index bfc962e62078..d58e6a90cfe4 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -81,7 +81,6 @@ struct fscrypt_operations { int (*get_context)(struct inode *, void *, size_t); int (*set_context)(struct inode *, const void *, size_t, void *); bool (*dummy_context)(struct inode *); - bool (*is_encrypted)(struct inode *); bool (*empty_dir)(struct inode *); unsigned (*max_namelen)(struct inode *); }; -- cgit v1.2.3 From d293c3e4e07334d761c88df9d68b3aa800a83dd9 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:39 -0700 Subject: fscrypt: new helper function - fscrypt_require_key() Add a helper function which checks if an inode is encrypted, and if so, tries to set up its encryption key. This is a pattern which is duplicated in multiple places in each of ext4, f2fs, and ubifs --- for example, when a regular file is asked to be opened or truncated. Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index d58e6a90cfe4..b3e2a5f93415 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -151,5 +151,30 @@ static inline bool fscrypt_has_encryption_key(const struct inode *inode) #include #endif /* __FS_HAS_ENCRYPTION */ +/** + * fscrypt_require_key - require an inode's encryption key + * @inode: the inode we need the key for + * + * If the inode is encrypted, set up its encryption key if not already done. + * Then require that the key be present and return -ENOKEY otherwise. + * + * No locks are needed, and the key will live as long as the struct inode --- so + * it won't go away from under you. + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_require_key(struct inode *inode) +{ + if (IS_ENCRYPTED(inode)) { + int err = fscrypt_get_encryption_info(inode); + + if (err) + return err; + if (!fscrypt_has_encryption_key(inode)) + return -ENOKEY; + } + return 0; +} #endif /* _LINUX_FSCRYPT_H */ -- cgit v1.2.3 From efcc7ae2c9172d9a7ae94afdaf066a7abf0b9a90 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:40 -0700 Subject: fscrypt: new helper function - fscrypt_file_open() Add a helper function which prepares to open a regular file which may be encrypted. It handles setting up the file's encryption key, then checking that the file's encryption policy matches that of its parent directory (if the parent directory is encrypted). It may be set as the ->open() method or it can be called from another ->open() method. Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt_notsupp.h | 9 +++++++++ include/linux/fscrypt_supp.h | 3 +++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 7b390e356f7f..162da6517ac4 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -177,4 +177,13 @@ static inline int fscrypt_zeroout_range(const struct inode *inode, pgoff_t lblk, return -EOPNOTSUPP; } +/* hooks.c */ + +static inline int fscrypt_file_open(struct inode *inode, struct file *filp) +{ + if (IS_ENCRYPTED(inode)) + return -EOPNOTSUPP; + return 0; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 5a90e5ef4687..fd2f6decaee4 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -143,4 +143,7 @@ extern void fscrypt_pullback_bio_page(struct page **, bool); extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, unsigned int); +/* hooks.c */ +extern int fscrypt_file_open(struct inode *inode, struct file *filp); + #endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit v1.2.3 From 0ea87a9644ebb5c9a3b100585d10533366de3269 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:41 -0700 Subject: fscrypt: new helper function - fscrypt_prepare_link() Introduce a helper function which prepares to link an inode into a possibly-encrypted directory. It handles setting up the target directory's encryption key, then verifying that the link won't violate the constraint that all files in an encrypted directory tree use the same encryption policy. Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 27 +++++++++++++++++++++++++++ include/linux/fscrypt_notsupp.h | 6 ++++++ include/linux/fscrypt_supp.h | 1 + 3 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index b3e2a5f93415..9c9a53f99327 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -177,4 +177,31 @@ static inline int fscrypt_require_key(struct inode *inode) return 0; } +/** + * fscrypt_prepare_link - prepare to link an inode into a possibly-encrypted directory + * @old_dentry: an existing dentry for the inode being linked + * @dir: the target directory + * @dentry: negative dentry for the target filename + * + * A new link can only be added to an encrypted directory if the directory's + * encryption key is available --- since otherwise we'd have no way to encrypt + * the filename. Therefore, we first set up the directory's encryption key (if + * not already done) and return an error if it's unavailable. + * + * We also verify that the link will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if the directory's encryption key is missing, + * -EPERM if the link would result in an inconsistent encryption policy, or + * another -errno code. + */ +static inline int fscrypt_prepare_link(struct dentry *old_dentry, + struct inode *dir, + struct dentry *dentry) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_link(d_inode(old_dentry), dir); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 162da6517ac4..d7d1039eb6b5 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -186,4 +186,10 @@ static inline int fscrypt_file_open(struct inode *inode, struct file *filp) return 0; } +static inline int __fscrypt_prepare_link(struct inode *inode, + struct inode *dir) +{ + return -EOPNOTSUPP; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index fd2f6decaee4..80706283da75 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -145,5 +145,6 @@ extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, /* hooks.c */ extern int fscrypt_file_open(struct inode *inode, struct file *filp); +extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); #endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit v1.2.3 From 94b26f3672a0e41025104c7e46943917544e1c87 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:42 -0700 Subject: fscrypt: new helper function - fscrypt_prepare_rename() Introduce a helper function which prepares to rename a file into a possibly encrypted directory. It handles loading the encryption keys for the source and target directories if needed, and it handles enforcing that if the target directory (and the source directory for a cross-rename) is encrypted, then the file being moved into the directory has the same encryption policy as its containing directory. Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 33 +++++++++++++++++++++++++++++++++ include/linux/fscrypt_notsupp.h | 9 +++++++++ include/linux/fscrypt_supp.h | 5 +++++ 3 files changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 9c9a53f99327..c422367baed9 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -204,4 +204,37 @@ static inline int fscrypt_prepare_link(struct dentry *old_dentry, return 0; } +/** + * fscrypt_prepare_rename - prepare for a rename between possibly-encrypted directories + * @old_dir: source directory + * @old_dentry: dentry for source file + * @new_dir: target directory + * @new_dentry: dentry for target location (may be negative unless exchanging) + * @flags: rename flags (we care at least about %RENAME_EXCHANGE) + * + * Prepare for ->rename() where the source and/or target directories may be + * encrypted. A new link can only be added to an encrypted directory if the + * directory's encryption key is available --- since otherwise we'd have no way + * to encrypt the filename. A rename to an existing name, on the other hand, + * *is* cryptographically possible without the key. However, we take the more + * conservative approach and just forbid all no-key renames. + * + * We also verify that the rename will not violate the constraint that all files + * in an encrypted directory tree use the same encryption policy. + * + * Return: 0 on success, -ENOKEY if an encryption key is missing, -EPERM if the + * rename would cause inconsistent encryption policies, or another -errno code. + */ +static inline int fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(old_dir) || IS_ENCRYPTED(new_dir)) + return __fscrypt_prepare_rename(old_dir, old_dentry, + new_dir, new_dentry, flags); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index d7d1039eb6b5..6af378d8126e 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -192,4 +192,13 @@ static inline int __fscrypt_prepare_link(struct inode *inode, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags) +{ + return -EOPNOTSUPP; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 80706283da75..40f35073145f 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -146,5 +146,10 @@ extern int fscrypt_zeroout_range(const struct inode *, pgoff_t, sector_t, /* hooks.c */ extern int fscrypt_file_open(struct inode *inode, struct file *filp); extern int __fscrypt_prepare_link(struct inode *inode, struct inode *dir); +extern int __fscrypt_prepare_rename(struct inode *old_dir, + struct dentry *old_dentry, + struct inode *new_dir, + struct dentry *new_dentry, + unsigned int flags); #endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit v1.2.3 From 32c3cf028e747d1e9cf0679b16dcbe3794fe4853 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:43 -0700 Subject: fscrypt: new helper function - fscrypt_prepare_lookup() Introduce a helper function which prepares to look up the given dentry in the given directory. If the directory is encrypted, it handles loading the directory's encryption key, setting the dentry's ->d_op to fscrypt_d_ops, and setting DCACHE_ENCRYPTED_WITH_KEY if the directory's encryption key is available. Note: once all filesystems switch over to this, we'll be able to move fscrypt_d_ops and fscrypt_set_encrypted_dentry() to fscrypt_private.h. Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 28 ++++++++++++++++++++++++++++ include/linux/fscrypt_notsupp.h | 6 ++++++ include/linux/fscrypt_supp.h | 1 + 3 files changed, 35 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index c422367baed9..2327859c8cd2 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -237,4 +237,32 @@ static inline int fscrypt_prepare_rename(struct inode *old_dir, return 0; } +/** + * fscrypt_prepare_lookup - prepare to lookup a name in a possibly-encrypted directory + * @dir: directory being searched + * @dentry: filename being looked up + * @flags: lookup flags + * + * Prepare for ->lookup() in a directory which may be encrypted. Lookups can be + * done with or without the directory's encryption key; without the key, + * filenames are presented in encrypted form. Therefore, we'll try to set up + * the directory's encryption key, but even without it the lookup can continue. + * + * To allow invalidating stale dentries if the directory's encryption key is + * added later, we also install a custom ->d_revalidate() method and use the + * DCACHE_ENCRYPTED_WITH_KEY flag to indicate whether a given dentry is a + * plaintext name (flag set) or a ciphertext name (flag cleared). + * + * Return: 0 on success, -errno if a problem occurred while setting up the + * encryption key + */ +static inline int fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry, + unsigned int flags) +{ + if (IS_ENCRYPTED(dir)) + return __fscrypt_prepare_lookup(dir, dentry); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ diff --git a/include/linux/fscrypt_notsupp.h b/include/linux/fscrypt_notsupp.h index 6af378d8126e..c4c6bf2c390e 100644 --- a/include/linux/fscrypt_notsupp.h +++ b/include/linux/fscrypt_notsupp.h @@ -201,4 +201,10 @@ static inline int __fscrypt_prepare_rename(struct inode *old_dir, return -EOPNOTSUPP; } +static inline int __fscrypt_prepare_lookup(struct inode *dir, + struct dentry *dentry) +{ + return -EOPNOTSUPP; +} + #endif /* _LINUX_FSCRYPT_NOTSUPP_H */ diff --git a/include/linux/fscrypt_supp.h b/include/linux/fscrypt_supp.h index 40f35073145f..2db5e9706f60 100644 --- a/include/linux/fscrypt_supp.h +++ b/include/linux/fscrypt_supp.h @@ -151,5 +151,6 @@ extern int __fscrypt_prepare_rename(struct inode *old_dir, struct inode *new_dir, struct dentry *new_dentry, unsigned int flags); +extern int __fscrypt_prepare_lookup(struct inode *dir, struct dentry *dentry); #endif /* _LINUX_FSCRYPT_SUPP_H */ -- cgit v1.2.3 From 815dac33b27da9efce0f6008272e69f2a1ba1a33 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 9 Oct 2017 12:15:44 -0700 Subject: fscrypt: new helper function - fscrypt_prepare_setattr() Introduce a helper function for filesystems to call when processing ->setattr() on a possibly-encrypted inode. It handles enforcing that an encrypted file can only be truncated if its encryption key is available. Acked-by: Dave Chinner Signed-off-by: Eric Biggers Signed-off-by: Theodore Ts'o --- include/linux/fscrypt.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fscrypt.h b/include/linux/fscrypt.h index 2327859c8cd2..53437bfdfcbc 100644 --- a/include/linux/fscrypt.h +++ b/include/linux/fscrypt.h @@ -265,4 +265,29 @@ static inline int fscrypt_prepare_lookup(struct inode *dir, return 0; } +/** + * fscrypt_prepare_setattr - prepare to change a possibly-encrypted inode's attributes + * @dentry: dentry through which the inode is being changed + * @attr: attributes to change + * + * Prepare for ->setattr() on a possibly-encrypted inode. On an encrypted file, + * most attribute changes are allowed even without the encryption key. However, + * without the encryption key we do have to forbid truncates. This is needed + * because the size being truncated to may not be a multiple of the filesystem + * block size, and in that case we'd have to decrypt the final block, zero the + * portion past i_size, and re-encrypt it. (We *could* allow truncating to a + * filesystem block boundary, but it's simpler to just forbid all truncates --- + * and we already forbid all other contents modifications without the key.) + * + * Return: 0 on success, -ENOKEY if the key is missing, or another -errno code + * if a problem occurred while setting up the encryption key. + */ +static inline int fscrypt_prepare_setattr(struct dentry *dentry, + struct iattr *attr) +{ + if (attr->ia_valid & ATTR_SIZE) + return fscrypt_require_key(d_inode(dentry)); + return 0; +} + #endif /* _LINUX_FSCRYPT_H */ -- cgit v1.2.3 From 5cdda5117e125e0dbb020425cc55a4c143c6febc Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Wed, 18 Oct 2017 17:24:28 +0200 Subject: locking/static_keys: Improve uninitialized key warning Right now it says: static_key_disable_cpuslocked used before call to jump_label_init ------------[ cut here ]------------ WARNING: CPU: 0 PID: 0 at kernel/jump_label.c:161 static_key_disable_cpuslocked+0x68/0x70 Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.14.0-rc5+ #1 Hardware name: SGI.COM C2112-4GP3/X10DRT-P-Series, BIOS 2.0a 05/09/2016 task: ffffffff81c0e480 task.stack: ffffffff81c00000 RIP: 0010:static_key_disable_cpuslocked+0x68/0x70 RSP: 0000:ffffffff81c03ef0 EFLAGS: 00010096 ORIG_RAX: 0000000000000000 RAX: 0000000000000041 RBX: ffffffff81c32680 RCX: ffffffff81c5cbf8 RDX: 0000000000000001 RSI: 0000000000000092 RDI: 0000000000000002 RBP: ffff88807fffd240 R08: 726f666562206465 R09: 0000000000000136 R10: 0000000000000000 R11: 696e695f6c656261 R12: ffffffff82158900 R13: ffffffff8215f760 R14: 0000000000000001 R15: 0000000000000008 FS: 0000000000000000(0000) GS:ffff883f7f400000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88807ffff000 CR3: 0000000001c09000 CR4: 00000000000606b0 Call Trace: static_key_disable+0x16/0x20 start_kernel+0x15a/0x45d ? load_ucode_intel_bsp+0x11/0x2d secondary_startup_64+0xa5/0xb0 Code: 48 c7 c7 a0 15 cf 81 e9 47 53 4b 00 48 89 df e8 5f fc ff ff eb e8 48 c7 c6 \ c0 97 83 81 48 c7 c7 d0 ff a2 81 31 c0 e8 c5 9d f5 ff <0f> ff eb a7 0f ff eb \ b0 e8 eb a2 4b 00 53 48 89 fb e8 42 0e f0 but it doesn't tell me which key it is. So dump the key's name too: static_key_disable_cpuslocked(): static key 'virt_spin_lock_key' used before call to jump_label_init() And that makes pinpointing which key is causing that a lot easier. include/linux/jump_label.h | 14 +++++++------- include/linux/jump_label_ratelimit.h | 6 +++--- kernel/jump_label.c | 14 +++++++------- 3 files changed, 17 insertions(+), 17 deletions(-) Signed-off-by: Borislav Petkov Reviewed-by: Steven Rostedt (VMware) Cc: Boris Ostrovsky Cc: Hannes Frederic Sowa Cc: Jason Baron Cc: Juergen Gross Cc: Linus Torvalds Cc: Paolo Bonzini Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171018152428.ffjgak4o25f7ept6@pd.tnic Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 14 +++++++------- include/linux/jump_label_ratelimit.h | 6 +++--- 2 files changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index cd5861651b17..979a2f2d529b 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -81,9 +81,9 @@ extern bool static_key_initialized; -#define STATIC_KEY_CHECK_USE() WARN(!static_key_initialized, \ - "%s used before call to jump_label_init", \ - __func__) +#define STATIC_KEY_CHECK_USE(key) WARN(!static_key_initialized, \ + "%s(): static key '%pS' used before call to jump_label_init()", \ + __func__, (key)) #ifdef HAVE_JUMP_LABEL @@ -211,13 +211,13 @@ static __always_inline bool static_key_true(struct static_key *key) static inline void static_key_slow_inc(struct static_key *key) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); atomic_inc(&key->enabled); } static inline void static_key_slow_dec(struct static_key *key) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); atomic_dec(&key->enabled); } @@ -236,7 +236,7 @@ static inline int jump_label_apply_nops(struct module *mod) static inline void static_key_enable(struct static_key *key) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 0) { WARN_ON_ONCE(atomic_read(&key->enabled) != 1); @@ -247,7 +247,7 @@ static inline void static_key_enable(struct static_key *key) static inline void static_key_disable(struct static_key *key) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); if (atomic_read(&key->enabled) != 1) { WARN_ON_ONCE(atomic_read(&key->enabled) != 0); diff --git a/include/linux/jump_label_ratelimit.h b/include/linux/jump_label_ratelimit.h index 23da3af459fe..93086df0a847 100644 --- a/include/linux/jump_label_ratelimit.h +++ b/include/linux/jump_label_ratelimit.h @@ -24,18 +24,18 @@ struct static_key_deferred { }; static inline void static_key_slow_dec_deferred(struct static_key_deferred *key) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); static_key_slow_dec(&key->key); } static inline void static_key_deferred_flush(struct static_key_deferred *key) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); } static inline void jump_label_rate_limit(struct static_key_deferred *key, unsigned long rl) { - STATIC_KEY_CHECK_USE(); + STATIC_KEY_CHECK_USE(key); } #endif /* HAVE_JUMP_LABEL */ #endif /* _LINUX_JUMP_LABEL_RATELIMIT_H */ -- cgit v1.2.3 From bbb1cc050890dad2ad8747a23b9f22a53e726c9a Mon Sep 17 00:00:00 2001 From: Jonathan Corbet Date: Thu, 24 Aug 2017 16:13:18 -0600 Subject: usb: gadget: Add kerneldoc for some neglected structure fields A couple of structures in have incomplete kerneldoc comments, leading to these warnings in the docs build: ./include/linux/usb/gadget.h:230: warning: No description found for parameter 'claimed' ./include/linux/usb/gadget.h:230: warning: No description found for parameter 'enabled' ./include/linux/usb/gadget.h:412: warning: No description found for parameter 'quirk_altset_not_supp' ./include/linux/usb/gadget.h:412: warning: No description found for parameter 'quirk_stall_not_supp' ./include/linux/usb/gadget.h:412: warning: No description found for parameter 'quirk_zlp_not_supp' Document those fields to make the warnings go away. Signed-off-by: Jonathan Corbet Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 21468a722c4a..523e5a2b5015 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -188,6 +188,8 @@ struct usb_ep_caps { * @ops: Function pointers used to access hardware-specific operations. * @ep_list:the gadget's ep_list holds all of its endpoints * @caps:The structure describing types and directions supported by endoint. + * @enabled: The current endpoint enabled/disabled state. + * @claimed: True if this endpoint is claimed by a function. * @maxpacket:The maximum packet size used on this endpoint. The initial * value can sometimes be reduced (hardware allowing), according to * the endpoint descriptor used to configure the endpoint. @@ -349,6 +351,9 @@ struct usb_gadget_ops { * or B-Peripheral wants to take host role. * @quirk_ep_out_aligned_size: epout requires buffer size to be aligned to * MaxPacketSize. + * @quirk_altset_not_supp: UDC controller doesn't support alt settings. + * @quirk_stall_not_supp: UDC controller doesn't support stalling. + * @quirk_zlp_not_supp: UDC controller doesn't support ZLP. * @quirk_avoids_skb_reserve: udc/platform wants to avoid skb_reserve() in * u_ether.c to improve performance. * @is_selfpowered: if the gadget is self-powered. -- cgit v1.2.3 From 0f38672c629b79fa2b929d2c391bc063a08279eb Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 3 Oct 2017 20:09:14 +0900 Subject: usb: renesas_usbhs: add support for R-Car D3 This patch adds support for R-Car D3. This SoC needs to release the PLL reset by the UGCTRL register. So, since this is not the same as other R-Car Gen3 SoCs, this patch adds a new type as "USBHS_TYPE_RCAR_GEN3_WITH_PLL". Acked-by: Rob Herring Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- include/linux/usb/renesas_usbhs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 00a47d058d83..d2f41e4dd7a8 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -183,8 +183,9 @@ struct renesas_usbhs_driver_param { #define USBHS_USB_DMAC_XFER_SIZE 32 /* hardcode the xfer size */ }; -#define USBHS_TYPE_RCAR_GEN2 1 -#define USBHS_TYPE_RCAR_GEN3 2 +#define USBHS_TYPE_RCAR_GEN2 1 +#define USBHS_TYPE_RCAR_GEN3 2 +#define USBHS_TYPE_RCAR_GEN3_WITH_PLL 3 /* * option: -- cgit v1.2.3 From 93862e385ded7c60351e09fcd2a541d273650905 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Fri, 13 Oct 2017 15:08:41 -0400 Subject: livepatch: add (un)patch callbacks Provide livepatch modules a klp_object (un)patching notification mechanism. Pre and post-(un)patch callbacks allow livepatch modules to setup or synchronize changes that would be difficult to support in only patched-or-unpatched code contexts. Callbacks can be registered for target module or vmlinux klp_objects, but each implementation is klp_object specific. - Pre-(un)patch callbacks run before any (un)patching transition starts. - Post-(un)patch callbacks run once an object has been (un)patched and the klp_patch fully transitioned to its target state. Example use cases include modification of global data and registration of newly available services/handlers. See Documentation/livepatch/callbacks.txt for details and samples/livepatch/ for examples. Signed-off-by: Joe Lawrence Acked-by: Josh Poimboeuf Acked-by: Miroslav Benes Signed-off-by: Jiri Kosina --- include/linux/livepatch.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/livepatch.h b/include/linux/livepatch.h index d08eddc00497..fc5c1be3f6f4 100644 --- a/include/linux/livepatch.h +++ b/include/linux/livepatch.h @@ -87,10 +87,35 @@ struct klp_func { bool transition; }; +struct klp_object; + +/** + * struct klp_callbacks - pre/post live-(un)patch callback structure + * @pre_patch: executed before code patching + * @post_patch: executed after code patching + * @pre_unpatch: executed before code unpatching + * @post_unpatch: executed after code unpatching + * @post_unpatch_enabled: flag indicating if post-unpatch callback + * should run + * + * All callbacks are optional. Only the pre-patch callback, if provided, + * will be unconditionally executed. If the parent klp_object fails to + * patch for any reason, including a non-zero error status returned from + * the pre-patch callback, no further callbacks will be executed. + */ +struct klp_callbacks { + int (*pre_patch)(struct klp_object *obj); + void (*post_patch)(struct klp_object *obj); + void (*pre_unpatch)(struct klp_object *obj); + void (*post_unpatch)(struct klp_object *obj); + bool post_unpatch_enabled; +}; + /** * struct klp_object - kernel object structure for live patching * @name: module name (or NULL for vmlinux) * @funcs: function entries for functions to be patched in the object + * @callbacks: functions to be executed pre/post (un)patching * @kobj: kobject for sysfs resources * @mod: kernel module associated with the patched object * (NULL for vmlinux) @@ -100,6 +125,7 @@ struct klp_object { /* external */ const char *name; struct klp_func *funcs; + struct klp_callbacks callbacks; /* internal */ struct kobject kobj; -- cgit v1.2.3 From f1d783585486c7c612f277c2a6f0c9bb5a67e463 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Thu, 5 Oct 2017 10:44:54 +0900 Subject: irqdomain: Move revmap_trees_mutex to struct irq_domain The revmap_trees_mutex protects domain->revmap_tree. There is no need to make it global because it is allowed to modify revmap_tree of two different domains concurrently. Having said that, this would not be a actual bottleneck because the interrupt map/unmap does not occur quite often. Rather, the motivation is to tidy up the code from a data structure point of view. Signed-off-by: Masahiro Yamada Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7d0c6c144708..df162f7a4aad 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -32,6 +32,7 @@ #include #include #include +#include #include struct device_node; @@ -176,6 +177,7 @@ struct irq_domain { unsigned int revmap_direct_max_irq; unsigned int revmap_size; struct radix_tree_root revmap_tree; + struct mutex revmap_tree_mutex; unsigned int linear_revmap[]; }; -- cgit v1.2.3 From eda0d04acc5e317da675ee93a3f09e7c2e2fa592 Mon Sep 17 00:00:00 2001 From: Shanker Donthineni Date: Fri, 6 Oct 2017 10:24:00 -0500 Subject: irqchip/gic-v3: Add support for Range Selector (RS) feature A new feature Range Selector (RS) has been added to GIC specification in order to support more than 16 CPUs at affinity level 0. New fields are introduced in SGI system registers (ICC_SGI0R_EL1, ICC_SGI1R_EL1 and ICC_ASGI1R_EL1) to relax an artificial limit of 16 at level 0. - A new RSS field in ICC_CTLR_EL3, ICC_CTLR_EL1 and ICV_CTLR_EL1: [18] - Range Selector Support (RSS) 0b0 = Targeted SGIs with affinity level 0 values of 0-15 are supported. 0b1 = Targeted SGIs with affinity level 0 values of 0-255 are supported. - A new RS field in ICC_SGI0R_EL1, ICC_SGI1R_EL1 and ICC_ASGI1R_EL1: [47:44] - RangeSelector (RS) which group of 16 TargetList[n] field TargetList[n] represents aff0 value ((RS*16)+n) When ICC_CTLR_EL3.RSS==0 or ICC_CTLR_EL1.RSS==0, RS is RES0. - A new RSS field in GICD_TYPER: [26] - Range Selector Support (RSS) 0b0 = Targeted SGIs with affinity level 0 values of 0-15 are supported. 0b1 = Targeted SGIs with affinity level 0 values of 0-255 are supported. Signed-off-by: Shanker Donthineni Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 1ea576c8126f..b8b59989bd73 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -68,6 +68,7 @@ #define GICD_CTLR_ENABLE_SS_G1 (1U << 1) #define GICD_CTLR_ENABLE_SS_G0 (1U << 0) +#define GICD_TYPER_RSS (1U << 26) #define GICD_TYPER_LPIS (1U << 17) #define GICD_TYPER_MBIS (1U << 16) @@ -459,6 +460,7 @@ #define ICC_CTLR_EL1_SEIS_MASK (0x1 << ICC_CTLR_EL1_SEIS_SHIFT) #define ICC_CTLR_EL1_A3V_SHIFT 15 #define ICC_CTLR_EL1_A3V_MASK (0x1 << ICC_CTLR_EL1_A3V_SHIFT) +#define ICC_CTLR_EL1_RSS (0x1 << 18) #define ICC_PMR_EL1_SHIFT 0 #define ICC_PMR_EL1_MASK (0xff << ICC_PMR_EL1_SHIFT) #define ICC_BPR0_EL1_SHIFT 0 @@ -547,6 +549,8 @@ #define ICC_SGI1R_AFFINITY_2_SHIFT 32 #define ICC_SGI1R_AFFINITY_2_MASK (0xffULL << ICC_SGI1R_AFFINITY_2_SHIFT) #define ICC_SGI1R_IRQ_ROUTING_MODE_BIT 40 +#define ICC_SGI1R_RS_SHIFT 44 +#define ICC_SGI1R_RS_MASK (0xfULL << ICC_SGI1R_RS_SHIFT) #define ICC_SGI1R_AFFINITY_3_SHIFT 48 #define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_3_SHIFT) -- cgit v1.2.3 From ab60491ee5d346557f152c7e8d3e7238c9b96c5c Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 8 Oct 2017 18:48:06 +0100 Subject: irqchip/gic-v3-its: Make GICv4_ITS_LIST_MAX globally available As we're about to make use of the maximum number of ITSs in a GICv4 system, let's make this value global (and rename it to GICv4_ITS_LIST_MAX). Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 58a4d89aa82c..e26a668826e6 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -20,6 +20,12 @@ struct its_vpe; +/* + * Maximum number of ITTs when GITS_TYPER.VMOVP == 0, using the + * ITSList mechanism to perform inter-ITS synchronization. + */ +#define GICv4_ITS_LIST_MAX 16 + /* Embedded in kvm.arch */ struct its_vm { struct fwnode_handle *fwnode; -- cgit v1.2.3 From 2247e1bf70639642b1c1375aa9176ccd95736400 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Sun, 8 Oct 2017 18:50:36 +0100 Subject: irqchip/gic-v3-its: Limit scope of VPE mapping to be per ITS So far, we map all VPEs on all ITSs. While this is not wrong, this is quite a big hammer, as moving a VPE around requires all ITSs to be synchronized. Needles to say, this is an expensive proposition. Instead, let's switch to a mode where we issue VMAPP commands only on ITSs that are actually involved in reporting interrupts to the given VM. For that purpose, we refcount the number of interrupts are are mapped for this VM on each ITS, performing the map/unmap operations as required. It then allows us to use this refcount to only issue VMOVP to the ITSs that need to know about this VM. Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index e26a668826e6..43cde15f221b 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -36,6 +36,7 @@ struct its_vm { irq_hw_number_t db_lpi_base; unsigned long *db_bitmap; int nr_db_lpis; + u32 vlpi_count[GICv4_ITS_LIST_MAX]; }; /* Embedded in kvm_vcpu.arch */ -- cgit v1.2.3 From 7a0947e755084b918e33242fd558e55cb443408e Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Tue, 17 Oct 2017 17:16:52 -0700 Subject: dql: make dql_init return void dql_init always returned 0, and the only place that uses it in network core code didn't care about the return value anyway. Signed-off-by: Stephen Hemminger Acked-by: Hiroaki SHIMODA Signed-off-by: David S. Miller --- include/linux/dynamic_queue_limits.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index a4be70398ce1..f69f98541953 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -98,7 +98,7 @@ void dql_completed(struct dql *dql, unsigned int count); void dql_reset(struct dql *dql); /* Initialize dql state */ -int dql_init(struct dql *dql, unsigned hold_time); +void dql_init(struct dql *dql, unsigned int hold_time); #endif /* _KERNEL_ */ -- cgit v1.2.3 From 93b138dd9d6e01cd360f347512b6c5bf6f0c4ce2 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Mon, 18 Sep 2017 00:01:44 +0900 Subject: printk: simplify no_printk() Commit 069f0cd00df0 ("printk: Make the printk*once() variants return a value") surrounded the macro implementation with ({ ... }). Now, the inner do { ... } while (0); is redundant. Link: http://lkml.kernel.org/r/1505660504-11059-1-git-send-email-yamada.masahiro@socionext.com Cc: Andrew Morton Cc: Ingo Molnar Cc: Masahiro Yamada Cc: Steven Rostedt Cc: linux-kernel@vger.kernel.org Signed-off-by: Masahiro Yamada Reviewed-by: Sergey Senozhatsky Signed-off-by: Petr Mladek --- include/linux/printk.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index e10f27468322..7911f7364346 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -131,10 +131,8 @@ struct va_format { */ #define no_printk(fmt, ...) \ ({ \ - do { \ - if (0) \ - printk(fmt, ##__VA_ARGS__); \ - } while (0); \ + if (0) \ + printk(fmt, ##__VA_ARGS__); \ 0; \ }) -- cgit v1.2.3 From aa293583f0fe8b1634aeadbea06b4d0d04c30a95 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 16 Oct 2017 17:18:40 +0200 Subject: configfs: make ci_type field, some pointers and function arguments const The ci_type field of the config_item structure do not modify the fields of the config_item_type structure it points to. And the other pointers initialized with ci_type do not modify the fields as well. So, make the ci_type field and the pointers initialized with ci_type as const. Make the struct config_item_type *type function argument of functions config_{item/group}_init_type_name const as the argument in both the functions is only stored in the ci_type field of a config_item structure which is now made const. Make the argument of configfs_register_default_group const as it is only passed to the argument of the function config_group_init_type_name which is now const. Signed-off-by: Bhumika Goyal Acked-by: Greg Kroah-Hartman Signed-off-by: Christoph Hellwig --- include/linux/configfs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index c96709049683..90b90f8baf99 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -58,7 +58,7 @@ struct config_item { struct list_head ci_entry; struct config_item *ci_parent; struct config_group *ci_group; - struct config_item_type *ci_type; + const struct config_item_type *ci_type; struct dentry *ci_dentry; }; @@ -72,7 +72,7 @@ static inline char *config_item_name(struct config_item * item) extern void config_item_init_type_name(struct config_item *item, const char *name, - struct config_item_type *type); + const struct config_item_type *type); extern struct config_item *config_item_get(struct config_item *); extern struct config_item *config_item_get_unless_zero(struct config_item *); @@ -101,7 +101,7 @@ struct config_group { extern void config_group_init(struct config_group *group); extern void config_group_init_type_name(struct config_group *group, const char *name, - struct config_item_type *type); + const struct config_item_type *type); static inline struct config_group *to_config_group(struct config_item *item) { @@ -261,7 +261,7 @@ void configfs_remove_default_groups(struct config_group *group); struct config_group * configfs_register_default_group(struct config_group *parent_group, const char *name, - struct config_item_type *item_type); + const struct config_item_type *item_type); void configfs_unregister_default_group(struct config_group *group); /* These functions can sleep and can alloc with GFP_KERNEL */ -- cgit v1.2.3 From 612a462acbb9f3bfc7c8433b44118b0a156560da Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Mon, 16 Oct 2017 17:18:43 +0200 Subject: iio: make function argument and some structures const Make the argument of the functions iio_sw{d/t}_group_init_type_name const as they are only passed to the function config_group_init_type_name having the argument as const. Make the config_item_type structures const as they are either passed to the functions having the argument as const or they are stored in the const "ci_type" field of a config_item structure. Signed-off-by: Bhumika Goyal Acked-by: Jonathan Cameron Signed-off-by: Christoph Hellwig --- include/linux/iio/sw_device.h | 2 +- include/linux/iio/sw_trigger.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iio/sw_device.h b/include/linux/iio/sw_device.h index fa7931933067..8642b91a7577 100644 --- a/include/linux/iio/sw_device.h +++ b/include/linux/iio/sw_device.h @@ -60,7 +60,7 @@ void iio_sw_device_type_configfs_unregister(struct iio_sw_device_type *dt); static inline void iio_swd_group_init_type_name(struct iio_sw_device *d, const char *name, - struct config_item_type *type) + const struct config_item_type *type) { #if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&d->group, name, type); diff --git a/include/linux/iio/sw_trigger.h b/include/linux/iio/sw_trigger.h index c97eab67558f..0c43738a9e24 100644 --- a/include/linux/iio/sw_trigger.h +++ b/include/linux/iio/sw_trigger.h @@ -60,7 +60,7 @@ void iio_sw_trigger_type_configfs_unregister(struct iio_sw_trigger_type *tt); static inline void iio_swt_group_init_type_name(struct iio_sw_trigger *t, const char *name, - struct config_item_type *type) + const struct config_item_type *type) { #if IS_ENABLED(CONFIG_CONFIGFS_FS) config_group_init_type_name(&t->group, name, type); -- cgit v1.2.3 From d89e2378a97fafdc74cbf997e7c88af75b81610a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 12 Oct 2017 16:56:14 +0100 Subject: drivers: flag buses which demand DMA configuration We do not want the common dma_configure() pathway to apply indiscriminately to all devices, since there are plenty of buses which do not have DMA capability, and if their child devices were used for DMA API calls it would only be indicative of a driver bug. However, there are a number of buses for which DMA is implicitly expected even when not described by firmware - those we whitelist with an automatic opt-in to dma_configure(), assuming that the DMA address space and the physical address space are equivalent if not otherwise specified. Commit 723288836628 ("of: restrict DMA configuration") introduced a short-term fix by comparing explicit bus types, but this approach is far from pretty, doesn't scale well, and fails to cope at all with bus drivers which may be built as modules, like host1x. Let's refine things by making that opt-in a property of the bus type, which neatly addresses those problems and lets the decision of whether firmware description of DMA capability should be optional or mandatory stay internal to the bus drivers themselves. Signed-off-by: Robin Murphy Acked-by: Rob Herring Acked-by: Greg Kroah-Hartman Acked-by: Thierry Reding Signed-off-by: Christoph Hellwig --- include/linux/device.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 66fe271c2544..d60f7701e245 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -97,6 +97,8 @@ extern void bus_remove_file(struct bus_type *, struct bus_attribute *); * @p: The private data of the driver core, only the driver core can * touch this. * @lock_key: Lock class key for use by the lock validator + * @force_dma: Assume devices on this bus should be set up by dma_configure() + * even if DMA capability is not explicitly described by firmware. * * A bus is a channel between the processor and one or more devices. For the * purposes of the device model, all devices are connected via a bus, even if @@ -135,6 +137,8 @@ struct bus_type { struct subsys_private *p; struct lock_class_key lock_key; + + bool force_dma; }; extern int __must_check bus_register(struct bus_type *bus); -- cgit v1.2.3 From c9eb6172c328dde7e14812f94f8da87b691e41b5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Aug 2017 10:37:15 +0200 Subject: dma-mapping: turn dma_cache_sync into a dma_map_ops method After we removed all the dead wood it turns out only two architectures actually implement dma_cache_sync as a real op: mips and parisc. Add a cache_sync method to struct dma_map_ops and implement it for the mips defualt DMA ops, and the parisc pa11 ops. Note that arm, arc and openrisc support DMA_ATTR_NON_CONSISTENT, but never provided a functional dma_cache_sync implementations, which seems somewhat odd. Signed-off-by: Christoph Hellwig Reviewed-by: Robin Murphy --- include/linux/dma-mapping.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index 29ce9815da87..028a375d240d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -126,6 +126,8 @@ struct dma_map_ops { void (*sync_sg_for_device)(struct device *dev, struct scatterlist *sg, int nents, enum dma_data_direction dir); + void (*cache_sync)(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction direction); int (*mapping_error)(struct device *dev, dma_addr_t dma_addr); int (*dma_supported)(struct device *dev, u64 mask); #ifdef ARCH_HAS_DMA_GET_REQUIRED_MASK @@ -436,6 +438,17 @@ dma_sync_sg_for_device(struct device *dev, struct scatterlist *sg, #define dma_map_page(d, p, o, s, r) dma_map_page_attrs(d, p, o, s, r, 0) #define dma_unmap_page(d, a, s, r) dma_unmap_page_attrs(d, a, s, r, 0) +static inline void +dma_cache_sync(struct device *dev, void *vaddr, size_t size, + enum dma_data_direction dir) +{ + const struct dma_map_ops *ops = get_dma_ops(dev); + + BUG_ON(!valid_dma_direction(dir)); + if (ops->cache_sync) + ops->cache_sync(dev, vaddr, size, dir); +} + extern int dma_common_mmap(struct device *dev, struct vm_area_struct *vma, void *cpu_addr, dma_addr_t dma_addr, size_t size); -- cgit v1.2.3 From 2940bc4333707a05e69b3ffd737bda0dc0c3004f Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 19 Oct 2017 19:05:18 +0800 Subject: perf: hisi: Add support for HiSilicon SoC L3C PMU driver This patch adds support for L3C PMU driver in HiSilicon SoC chip, Each L3C has own control, counter and interrupt registers and is an separate PMU. For each L3C PMU, it has 8-programable counters and each counter is free-running. Interrupt is supported to handle counter (48-bits) overflow. Acked-by: Mark Rutland Reviewed-by: Jonathan Cameron Signed-off-by: Shaokun Zhang Signed-off-by: Anurup M Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6d508767e144..9621efdbb3bd 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -153,6 +153,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L3_ONLINE, -- cgit v1.2.3 From 2bab3cf9104c5ab80a1b9c706d81d997548401e4 Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 19 Oct 2017 19:05:19 +0800 Subject: perf: hisi: Add support for HiSilicon SoC HHA PMU driver L3 cache coherence is maintained by Hydra Home Agent (HHA) in HiSilicon SoC. This patch adds support for HHA PMU driver, Each HHA has own control, counter and interrupt registers and is an separate PMU. For each HHA PMU, it has 16-programable counters and each counter is free-running. Interrupt is supported to handle counter (48-bits) overflow. Acked-by: Mark Rutland Reviewed-by: Jonathan Cameron Signed-off-by: Shaokun Zhang Signed-off-by: Anurup M Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 9621efdbb3bd..cd63c52e7d93 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -153,6 +153,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, CPUHP_AP_PERF_ARM_QCOM_L2_ONLINE, -- cgit v1.2.3 From 904dcf03f086a2e3b9d1e02cb57c43ea2e588c8c Mon Sep 17 00:00:00 2001 From: Shaokun Zhang Date: Thu, 19 Oct 2017 19:05:20 +0800 Subject: perf: hisi: Add support for HiSilicon SoC DDRC PMU driver This patch adds support for DDRC PMU driver in HiSilicon SoC chip, Each DDRC has own control, counter and interrupt registers and is an separate PMU. For each DDRC PMU, it has 8-fixed-purpose counters which have been mapped to 8-events by hardware, it assumes that counter index is equal to event code (0 - 7) in DDRC PMU driver. Interrupt is supported to handle counter (32-bits) overflow. Acked-by: Mark Rutland Reviewed-by: Jonathan Cameron Signed-off-by: Shaokun Zhang Signed-off-by: Anurup M Signed-off-by: Will Deacon --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index cd63c52e7d93..587006de6f82 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -153,6 +153,7 @@ enum cpuhp_state { CPUHP_AP_PERF_S390_SF_ONLINE, CPUHP_AP_PERF_ARM_CCI_ONLINE, CPUHP_AP_PERF_ARM_CCN_ONLINE, + CPUHP_AP_PERF_ARM_HISI_DDRC_ONLINE, CPUHP_AP_PERF_ARM_HISI_HHA_ONLINE, CPUHP_AP_PERF_ARM_HISI_L3_ONLINE, CPUHP_AP_PERF_ARM_L2X0_ONLINE, -- cgit v1.2.3 From 7d7363e403ce959941f80684cc5f33e747afff17 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Mon, 16 Oct 2017 16:32:51 -0700 Subject: documentation: kernel-api: add more info on bitmap functions There are some good comments about bitmap operations in lib/bitmap.c and include/linux/bitmap.h, so format them for document generation and pull them into core-api/kernel-api.rst. I converted the "tables" of functions from using tabs to using spaces so that they are more readable in the source file and in the generated output. Signed-off-by: Randy Dunlap Signed-off-by: Jonathan Corbet --- include/linux/bitmap.h | 105 +++++++++++++++++++++++++++---------------------- 1 file changed, 57 insertions(+), 48 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitmap.h b/include/linux/bitmap.h index 5c4178016b1e..d9974c7a0a61 100644 --- a/include/linux/bitmap.h +++ b/include/linux/bitmap.h @@ -21,65 +21,74 @@ * See lib/bitmap.c for more details. */ -/* +/** + * DOC: bitmap overview + * * The available bitmap operations and their rough meaning in the * case that the bitmap is a single unsigned long are thus: * * Note that nbits should be always a compile time evaluable constant. * Otherwise many inlines will generate horrible code. * - * bitmap_zero(dst, nbits) *dst = 0UL - * bitmap_fill(dst, nbits) *dst = ~0UL - * bitmap_copy(dst, src, nbits) *dst = *src - * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 - * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 - * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 - * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) - * bitmap_complement(dst, src, nbits) *dst = ~(*src) - * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? - * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? - * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? - * bitmap_empty(src, nbits) Are all bits zero in *src? - * bitmap_full(src, nbits) Are all bits set in *src? - * bitmap_weight(src, nbits) Hamming Weight: number set bits - * bitmap_set(dst, pos, nbits) Set specified bit area - * bitmap_clear(dst, pos, nbits) Clear specified bit area - * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area - * bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above - * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n - * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n - * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) - * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) - * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap - * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz - * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf - * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf - * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf - * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf - * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region - * bitmap_release_region(bitmap, pos, order) Free specified bit region - * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region - * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) - * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) + * :: + * + * bitmap_zero(dst, nbits) *dst = 0UL + * bitmap_fill(dst, nbits) *dst = ~0UL + * bitmap_copy(dst, src, nbits) *dst = *src + * bitmap_and(dst, src1, src2, nbits) *dst = *src1 & *src2 + * bitmap_or(dst, src1, src2, nbits) *dst = *src1 | *src2 + * bitmap_xor(dst, src1, src2, nbits) *dst = *src1 ^ *src2 + * bitmap_andnot(dst, src1, src2, nbits) *dst = *src1 & ~(*src2) + * bitmap_complement(dst, src, nbits) *dst = ~(*src) + * bitmap_equal(src1, src2, nbits) Are *src1 and *src2 equal? + * bitmap_intersects(src1, src2, nbits) Do *src1 and *src2 overlap? + * bitmap_subset(src1, src2, nbits) Is *src1 a subset of *src2? + * bitmap_empty(src, nbits) Are all bits zero in *src? + * bitmap_full(src, nbits) Are all bits set in *src? + * bitmap_weight(src, nbits) Hamming Weight: number set bits + * bitmap_set(dst, pos, nbits) Set specified bit area + * bitmap_clear(dst, pos, nbits) Clear specified bit area + * bitmap_find_next_zero_area(buf, len, pos, n, mask) Find bit free area + * bitmap_find_next_zero_area_off(buf, len, pos, n, mask) as above + * bitmap_shift_right(dst, src, n, nbits) *dst = *src >> n + * bitmap_shift_left(dst, src, n, nbits) *dst = *src << n + * bitmap_remap(dst, src, old, new, nbits) *dst = map(old, new)(src) + * bitmap_bitremap(oldbit, old, new, nbits) newbit = map(old, new)(oldbit) + * bitmap_onto(dst, orig, relmap, nbits) *dst = orig relative to relmap + * bitmap_fold(dst, orig, sz, nbits) dst bits = orig bits mod sz + * bitmap_parse(buf, buflen, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parse_user(ubuf, ulen, dst, nbits) Parse bitmap dst from user buf + * bitmap_parselist(buf, dst, nbits) Parse bitmap dst from kernel buf + * bitmap_parselist_user(buf, dst, nbits) Parse bitmap dst from user buf + * bitmap_find_free_region(bitmap, bits, order) Find and allocate bit region + * bitmap_release_region(bitmap, pos, order) Free specified bit region + * bitmap_allocate_region(bitmap, pos, order) Allocate specified bit region + * bitmap_from_u32array(dst, nbits, buf, nwords) *dst = *buf (nwords 32b words) + * bitmap_to_u32array(buf, nwords, src, nbits) *buf = *dst (nwords 32b words) + * */ -/* - * Also the following operations in asm/bitops.h apply to bitmaps. +/** + * DOC: bitmap bitops + * + * Also the following operations in asm/bitops.h apply to bitmaps.:: + * + * set_bit(bit, addr) *addr |= bit + * clear_bit(bit, addr) *addr &= ~bit + * change_bit(bit, addr) *addr ^= bit + * test_bit(bit, addr) Is bit set in *addr? + * test_and_set_bit(bit, addr) Set bit and return old value + * test_and_clear_bit(bit, addr) Clear bit and return old value + * test_and_change_bit(bit, addr) Change bit and return old value + * find_first_zero_bit(addr, nbits) Position first zero bit in *addr + * find_first_bit(addr, nbits) Position first set bit in *addr + * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit + * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit * - * set_bit(bit, addr) *addr |= bit - * clear_bit(bit, addr) *addr &= ~bit - * change_bit(bit, addr) *addr ^= bit - * test_bit(bit, addr) Is bit set in *addr? - * test_and_set_bit(bit, addr) Set bit and return old value - * test_and_clear_bit(bit, addr) Clear bit and return old value - * test_and_change_bit(bit, addr) Change bit and return old value - * find_first_zero_bit(addr, nbits) Position first zero bit in *addr - * find_first_bit(addr, nbits) Position first set bit in *addr - * find_next_zero_bit(addr, nbits, bit) Position next zero bit in *addr >= bit - * find_next_bit(addr, nbits, bit) Position next set bit in *addr >= bit */ -/* +/** + * DOC: declare bitmap * The DECLARE_BITMAP(name,bits) macro, in linux/types.h, can be used * to declare an array named 'name' of just enough unsigned longs to * contain all bit positions from 0 to 'bits' - 1. -- cgit v1.2.3 From f628ba9e22e0fa32ac6270a0d696fe1509889540 Mon Sep 17 00:00:00 2001 From: Grygorii Strashko Date: Mon, 9 Oct 2017 17:16:05 -0500 Subject: gpiolib: drop irq_base field from gpio_chip struct Hence, the last user of irq_base field was removed by commit b4c495f03ae3 ("gpio: mockup: use irq_sim") it can be removed safely. Signed-off-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c97f8325e8bf..6bbda879fb8b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -83,7 +83,6 @@ struct module; * @irqchip: GPIO IRQ chip impl, provided by GPIO driver * @irqdomain: Interrupt translation domain; responsible for mapping * between GPIO hwirq number and linux irq number - * @irq_base: first linux IRQ number assigned to GPIO IRQ chip (deprecated) * @irq_handler: the irq handler to use (often a predefined irq core function) * for GPIO IRQs, provided by GPIO driver * @irq_default_type: default IRQ triggering type applied during GPIO driver @@ -165,7 +164,6 @@ struct gpio_chip { */ struct irq_chip *irqchip; struct irq_domain *irqdomain; - unsigned int irq_base; irq_flow_handler_t irq_handler; unsigned int irq_default_type; unsigned int irq_chained_parent; -- cgit v1.2.3 From 5307e2ad69ab3b0e0622fdf8b254c1d4565eb924 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 12 Oct 2017 12:40:10 +0200 Subject: bitops: Introduce assign_bit() A common idiom is to assign a value to a bit with: if (value) set_bit(nr, addr); else clear_bit(nr, addr); Likewise common is the one-line expression variant: value ? set_bit(nr, addr) : clear_bit(nr, addr); Commit 9a8ac3ae682e ("dm mpath: cleanup QUEUE_IF_NO_PATH bit manipulation by introducing assign_bit()") introduced assign_bit() to the md subsystem for brevity. Make it available to others, specifically gpiolib and the upcoming driver for Maxim MAX3191x industrial serializer chips. As requested by Peter Zijlstra, change the argument order to reflect traditional "dst = src" in C, hence "assign_bit(nr, addr, value)". Cc: Bart Van Assche Cc: Alasdair Kergon Cc: Mike Snitzer Cc: Linus Walleij Cc: Neil Brown Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Theodore Ts'o Cc: Borislav Petkov Cc: "H. Peter Anvin" Cc: Denys Vlasenko Acked-by: Andrew Morton Signed-off-by: Lukas Wunner Signed-off-by: Linus Walleij --- include/linux/bitops.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 8fbe259b197c..9a874deee6e2 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -227,6 +227,30 @@ static inline unsigned long __ffs64(u64 word) return __ffs((unsigned long)word); } +/** + * assign_bit - Assign value to a bit in memory + * @nr: the bit to set + * @addr: the address to start counting from + * @value: the value to assign + */ +static __always_inline void assign_bit(long nr, volatile unsigned long *addr, + bool value) +{ + if (value) + set_bit(nr, addr); + else + clear_bit(nr, addr); +} + +static __always_inline void __assign_bit(long nr, volatile unsigned long *addr, + bool value) +{ + if (value) + __set_bit(nr, addr); + else + __clear_bit(nr, addr); +} + #ifdef __KERNEL__ #ifndef set_mask_bits -- cgit v1.2.3 From eec1d566cdf94b57e8f5ba9fe60eea214929bcfc Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Thu, 12 Oct 2017 12:40:10 +0200 Subject: gpio: Introduce ->get_multiple callback SPI-attached GPIO controllers typically read out all inputs in one go. If callers desire the values of multipe inputs, ideally a single readout should take place to return the desired values. However the current driver API only offers a ->get callback but no ->get_multiple (unlike ->set_multiple, which is present). Thus, to read multiple inputs, a full readout needs to be performed for every single value (barring driver-internal caching), which is inefficient. In fact, the lack of a ->get_multiple callback has been bemoaned repeatedly by the gpio subsystem maintainer: http://www.spinics.net/lists/linux-gpio/msg10571.html http://www.spinics.net/lists/devicetree/msg121734.html Introduce the missing callback. Add corresponding consumer functions such as gpiod_get_array_value(). Amend linehandle_ioctl() to take advantage of the newly added infrastructure. Update the documentation. Cc: Rojhalat Ibrahim Signed-off-by: Lukas Wunner Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 43 +++++++++++++++++++++++++++++++++++++++++++ include/linux/gpio/driver.h | 5 +++++ 2 files changed, 48 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 8f702fcbe485..d4920ec1f1da 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -99,10 +99,15 @@ int gpiod_direction_output_raw(struct gpio_desc *desc, int value); /* Value get/set from non-sleeping context */ int gpiod_get_value(const struct gpio_desc *desc); +int gpiod_get_array_value(unsigned int array_size, + struct gpio_desc **desc_array, int *value_array); void gpiod_set_value(struct gpio_desc *desc, int value); void gpiod_set_array_value(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); int gpiod_get_raw_value(const struct gpio_desc *desc); +int gpiod_get_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array); void gpiod_set_raw_value(struct gpio_desc *desc, int value); void gpiod_set_raw_array_value(unsigned int array_size, struct gpio_desc **desc_array, @@ -110,11 +115,17 @@ void gpiod_set_raw_array_value(unsigned int array_size, /* Value get/set from sleeping context */ int gpiod_get_value_cansleep(const struct gpio_desc *desc); +int gpiod_get_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array); void gpiod_set_value_cansleep(struct gpio_desc *desc, int value); void gpiod_set_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, int *value_array); int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc); +int gpiod_get_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array); void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value); void gpiod_set_raw_array_value_cansleep(unsigned int array_size, struct gpio_desc **desc_array, @@ -305,6 +316,14 @@ static inline int gpiod_get_value(const struct gpio_desc *desc) WARN_ON(1); return 0; } +static inline int gpiod_get_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array) +{ + /* GPIO can never have been requested */ + WARN_ON(1); + return 0; +} static inline void gpiod_set_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ @@ -323,6 +342,14 @@ static inline int gpiod_get_raw_value(const struct gpio_desc *desc) WARN_ON(1); return 0; } +static inline int gpiod_get_raw_array_value(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array) +{ + /* GPIO can never have been requested */ + WARN_ON(1); + return 0; +} static inline void gpiod_set_raw_value(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ @@ -342,6 +369,14 @@ static inline int gpiod_get_value_cansleep(const struct gpio_desc *desc) WARN_ON(1); return 0; } +static inline int gpiod_get_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array) +{ + /* GPIO can never have been requested */ + WARN_ON(1); + return 0; +} static inline void gpiod_set_value_cansleep(struct gpio_desc *desc, int value) { /* GPIO can never have been requested */ @@ -360,6 +395,14 @@ static inline int gpiod_get_raw_value_cansleep(const struct gpio_desc *desc) WARN_ON(1); return 0; } +static inline int gpiod_get_raw_array_value_cansleep(unsigned int array_size, + struct gpio_desc **desc_array, + int *value_array) +{ + /* GPIO can never have been requested */ + WARN_ON(1); + return 0; +} static inline void gpiod_set_raw_value_cansleep(struct gpio_desc *desc, int value) { diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 6bbda879fb8b..bda95a9b7b8c 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -35,6 +35,8 @@ struct module; * @direction_input: configures signal "offset" as input, or returns error * @direction_output: configures signal "offset" as output, or returns error * @get: returns value for signal "offset", 0=low, 1=high, or negative error + * @get_multiple: reads values for multiple signals defined by "mask" and + * stores them in "bits", returns 0 on success or negative error * @set: assigns output value for signal "offset" * @set_multiple: assigns output values for multiple signals defined by "mask" * @set_config: optional hook for all kinds of settings. Uses the same @@ -125,6 +127,9 @@ struct gpio_chip { unsigned offset, int value); int (*get)(struct gpio_chip *chip, unsigned offset); + int (*get_multiple)(struct gpio_chip *chip, + unsigned long *mask, + unsigned long *bits); void (*set)(struct gpio_chip *chip, unsigned offset, int value); void (*set_multiple)(struct gpio_chip *chip, -- cgit v1.2.3 From c5053e695d621f8a8a3992c5e93dd3be088fa267 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Thu, 19 Oct 2017 16:19:03 -0700 Subject: Input: gpio_mouse - kill off platform data This is not used much: git grep gpio_mouse_platform_data shows that absolutely nothing in the kernel defines this platform data. It could be argued that the driver should be deleted. But that is a bit harsh I think since it seems generally useful. So this patch starts a series which repurposes it to be used with hardware nodes from device tree or ACPI. This first patch simply localize the platform data header and allocates a dummy platform data. Yes: this patch leaves the driver in a pretty useless state, but since nothing is instantiating this driver, it doesn't make it more useless than it already is. Later patches makes use of the driver. Acked-by: Hans-Christian Noren Egtvedt Signed-off-by: Linus Walleij Signed-off-by: Dmitry Torokhov --- include/linux/gpio_mouse.h | 61 ---------------------------------------------- 1 file changed, 61 deletions(-) delete mode 100644 include/linux/gpio_mouse.h (limited to 'include/linux') diff --git a/include/linux/gpio_mouse.h b/include/linux/gpio_mouse.h deleted file mode 100644 index 44ed7aa14d85..000000000000 --- a/include/linux/gpio_mouse.h +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Driver for simulating a mouse on GPIO lines. - * - * Copyright (C) 2007 Atmel Corporation - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef _GPIO_MOUSE_H -#define _GPIO_MOUSE_H - -#define GPIO_MOUSE_POLARITY_ACT_HIGH 0x00 -#define GPIO_MOUSE_POLARITY_ACT_LOW 0x01 - -#define GPIO_MOUSE_PIN_UP 0 -#define GPIO_MOUSE_PIN_DOWN 1 -#define GPIO_MOUSE_PIN_LEFT 2 -#define GPIO_MOUSE_PIN_RIGHT 3 -#define GPIO_MOUSE_PIN_BLEFT 4 -#define GPIO_MOUSE_PIN_BMIDDLE 5 -#define GPIO_MOUSE_PIN_BRIGHT 6 -#define GPIO_MOUSE_PIN_MAX 7 - -/** - * struct gpio_mouse_platform_data - * @scan_ms: integer in ms specifying the scan periode. - * @polarity: Pin polarity, active high or low. - * @up: GPIO line for up value. - * @down: GPIO line for down value. - * @left: GPIO line for left value. - * @right: GPIO line for right value. - * @bleft: GPIO line for left button. - * @bmiddle: GPIO line for middle button. - * @bright: GPIO line for right button. - * - * This struct must be added to the platform_device in the board code. - * It is used by the gpio_mouse driver to setup GPIO lines and to - * calculate mouse movement. - */ -struct gpio_mouse_platform_data { - int scan_ms; - int polarity; - - union { - struct { - int up; - int down; - int left; - int right; - - int bleft; - int bmiddle; - int bright; - }; - int pins[GPIO_MOUSE_PIN_MAX]; - }; -}; - -#endif /* _GPIO_MOUSE_H */ -- cgit v1.2.3 From 2cbfca66ba5e00606bb0f24aba1e9cd8efe58849 Mon Sep 17 00:00:00 2001 From: Andrew Jeffery Date: Fri, 20 Oct 2017 13:27:58 +1030 Subject: gpio: Fix loose spelling Literally. I expect "lose" was meant here, rather than "loose", though you could feasibly use a somewhat uncommon definition of "loose" to mean what would be meant by "lose": "Loose the hounds" for instance, as in "Release the hounds". Substituting in "value" for "hounds" gives "release the value", and makes some sense, but futher substituting back to loose gives "loose the value" which overall just seems a bit anachronistic. Instead, use modern, pragmatic English and save a character. Cc: Russell Currey Signed-off-by: Andrew Jeffery Signed-off-by: Linus Walleij --- include/linux/gpio/machine.h | 2 +- include/linux/of_gpio.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/machine.h b/include/linux/gpio/machine.h index ba4ccfd900f9..5e9f294c29eb 100644 --- a/include/linux/gpio/machine.h +++ b/include/linux/gpio/machine.h @@ -10,7 +10,7 @@ enum gpio_lookup_flags { GPIO_OPEN_DRAIN = (1 << 1), GPIO_OPEN_SOURCE = (1 << 2), GPIO_SLEEP_MAINTAIN_VALUE = (0 << 3), - GPIO_SLEEP_MAY_LOOSE_VALUE = (1 << 3), + GPIO_SLEEP_MAY_LOSE_VALUE = (1 << 3), }; /** diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index ca10f43564de..1fe205582111 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -31,7 +31,7 @@ enum of_gpio_flags { OF_GPIO_ACTIVE_LOW = 0x1, OF_GPIO_SINGLE_ENDED = 0x2, OF_GPIO_OPEN_DRAIN = 0x4, - OF_GPIO_SLEEP_MAY_LOOSE_VALUE = 0x8, + OF_GPIO_SLEEP_MAY_LOSE_VALUE = 0x8, }; #ifdef CONFIG_OF_GPIO -- cgit v1.2.3 From 590c845930457d25d27dc1fdd964a1ce18ef2d7d Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 6 Oct 2017 08:14:37 +0900 Subject: kprobes: Disable the jprobes APIs Disable the jprobes APIs and comment out the jprobes API function code. This is in preparation of removing all jprobes related code (including kprobe's break_handler). Nowadays ftrace and other tracing features are mature enough to replace jprobes use-cases. Users can safely use ftrace and perf probe etc. for their use cases. Signed-off-by: Masami Hiramatsu Cc: Alexei Starovoitov Cc: Ananth N Mavinakayanahalli Cc: Anil S Keshavamurthy Cc: David S . Miller Cc: Ian McDonald Cc: Kees Cook Cc: Linus Torvalds Cc: Paul E . McKenney Cc: Peter Zijlstra Cc: Stephen Hemminger Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Vlad Yasevich Link: http://lkml.kernel.org/r/150724527741.5014.15465541485637899227.stgit@devbox Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 40 ++++++++++++++++++---------------------- 1 file changed, 18 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index bd2684700b74..56b2e698dbad 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -391,10 +391,6 @@ int register_kprobes(struct kprobe **kps, int num); void unregister_kprobes(struct kprobe **kps, int num); int setjmp_pre_handler(struct kprobe *, struct pt_regs *); int longjmp_break_handler(struct kprobe *, struct pt_regs *); -int register_jprobe(struct jprobe *p); -void unregister_jprobe(struct jprobe *p); -int register_jprobes(struct jprobe **jps, int num); -void unregister_jprobes(struct jprobe **jps, int num); void jprobe_return(void); unsigned long arch_deref_entry_point(void *); @@ -443,20 +439,6 @@ static inline void unregister_kprobe(struct kprobe *p) static inline void unregister_kprobes(struct kprobe **kps, int num) { } -static inline int register_jprobe(struct jprobe *p) -{ - return -ENOSYS; -} -static inline int register_jprobes(struct jprobe **jps, int num) -{ - return -ENOSYS; -} -static inline void unregister_jprobe(struct jprobe *p) -{ -} -static inline void unregister_jprobes(struct jprobe **jps, int num) -{ -} static inline void jprobe_return(void) { } @@ -486,6 +468,20 @@ static inline int enable_kprobe(struct kprobe *kp) return -ENOSYS; } #endif /* CONFIG_KPROBES */ +static inline int __deprecated register_jprobe(struct jprobe *p) +{ + return -ENOSYS; +} +static inline int __deprecated register_jprobes(struct jprobe **jps, int num) +{ + return -ENOSYS; +} +static inline void __deprecated unregister_jprobe(struct jprobe *p) +{ +} +static inline void __deprecated unregister_jprobes(struct jprobe **jps, int num) +{ +} static inline int disable_kretprobe(struct kretprobe *rp) { return disable_kprobe(&rp->kp); @@ -494,13 +490,13 @@ static inline int enable_kretprobe(struct kretprobe *rp) { return enable_kprobe(&rp->kp); } -static inline int disable_jprobe(struct jprobe *jp) +static inline int __deprecated disable_jprobe(struct jprobe *jp) { - return disable_kprobe(&jp->kp); + return -ENOSYS; } -static inline int enable_jprobe(struct jprobe *jp) +static inline int __deprecated enable_jprobe(struct jprobe *jp) { - return enable_kprobe(&jp->kp); + return -ENOSYS; } #ifndef CONFIG_KPROBES -- cgit v1.2.3 From 52f737c2da40259ac9962170ce608b6fb1b55ee4 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 19 Oct 2017 13:28:38 -0700 Subject: timer: Provide wrappers safe for use with LOCKDEP Under LOCKDEP, the timer lock_class_key (set up in __setup_timer) needs to be tied to the caller's context, so an inline for timer_setup() won't work. We do, however, want to keep the inline version around for argument type checking, though, so this provides macro wrappers in the LOCKDEP case. This fixes the case of different timers sharing the same LOCKDEP instance, and producing a false positive warning: [ 580.840858] ====================================================== [ 580.842299] WARNING: possible circular locking dependency detected [ 580.843684] 4.14.0-rc4+ #17 Not tainted [ 580.844554] ------------------------------------------------------ [ 580.845945] swapper/9/0 is trying to acquire lock: [ 580.847024] (slock-AF_INET){+.-.}, at: [] tcp_write_timer+0x24/0xd0 [ 580.848834] but task is already holding lock: [ 580.850107] ((timer)#2){+.-.}, at: [] call_timer_fn+0x0/0x300 [ 580.851663] which lock already depends on the new lock. [ 580.853439] the existing dependency chain (in reverse order) is: [ 580.855311] -> #1 ((timer)#2){+.-.}: [ 580.856538] __lock_acquire+0x114d/0x11a0 [ 580.857506] lock_acquire+0xb0/0x1d0 [ 580.858373] del_timer_sync+0x3c/0xb0 [ 580.859260] inet_csk_reqsk_queue_drop+0x7f/0x1b0 ... -> #0 (slock-AF_INET){+.-.}: [ 580.884980] check_prev_add+0x666/0x700 [ 580.885790] __lock_acquire+0x114d/0x11a0 [ 580.886575] lock_acquire+0xb0/0x1d0 [ 580.887289] _raw_spin_lock+0x2c/0x40 [ 580.888021] tcp_write_timer+0x24/0xd0 ... [ 580.900055] Possible unsafe locking scenario: [ 580.901043] CPU0 CPU1 [ 580.901797] ---- ---- [ 580.902540] lock((timer)#2); [ 580.903046] lock(slock-AF_INET); [ 580.904006] lock((timer)#2); [ 580.904915] lock(slock-AF_INET); [ 580.905502] In this report, del_timer_sync() is from: inet_csk_reqsk_queue_drop() reqsk_queue_unlink() del_timer_sync(&req->rsk_timer) but tcp_write_timer()'s timer is attached to icsk_retransmit_timer. Both had the same lock_class_key, since they were using timer_setup(). Switching to a macro allows for a separate context, avoiding the false positive. Fixes: 686fef928bba ("timer: Prepare to change timer callback argument type") Reported-by: Craig Gallek Suggested-by: Eric Dumazet Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Cc: netdev@vger.kernel.org Cc: "David S. Miller" Link: https://lkml.kernel.org/r/20171019202838.GA43223@beast --- include/linux/timer.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 10685c33e679..09950482309b 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -150,6 +150,7 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define TIMER_DATA_TYPE unsigned long #define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) +#ifndef CONFIG_LOCKDEP static inline void timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *), unsigned int flags) @@ -165,6 +166,19 @@ static inline void timer_setup_on_stack(struct timer_list *timer, __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback, (TIMER_DATA_TYPE)timer, flags); } +#else +/* + * Under LOCKDEP, the timer lock_class_key (set up in __init_timer) needs + * to be tied to the caller's context, so an inline (above) won't work. We + * do want to keep the inline for argument type checking, though. + */ +# define timer_setup(timer, callback, flags) \ + __setup_timer(timer, (TIMER_FUNC_TYPE)callback, \ + (TIMER_DATA_TYPE)timer, flags) +# define timer_setup_on_stack(timer, callback, flags) \ + __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback,\ + (TIMER_DATA_TYPE)timer, flags) +#endif #define from_timer(var, callback_timer, timer_fieldname) \ container_of(callback_timer, typeof(*var), timer_fieldname) -- cgit v1.2.3 From de95e04791a03de5cb681980a3880db6919e3b4a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Wed, 18 Oct 2017 09:56:54 -0700 Subject: net: Add extack to validator_info structs used for address notifier Add extack to in_validator_info and in6_validator_info. Update the one user of each, ipvlan, to return an error message for failures. Only manual configuration of an address is plumbed in the IPv6 code path. Signed-off-by: David Ahern Reviewed-by: Ido Schimmel Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 751d051f0bc7..681dff30940b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -154,6 +154,7 @@ struct in_ifaddr { struct in_validator_info { __be32 ivi_addr; struct in_device *ivi_dev; + struct netlink_ext_ack *extack; }; int register_inetaddr_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 617dd7cc490b72345277e2666c8ed34d4f47f0da Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Wed, 30 Aug 2017 12:48:31 +0200 Subject: gpu: host1x: syncpt: Request syncpoints per client Rather than request syncpoints for a struct device *, request them for a struct host1x_client *. This is important because subsequent patches are going to break the assumption that host1x will always be the parent for devices requesting a syncpoint. It's also a more natural choice because host1x clients are really the only ones that will know how to deal with syncpoints. Note that host1x clients are always guaranteed to be children of host1x, regardless of their location in the device tree. Signed-off-by: Thierry Reding --- include/linux/host1x.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 630b1a98ab58..ddf7f9ca86cc 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -157,7 +157,7 @@ int host1x_syncpt_incr(struct host1x_syncpt *sp); u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs); int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value); -struct host1x_syncpt *host1x_syncpt_request(struct device *dev, +struct host1x_syncpt *host1x_syncpt_request(struct host1x_client *client, unsigned long flags); void host1x_syncpt_free(struct host1x_syncpt *sp); -- cgit v1.2.3 From 6e71b04a82248ccf13a94b85cbc674a9fefe53f5 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:22 -0700 Subject: bpf: Add file mode configuration into bpf maps Introduce the map read/write flags to the eBPF syscalls that returns the map fd. The flags is used to set up the file mode when construct a new file descriptor for bpf maps. To not break the backward capability, the f_flags is set to O_RDWR if the flag passed by syscall is 0. Otherwise it should be O_RDONLY or O_WRONLY. When the userspace want to modify or read the map content, it will check the file mode to see if it is allowed to make the change. Signed-off-by: Chenbo Feng Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index d67ccdc0099f..3e5508f2fa87 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -315,11 +315,11 @@ void bpf_map_area_free(void *base); extern int sysctl_unprivileged_bpf_disabled; -int bpf_map_new_fd(struct bpf_map *map); +int bpf_map_new_fd(struct bpf_map *map, int flags); int bpf_prog_new_fd(struct bpf_prog *prog); int bpf_obj_pin_user(u32 ufd, const char __user *pathname); -int bpf_obj_get_user(const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname, int flags); int bpf_percpu_hash_copy(struct bpf_map *map, void *key, void *value); int bpf_percpu_array_copy(struct bpf_map *map, void *key, void *value); @@ -338,6 +338,8 @@ int bpf_fd_htab_map_update_elem(struct bpf_map *map, struct file *map_file, void *key, void *value, u64 map_flags); int bpf_fd_htab_map_lookup_elem(struct bpf_map *map, void *key, u32 *value); +int bpf_get_file_flag(int flags); + /* memcpy that is used with 8-byte aligned pointers, power-of-8 size and * forced to use 'long' read/writes to try to atomically copy long counters. * Best-effort only. No barriers here, since it _will_ race with concurrent @@ -421,7 +423,7 @@ static inline void __bpf_prog_uncharge(struct user_struct *user, u32 pages) { } -static inline int bpf_obj_get_user(const char __user *pathname) +static inline int bpf_obj_get_user(const char __user *pathname, int flags) { return -EOPNOTSUPP; } -- cgit v1.2.3 From afdb09c720b62b8090584c11151d856df330e57d Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:24 -0700 Subject: security: bpf: Add LSM hooks for bpf object related syscall Introduce several LSM hooks for the syscalls that will allow the userspace to access to eBPF object such as eBPF programs and eBPF maps. The security check is aimed to enforce a per object security protection for eBPF object so only processes with the right priviliges can read/write to a specific map or use a specific eBPF program. Besides that, a general security hook is added before the multiplexer of bpf syscall to check the cmd and the attribute used for the command. The actual security module can decide which command need to be checked and how the cmd should be checked. Signed-off-by: Chenbo Feng Acked-by: James Morris Signed-off-by: David S. Miller --- include/linux/bpf.h | 6 ++++++ include/linux/lsm_hooks.h | 54 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/security.h | 45 +++++++++++++++++++++++++++++++++++++++ 3 files changed, 105 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3e5508f2fa87..84c192da3e0b 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -57,6 +57,9 @@ struct bpf_map { atomic_t usercnt; struct bpf_map *inner_map_meta; char name[BPF_OBJ_NAME_LEN]; +#ifdef CONFIG_SECURITY + void *security; +#endif }; /* function argument constraints */ @@ -193,6 +196,9 @@ struct bpf_prog_aux { struct user_struct *user; u64 load_time; /* ns since boottime */ char name[BPF_OBJ_NAME_LEN]; +#ifdef CONFIG_SECURITY + void *security; +#endif union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/lsm_hooks.h b/include/linux/lsm_hooks.h index c9258124e417..7161d8e7ee79 100644 --- a/include/linux/lsm_hooks.h +++ b/include/linux/lsm_hooks.h @@ -1351,6 +1351,40 @@ * @inode we wish to get the security context of. * @ctx is a pointer in which to place the allocated security context. * @ctxlen points to the place to put the length of @ctx. + * + * Security hooks for using the eBPF maps and programs functionalities through + * eBPF syscalls. + * + * @bpf: + * Do a initial check for all bpf syscalls after the attribute is copied + * into the kernel. The actual security module can implement their own + * rules to check the specific cmd they need. + * + * @bpf_map: + * Do a check when the kernel generate and return a file descriptor for + * eBPF maps. + * + * @map: bpf map that we want to access + * @mask: the access flags + * + * @bpf_prog: + * Do a check when the kernel generate and return a file descriptor for + * eBPF programs. + * + * @prog: bpf prog that userspace want to use. + * + * @bpf_map_alloc_security: + * Initialize the security field inside bpf map. + * + * @bpf_map_free_security: + * Clean up the security information stored inside bpf map. + * + * @bpf_prog_alloc_security: + * Initialize the security field inside bpf program. + * + * @bpf_prog_free_security: + * Clean up the security information stored inside bpf prog. + * */ union security_list_options { int (*binder_set_context_mgr)(struct task_struct *mgr); @@ -1682,6 +1716,17 @@ union security_list_options { struct audit_context *actx); void (*audit_rule_free)(void *lsmrule); #endif /* CONFIG_AUDIT */ + +#ifdef CONFIG_BPF_SYSCALL + int (*bpf)(int cmd, union bpf_attr *attr, + unsigned int size); + int (*bpf_map)(struct bpf_map *map, fmode_t fmode); + int (*bpf_prog)(struct bpf_prog *prog); + int (*bpf_map_alloc_security)(struct bpf_map *map); + void (*bpf_map_free_security)(struct bpf_map *map); + int (*bpf_prog_alloc_security)(struct bpf_prog_aux *aux); + void (*bpf_prog_free_security)(struct bpf_prog_aux *aux); +#endif /* CONFIG_BPF_SYSCALL */ }; struct security_hook_heads { @@ -1901,6 +1946,15 @@ struct security_hook_heads { struct list_head audit_rule_match; struct list_head audit_rule_free; #endif /* CONFIG_AUDIT */ +#ifdef CONFIG_BPF_SYSCALL + struct list_head bpf; + struct list_head bpf_map; + struct list_head bpf_prog; + struct list_head bpf_map_alloc_security; + struct list_head bpf_map_free_security; + struct list_head bpf_prog_alloc_security; + struct list_head bpf_prog_free_security; +#endif /* CONFIG_BPF_SYSCALL */ } __randomize_layout; /* diff --git a/include/linux/security.h b/include/linux/security.h index ce6265960d6c..18800b0911e5 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,6 +31,7 @@ #include #include #include +#include struct linux_binprm; struct cred; @@ -1730,6 +1731,50 @@ static inline void securityfs_remove(struct dentry *dentry) #endif +#ifdef CONFIG_BPF_SYSCALL +#ifdef CONFIG_SECURITY +extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); +extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); +extern int security_bpf_prog(struct bpf_prog *prog); +extern int security_bpf_map_alloc(struct bpf_map *map); +extern void security_bpf_map_free(struct bpf_map *map); +extern int security_bpf_prog_alloc(struct bpf_prog_aux *aux); +extern void security_bpf_prog_free(struct bpf_prog_aux *aux); +#else +static inline int security_bpf(int cmd, union bpf_attr *attr, + unsigned int size) +{ + return 0; +} + +static inline int security_bpf_map(struct bpf_map *map, fmode_t fmode) +{ + return 0; +} + +static inline int security_bpf_prog(struct bpf_prog *prog) +{ + return 0; +} + +static inline int security_bpf_map_alloc(struct bpf_map *map) +{ + return 0; +} + +static inline void security_bpf_map_free(struct bpf_map *map) +{ } + +static inline int security_bpf_prog_alloc(struct bpf_prog_aux *aux) +{ + return 0; +} + +static inline void security_bpf_prog_free(struct bpf_prog_aux *aux) +{ } +#endif /* CONFIG_SECURITY */ +#endif /* CONFIG_BPF_SYSCALL */ + #ifdef CONFIG_SECURITY static inline char *alloc_secdata(void) -- cgit v1.2.3 From f66e448cfda021b0bcd884f26709796fe19c7cc1 Mon Sep 17 00:00:00 2001 From: Chenbo Feng Date: Wed, 18 Oct 2017 13:00:26 -0700 Subject: selinux: bpf: Add addtional check for bpf object file receive Introduce a bpf object related check when sending and receiving files through unix domain socket as well as binder. It checks if the receiving process have privilege to read/write the bpf map or use the bpf program. This check is necessary because the bpf maps and programs are using a anonymous inode as their shared inode so the normal way of checking the files and sockets when passing between processes cannot work properly on eBPF object. This check only works when the BPF_SYSCALL is configured. Signed-off-by: Chenbo Feng Acked-by: Stephen Smalley Reviewed-by: James Morris Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 84c192da3e0b..1e334b248ff6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -288,6 +288,9 @@ int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); +extern const struct file_operations bpf_map_fops; +extern const struct file_operations bpf_prog_fops; + #define BPF_PROG_TYPE(_id, _name) \ extern const struct bpf_prog_ops _name ## _prog_ops; \ extern const struct bpf_verifier_ops _name ## _verifier_ops; -- cgit v1.2.3 From e4d0b679a8467b6d0c169642f0b5f57d8d6eacc2 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 17 Sep 2017 14:30:06 -0700 Subject: srcu: Add parameters to SRCU docbook comments Signed-off-by: Paul E. McKenney --- include/linux/srcu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index 39af9bc0f653..62be8966e837 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -78,6 +78,7 @@ void synchronize_srcu(struct srcu_struct *sp); /** * srcu_read_lock_held - might we be in SRCU read-side critical section? + * @sp: The srcu_struct structure to check * * If CONFIG_DEBUG_LOCK_ALLOC is selected, returns nonzero iff in an SRCU * read-side critical section. In absence of CONFIG_DEBUG_LOCK_ALLOC, -- cgit v1.2.3 From 8c4083b30e56fc71b0e94c26374b32d95d5ea461 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Thu, 19 Oct 2017 15:50:29 +0200 Subject: net: sched: add block bind/unbind notif. and extended block_get/put Introduce new type of ndo_setup_tc message to propage binding/unbinding of a block to driver. Call this ndo whenever qdisc gets/puts a block. Alongside with this, there's need to propagate binder type from qdisc code down to the notifier. So introduce extended variants of block_get/put in order to pass this info. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index bf014afcb914..4de5b08ee0fb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -775,6 +775,7 @@ enum tc_setup_type { TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, + TC_SETUP_BLOCK, }; /* These structures hold the attributes of xdp state that are being passed -- cgit v1.2.3 From ff61b5e3f041c2f1aa8d7c700af3007889973889 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:37 +0300 Subject: drivers, net, mlx4: convert mlx4_cq.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx4_cq.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b0a57e043fa3..daac2e3a1a58 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -40,7 +40,7 @@ #include #include -#include +#include #include @@ -751,7 +751,7 @@ struct mlx4_cq { int cqn; unsigned vector; - atomic_t refcount; + refcount_t refcount; struct completion free; struct { struct list_head list; -- cgit v1.2.3 From 0068895ff845c38e9e2b65c002c53c623379e436 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:38 +0300 Subject: drivers, net, mlx4: convert mlx4_qp.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx4_qp.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index daac2e3a1a58..b8e19c4d6caa 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -768,7 +768,7 @@ struct mlx4_qp { int qpn; - atomic_t refcount; + refcount_t refcount; struct completion free; u8 usage; }; -- cgit v1.2.3 From 17ac99b2b8d08ed40f4525491d6eff330329a6d2 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:39 +0300 Subject: drivers, net, mlx4: convert mlx4_srq.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx4_srq.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index b8e19c4d6caa..a9b5fed8f7c6 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -781,7 +781,7 @@ struct mlx4_srq { int max_gs; int wqe_shift; - atomic_t refcount; + refcount_t refcount; struct completion free; }; -- cgit v1.2.3 From a4b51a9f83c6d359ff8fc0c66009283b6fdeeaf8 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:40 +0300 Subject: drivers, net, mlx5: convert mlx5_cq.refcount from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable mlx5_cq.refcount is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- include/linux/mlx5/cq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 95898847c7d4..6a57ec2f1ef7 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -35,7 +35,7 @@ #include #include - +#include struct mlx5_core_cq { u32 cqn; @@ -43,7 +43,7 @@ struct mlx5_core_cq { __be32 *set_ci_db; __be32 *arm_db; struct mlx5_uars_page *uar; - atomic_t refcount; + refcount_t refcount; struct completion free; unsigned vector; unsigned int irqn; -- cgit v1.2.3 From e65f7ee39b4d7604a78b03ed35d723e1001fc241 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 10:23:49 +0300 Subject: drivers, connector: convert cn_callback_entry.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable cn_callback_entry.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: David S. Miller --- include/linux/connector.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/connector.h b/include/linux/connector.h index f8fe8637d771..032102b19645 100644 --- a/include/linux/connector.h +++ b/include/linux/connector.h @@ -22,7 +22,7 @@ #define __CONNECTOR_H -#include +#include #include #include @@ -49,7 +49,7 @@ struct cn_callback_id { struct cn_callback_entry { struct list_head callback_entry; - atomic_t refcnt; + refcount_t refcnt; struct cn_queue_dev *pdev; struct cn_callback_id id; -- cgit v1.2.3 From eb297bc716ec2cb3147e6e99002e37058c65cba3 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Tue, 10 Oct 2017 22:08:54 -0700 Subject: of: reserved_mem: Accessor for acquiring reserved_mem In some cases drivers referencing a reserved-memory region might want to remap the entire region, but when defining the reserved-memory by "size" the client driver has no means to know the associated base address of the reserved memory region. This patch adds an accessor for such drivers to acquire a handle to their associated reserved-memory for this purpose. A complicating factor for the implementation is that the reserved_mem objects are created from the flattened DeviceTree, as such we can't use the device_node address for comparison. Fortunately the name of the node will be used as "name" of the reserved_mem and will be used when building the full_name, so we can compare the "name" with the basename of the full_name to find the match. Reviewed-by: Rob Herring Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/of_reserved_mem.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_reserved_mem.h b/include/linux/of_reserved_mem.h index f8e1992d6423..c58f780104f9 100644 --- a/include/linux/of_reserved_mem.h +++ b/include/linux/of_reserved_mem.h @@ -44,6 +44,7 @@ int early_init_dt_alloc_reserved_memory_arch(phys_addr_t size, void fdt_init_reserved_mem(void); void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size); +struct reserved_mem *of_reserved_mem_lookup(struct device_node *np); #else static inline int of_reserved_mem_device_init_by_idx(struct device *dev, struct device_node *np, int idx) @@ -55,6 +56,10 @@ static inline void of_reserved_mem_device_release(struct device *pdev) { } static inline void fdt_init_reserved_mem(void) { } static inline void fdt_reserved_mem_save_node(unsigned long node, const char *uname, phys_addr_t base, phys_addr_t size) { } +static inline struct reserved_mem *of_reserved_mem_lookup(struct device_node *np) +{ + return NULL; +} #endif /** -- cgit v1.2.3 From 176aa36012135d172394a928a03fb03dfecd83f9 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Thu, 21 Sep 2017 12:11:24 +0900 Subject: extcon: Split out extcon header file for consumer and provider device The extcon has two type of extcon devices as following. - 'extcon provider deivce' adds new extcon device and detect the state/properties of external connector. Also, it notifies the state/properties to the extcon consumer device. - 'extcon consumer device' gets the change state/properties from extcon provider device. Prior to that, include/linux/extcon.h contains all exported API for both provider and consumer device driver. To clarify the meaning of header file and to remove the wrong use-case on consumer device, this patch separates into extcon.h and extcon-provider.h. [Description for include/linux/{extcon.h|extcon-provider.h}] - extcon.h includes the extcon API and data structure for extcon consumer device driver. This header file contains the following APIs: : Register/unregister the notifier to catch the change of extcon device : Get the extcon device instance : Get the extcon device name : Get the state of each external connector : Get the property value of each external connector : Get the property capability of each external connector - extcon-provider.h includes the extcon API and data structure for extcon provider device driver. This header file contains the following APIs: : Include 'include/linux/extcon.h' : Allocate the memory for extcon device instance : Register/unregister extcon device : Set the state of each external connector : Set the property value of each external connector : Set the property capability of each external connector Signed-off-by: Chanwoo Choi Acked-by: Sebastian Reichel Acked-by: Chen-Yu Tsai Acked-by: Charles Keepax Acked-by: Lee Jones Acked-by: Felipe Balbi Acked-by: Yoshihiro Shimoda Acked-by: Kishon Vijay Abraham I --- include/linux/extcon-provider.h | 142 ++++++++++++++++++++++++++++++++++++++++ include/linux/extcon.h | 109 ++---------------------------- include/linux/mfd/palmas.h | 2 +- 3 files changed, 148 insertions(+), 105 deletions(-) create mode 100644 include/linux/extcon-provider.h (limited to 'include/linux') diff --git a/include/linux/extcon-provider.h b/include/linux/extcon-provider.h new file mode 100644 index 000000000000..2feca5881fa7 --- /dev/null +++ b/include/linux/extcon-provider.h @@ -0,0 +1,142 @@ +/* + * External Connector (extcon) framework + * - linux/include/linux/extcon-provider.h for extcon provider device driver. + * + * Copyright (C) 2017 Samsung Electronics + * Author: Chanwoo Choi + * + * This software is licensed under the terms of the GNU General Public + * License version 2, as published by the Free Software Foundation, and + * may be copied, distributed, and modified under those terms. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __LINUX_EXTCON_PROVIDER_H__ +#define __LINUX_EXTCON_PROVIDER_H__ + +#include + +struct extcon_dev; + +#if IS_ENABLED(CONFIG_EXTCON) + +/* Following APIs register/unregister the extcon device. */ +extern int extcon_dev_register(struct extcon_dev *edev); +extern void extcon_dev_unregister(struct extcon_dev *edev); +extern int devm_extcon_dev_register(struct device *dev, + struct extcon_dev *edev); +extern void devm_extcon_dev_unregister(struct device *dev, + struct extcon_dev *edev); + +/* Following APIs allocate/free the memory of the extcon device. */ +extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); +extern void extcon_dev_free(struct extcon_dev *edev); +extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, + const unsigned int *cable); +extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); + +/* Synchronize the state and property value for each external connector. */ +extern int extcon_sync(struct extcon_dev *edev, unsigned int id); + +/* + * Following APIs set the connected state of each external connector. + * The 'id' argument indicates the defined external connector. + */ +extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, + bool state); +extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, + bool state); + +/* + * Following APIs set the property of each external connector. + * The 'id' argument indicates the defined external connector + * and the 'prop' indicates the extcon property. + * + * And extcon_set_property_capability() set the capability of the property + * for each external connector. They are used to set the capability of the + * property of each external connector based on the id and property. + */ +extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val); +extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val); +extern int extcon_set_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop); + +#else /* CONFIG_EXTCON */ +static inline int extcon_dev_register(struct extcon_dev *edev) +{ + return 0; +} + +static inline void extcon_dev_unregister(struct extcon_dev *edev) { } + +static inline int devm_extcon_dev_register(struct device *dev, + struct extcon_dev *edev) +{ + return -EINVAL; +} + +static inline void devm_extcon_dev_unregister(struct device *dev, + struct extcon_dev *edev) { } + +static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void extcon_dev_free(struct extcon_dev *edev) { } + +static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, + const unsigned int *cable) +{ + return ERR_PTR(-ENOSYS); +} + +static inline void devm_extcon_dev_free(struct extcon_dev *edev) { } + + +static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, + bool state) +{ + return 0; +} + +static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, + bool state) +{ + return 0; +} + +static inline int extcon_sync(struct extcon_dev *edev, unsigned int id) +{ + return 0; +} + +static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, + unsigned int prop, + union extcon_property_value prop_val) +{ + return 0; +} + +static inline int extcon_set_property_sync(struct extcon_dev *edev, + unsigned int id, unsigned int prop, + union extcon_property_value prop_val) +{ + return 0; +} + +static inline int extcon_set_property_capability(struct extcon_dev *edev, + unsigned int id, unsigned int prop) +{ + return 0; +} +#endif /* CONFIG_EXTCON */ +#endif /* __LINUX_EXTCON_PROVIDER_H__ */ diff --git a/include/linux/extcon.h b/include/linux/extcon.h index 744d60ca80c3..6d94e82c8ad9 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -1,5 +1,6 @@ /* * External Connector (extcon) framework + * - linux/include/linux/extcon.h for extcon consumer device driver. * * Copyright (C) 2015 Samsung Electronics * Author: Chanwoo Choi @@ -170,61 +171,29 @@ union extcon_property_value { int intval; /* type : integer (intval) */ }; -struct extcon_cable; struct extcon_dev; #if IS_ENABLED(CONFIG_EXTCON) - -/* Following APIs register/unregister the extcon device. */ -extern int extcon_dev_register(struct extcon_dev *edev); -extern void extcon_dev_unregister(struct extcon_dev *edev); -extern int devm_extcon_dev_register(struct device *dev, - struct extcon_dev *edev); -extern void devm_extcon_dev_unregister(struct device *dev, - struct extcon_dev *edev); - -/* Following APIs allocate/free the memory of the extcon device. */ -extern struct extcon_dev *extcon_dev_allocate(const unsigned int *cable); -extern void extcon_dev_free(struct extcon_dev *edev); -extern struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const unsigned int *cable); -extern void devm_extcon_dev_free(struct device *dev, struct extcon_dev *edev); - -/* Synchronize the state and property value for each external connector. */ -extern int extcon_sync(struct extcon_dev *edev, unsigned int id); - /* - * Following APIs get/set the connected state of each external connector. + * Following APIs get the connected state of each external connector. * The 'id' argument indicates the defined external connector. */ extern int extcon_get_state(struct extcon_dev *edev, unsigned int id); -extern int extcon_set_state(struct extcon_dev *edev, unsigned int id, - bool state); -extern int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, - bool state); /* - * Following APIs get/set the property of each external connector. + * Following APIs get the property of each external connector. * The 'id' argument indicates the defined external connector * and the 'prop' indicates the extcon property. * - * And extcon_get/set_property_capability() set the capability of the property - * for each external connector. They are used to set the capability of the + * And extcon_get_property_capability() get the capability of the property + * for each external connector. They are used to get the capability of the * property of each external connector based on the id and property. */ extern int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value *prop_val); -extern int extcon_set_property(struct extcon_dev *edev, unsigned int id, - unsigned int prop, - union extcon_property_value prop_val); -extern int extcon_set_property_sync(struct extcon_dev *edev, unsigned int id, - unsigned int prop, - union extcon_property_value prop_val); extern int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop); -extern int extcon_set_property_capability(struct extcon_dev *edev, - unsigned int id, unsigned int prop); /* * Following APIs register the notifier block in order to detect @@ -268,79 +237,17 @@ extern struct extcon_dev *extcon_get_edev_by_phandle(struct device *dev, extern const char *extcon_get_edev_name(struct extcon_dev *edev); #else /* CONFIG_EXTCON */ -static inline int extcon_dev_register(struct extcon_dev *edev) -{ - return 0; -} - -static inline void extcon_dev_unregister(struct extcon_dev *edev) { } - -static inline int devm_extcon_dev_register(struct device *dev, - struct extcon_dev *edev) -{ - return -EINVAL; -} - -static inline void devm_extcon_dev_unregister(struct device *dev, - struct extcon_dev *edev) { } - -static inline struct extcon_dev *extcon_dev_allocate(const unsigned int *cable) -{ - return ERR_PTR(-ENOSYS); -} - -static inline void extcon_dev_free(struct extcon_dev *edev) { } - -static inline struct extcon_dev *devm_extcon_dev_allocate(struct device *dev, - const unsigned int *cable) -{ - return ERR_PTR(-ENOSYS); -} - -static inline void devm_extcon_dev_free(struct extcon_dev *edev) { } - - static inline int extcon_get_state(struct extcon_dev *edev, unsigned int id) { return 0; } -static inline int extcon_set_state(struct extcon_dev *edev, unsigned int id, - bool state) -{ - return 0; -} - -static inline int extcon_set_state_sync(struct extcon_dev *edev, unsigned int id, - bool state) -{ - return 0; -} - -static inline int extcon_sync(struct extcon_dev *edev, unsigned int id) -{ - return 0; -} - static inline int extcon_get_property(struct extcon_dev *edev, unsigned int id, unsigned int prop, union extcon_property_value *prop_val) { return 0; } -static inline int extcon_set_property(struct extcon_dev *edev, unsigned int id, - unsigned int prop, - union extcon_property_value prop_val) -{ - return 0; -} - -static inline int extcon_set_property_sync(struct extcon_dev *edev, - unsigned int id, unsigned int prop, - union extcon_property_value prop_val) -{ - return 0; -} static inline int extcon_get_property_capability(struct extcon_dev *edev, unsigned int id, unsigned int prop) @@ -348,12 +255,6 @@ static inline int extcon_get_property_capability(struct extcon_dev *edev, return 0; } -static inline int extcon_set_property_capability(struct extcon_dev *edev, - unsigned int id, unsigned int prop) -{ - return 0; -} - static inline int extcon_register_notifier(struct extcon_dev *edev, unsigned int id, struct notifier_block *nb) { diff --git a/include/linux/mfd/palmas.h b/include/linux/mfd/palmas.h index 6dec43826303..3c8568aa82a5 100644 --- a/include/linux/mfd/palmas.h +++ b/include/linux/mfd/palmas.h @@ -20,7 +20,7 @@ #include #include #include -#include +#include #include #include -- cgit v1.2.3 From 11a6e41c0ee503ffcb971d260bd07dc99b77f13a Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Mon, 4 Sep 2017 14:53:13 +0200 Subject: phy: Return NULL if the phy is optional If we're trying to get a handle to an optional phy, then the phy framework being disabled shouldn't return an hard error. Instead, return NULL just like phy_optional_get does when there's no phy provided in the DT. Signed-off-by: Maxime Ripard Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index e694d4008c4a..10888a717860 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -291,7 +291,7 @@ static inline struct phy *devm_phy_get(struct device *dev, const char *string) static inline struct phy *devm_phy_optional_get(struct device *dev, const char *string) { - return ERR_PTR(-ENOSYS); + return NULL; } static inline struct phy *devm_of_phy_get(struct device *dev, -- cgit v1.2.3 From fd3e4c98e6e7a12dc47ef98ee12c0c3c024b5ee9 Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 12 Oct 2017 11:49:33 +0530 Subject: phy: Add UFS PHY modes UFS phy has two modes for each High speed generation. These modes are identified by two rates of operations - Rate A, and Rate B. Add these UFS phy modes to phy framework. Signed-off-by: Vivek Gautam Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 10888a717860..194d08174516 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -29,6 +29,8 @@ enum phy_mode { PHY_MODE_USB_OTG, PHY_MODE_SGMII, PHY_MODE_10GKR, + PHY_MODE_UFS_HS_A, + PHY_MODE_UFS_HS_B, }; /** -- cgit v1.2.3 From 052553af6a31b459adfdc6fd1eebced75de332fc Mon Sep 17 00:00:00 2001 From: Vivek Gautam Date: Thu, 12 Oct 2017 11:49:36 +0530 Subject: ufs/phy: qcom: Refactor to use phy_init call Refactor ufs_qcom_power_up_sequence() to get rid of ugly exported phy APIs and use the phy_init() and phy_power_on() to do the phy initialization. Signed-off-by: Vivek Gautam Reviewed-by: Subhash Jadavani Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy-qcom-ufs.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/phy/phy-qcom-ufs.h b/include/linux/phy/phy-qcom-ufs.h index 35c070ea6ea3..0a2c18a9771d 100644 --- a/include/linux/phy/phy-qcom-ufs.h +++ b/include/linux/phy/phy-qcom-ufs.h @@ -31,10 +31,7 @@ void ufs_qcom_phy_enable_dev_ref_clk(struct phy *phy); */ void ufs_qcom_phy_disable_dev_ref_clk(struct phy *phy); -int ufs_qcom_phy_start_serdes(struct phy *phy); int ufs_qcom_phy_set_tx_lane_enable(struct phy *phy, u32 tx_lanes); -int ufs_qcom_phy_calibrate_phy(struct phy *phy, bool is_rate_B); -int ufs_qcom_phy_is_pcs_ready(struct phy *phy); void ufs_qcom_phy_save_controller_version(struct phy *phy, u8 major, u16 minor, u16 step); -- cgit v1.2.3 From 36914111e6829be36b23d1109214250b5ee1ee9c Mon Sep 17 00:00:00 2001 From: Andrzej Pietrasiewicz Date: Mon, 9 Oct 2017 14:00:50 +0200 Subject: drivers: phy: add calibrate method Some quirky UDCs (like dwc3 on Exynos) need to have their phys calibrated e.g. for using super speed. This patch adds a new phy_calibrate() method. When the calibration should be used is dependent on actual chip. In case of dwc3 on Exynos the calibration must happen after usb_add_hcd() (while in host mode), because certain phy parameters like Tx LOS levels and boost levels need to be calibrated further post initialization of xHCI controller, to get SuperSpeed operations working. But an hcd must be prepared first in order to pass it to usb_add_hcd(), so, in particular, dwc3 registers must be available first, and in order for the latter to happen the phys must be initialized. This poses a chicken and egg problem if the calibration were to be performed in phy_init(). To break the circular dependency a separate method is added which can be called at a desired moment after phy intialization. Signed-off-by: Andrzej Pietrasiewicz Signed-off-by: Kishon Vijay Abraham I --- include/linux/phy/phy.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index 194d08174516..4f8423a948d5 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -41,6 +41,7 @@ enum phy_mode { * @power_off: powering off the phy * @set_mode: set the mode of the phy * @reset: resetting the phy + * @calibrate: calibrate the phy * @owner: the module owner containing the ops */ struct phy_ops { @@ -50,6 +51,7 @@ struct phy_ops { int (*power_off)(struct phy *phy); int (*set_mode)(struct phy *phy, enum phy_mode mode); int (*reset)(struct phy *phy); + int (*calibrate)(struct phy *phy); struct module *owner; }; @@ -143,6 +145,7 @@ int phy_power_on(struct phy *phy); int phy_power_off(struct phy *phy); int phy_set_mode(struct phy *phy, enum phy_mode mode); int phy_reset(struct phy *phy); +int phy_calibrate(struct phy *phy); static inline int phy_get_bus_width(struct phy *phy) { return phy->attrs.bus_width; @@ -264,6 +267,13 @@ static inline int phy_reset(struct phy *phy) return -ENOSYS; } +static inline int phy_calibrate(struct phy *phy) +{ + if (!phy) + return 0; + return -ENOSYS; +} + static inline int phy_get_bus_width(struct phy *phy) { return -ENOSYS; -- cgit v1.2.3 From 8dd8d2c95d0252bc64aa8e061a5fb4fbcd05f826 Mon Sep 17 00:00:00 2001 From: Daniel Drake Date: Wed, 18 Oct 2017 15:15:01 +0800 Subject: USB: Force disconnect Huawei 4G modem during suspend When going into S3 suspend, the Acer TravelMate P648-M and P648-G3 laptops immediately wake up 3-4 seconds later for no obvious reason. Unbinding the integrated Huawei 4G LTE modem before suspend avoids the issue, even though we are not using the modem at all (checked from rescue.target/runlevel1). The problem also occurs when the option and cdc-ether modem drivers aren't loaded; it reproduces just with the base usb driver. Under Windows the system can suspend fine. Seeking a better fix, we've tried a lot of things, including: - Check that the device's power/wakeup is disabled - Check that remote wakeup is off at the USB level - All the quirks in drivers/usb/core/quirks.c e.g. USB_QUIRK_RESET_RESUME, USB_QUIRK_RESET, USB_QUIRK_IGNORE_REMOTE_WAKEUP, USB_QUIRK_NO_LPM. but none of that makes any difference. There are no errors in the logs showing any suspend/resume-related issues. When the system wakes up due to the modem, log-wise it appears to be a normal resume. Introduce a quirk to disable the port during suspend when the modem is detected. The modem from the P648-G3 model is: T: Bus=01 Lev=01 Prnt=01 Port=08 Cnt=04 Dev#= 5 Spd=480 MxCh= 0 D: Ver= 2.00 Cls=00(>ifc ) Sub=00 Prot=ff MxPS=64 #Cfgs= 3 P: Vendor=12d1 ProdID=15c3 Rev= 1.02 S: Manufacturer=Huawei Technologies Co., Ltd. S: Product=HUAWEI Mobile S: SerialNumber=0123456789ABCDEF C: #Ifs= 5 Cfg#= 1 Atr=a0 MxPwr= 2mA I: If#= 0 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=06 Prot=10 Driver= E: Ad=82(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 1 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=13 Driver= E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 2 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=12 Driver= E: Ad=84(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 3 Alt= 0 #EPs= 1 Cls=ff(vend.) Sub=06 Prot=16 Driver= E: Ad=86(I) Atr=03(Int.) MxPS= 16 Ivl=2ms I: If#= 3 Alt= 1 #EPs= 3 Cls=ff(vend.) Sub=06 Prot=16 Driver= E: Ad=86(I) Atr=03(Int.) MxPS= 16 Ivl=2ms E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I: If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=1b Driver= E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms C:* #Ifs= 6 Cfg#= 2 Atr=a0 MxPwr= 2mA I:* If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=06 Prot=00 Driver=cdc_ether E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=2ms I:* If#= 1 Alt= 0 #EPs= 2 Cls=0a(data ) Sub=06 Prot=00 Driver=cdc_ether E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 2 Alt= 0 #EPs= 3 Cls=ff(vend.) Sub=06 Prot=10 Driver=option E: Ad=84(I) Atr=03(Int.) MxPS= 10 Ivl=32ms E: Ad=83(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=02(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 3 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=13 Driver=option E: Ad=85(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=03(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 4 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=12 Driver=option E: Ad=86(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=04(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms I:* If#= 5 Alt= 0 #EPs= 2 Cls=ff(vend.) Sub=06 Prot=1b Driver=option E: Ad=87(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=05(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms C: #Ifs= 2 Cfg#= 3 Atr=a0 MxPwr= 2mA A: FirstIf#= 0 IfCount= 2 Cls=02(comm.) Sub=0e Prot=00 I: If#= 0 Alt= 0 #EPs= 1 Cls=02(comm.) Sub=0e Prot=00 Driver= E: Ad=82(I) Atr=03(Int.) MxPS= 16 Ivl=2ms I: If#= 1 Alt= 0 #EPs= 0 Cls=0a(data ) Sub=00 Prot=02 Driver= I: If#= 1 Alt= 1 #EPs= 2 Cls=0a(data ) Sub=00 Prot=02 Driver= E: Ad=81(I) Atr=02(Bulk) MxPS= 512 Ivl=0ms E: Ad=01(O) Atr=02(Bulk) MxPS= 512 Ivl=0ms Based on an earlier patch by Chris Chiu. Signed-off-by: Daniel Drake Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/quirks.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/quirks.h b/include/linux/usb/quirks.h index de2a722fe3cf..bdc639cc80b4 100644 --- a/include/linux/usb/quirks.h +++ b/include/linux/usb/quirks.h @@ -56,4 +56,10 @@ */ #define USB_QUIRK_LINEAR_FRAME_INTR_BINTERVAL BIT(11) +/* + * Device needs to be disconnected before suspend to prevent spurious + * wakeup. + */ +#define USB_QUIRK_DISCONNECT_SUSPEND BIT(12) + #endif /* __LINUX_USB_QUIRKS_H */ -- cgit v1.2.3 From a11bc476b987925654369411dd8281a60cb5a175 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Mon, 4 Sep 2017 12:19:07 -0700 Subject: Input: uinput - fold header into the driver proper There is nothing in the uinput kernel header that is of use to anyone in the kernel besides the uinput driver itself, so let's fold it into the driver code (leaving uapi part intact). Signed-off-by: Dmitry Torokhov --- include/linux/uinput.h | 81 -------------------------------------------------- 1 file changed, 81 deletions(-) delete mode 100644 include/linux/uinput.h (limited to 'include/linux') diff --git a/include/linux/uinput.h b/include/linux/uinput.h deleted file mode 100644 index 75de43da2301..000000000000 --- a/include/linux/uinput.h +++ /dev/null @@ -1,81 +0,0 @@ -/* - * User level driver support for input subsystem - * - * Heavily based on evdev.c by Vojtech Pavlik - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * - * Author: Aristeu Sergio Rozanski Filho - * - * Changes/Revisions: - * 0.5 08/13/2015 (David Herrmann & - * Benjamin Tissoires ) - * - add UI_DEV_SETUP ioctl - * - add UI_ABS_SETUP ioctl - * - add UI_GET_VERSION ioctl - * 0.4 01/09/2014 (Benjamin Tissoires ) - * - add UI_GET_SYSNAME ioctl - * 0.3 24/05/2006 (Anssi Hannula ) - * - update ff support for the changes in kernel interface - * - add UINPUT_VERSION - * 0.2 16/10/2004 (Micah Dowty ) - * - added force feedback support - * - added UI_SET_PHYS - * 0.1 20/06/2002 - * - first public version - */ -#ifndef __UINPUT_H_ -#define __UINPUT_H_ - -#include - -#define UINPUT_NAME "uinput" -#define UINPUT_BUFFER_SIZE 16 -#define UINPUT_NUM_REQUESTS 16 - -enum uinput_state { UIST_NEW_DEVICE, UIST_SETUP_COMPLETE, UIST_CREATED }; - -struct uinput_request { - unsigned int id; - unsigned int code; /* UI_FF_UPLOAD, UI_FF_ERASE */ - - int retval; - struct completion done; - - union { - unsigned int effect_id; - struct { - struct ff_effect *effect; - struct ff_effect *old; - } upload; - } u; -}; - -struct uinput_device { - struct input_dev *dev; - struct mutex mutex; - enum uinput_state state; - wait_queue_head_t waitq; - unsigned char ready; - unsigned char head; - unsigned char tail; - struct input_event buff[UINPUT_BUFFER_SIZE]; - unsigned int ff_effects_max; - - struct uinput_request *requests[UINPUT_NUM_REQUESTS]; - wait_queue_head_t requests_waitq; - spinlock_t requests_lock; -}; -#endif /* __UINPUT_H_ */ -- cgit v1.2.3 From 7b9651103b64c8a55eb6cec4c2240584e968354c Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 18 Oct 2017 11:56:21 +0200 Subject: extcon: max77843: Add OTG power control to the MUIC driver Enabling power on VBUS micro-usb pin is required only when passive OTG cable is connected. Initially OTG VBUS power control was planned to be done in charger driver. However such information is not really available from the extcon notifications, so VBUS power control has to be done directly in MUIC driver, which has all information about the attached accessory. For example SmartDock is externally powered accessory, provides OTG (USB HOST) functionality and use VBUS pin for charging a device battery, so the VBUS charging pump should be disabled in such case. Signed-off-by: Marek Szyprowski Acked-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/max77843-private.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h index c19303b0ccfd..0223cd5941c8 100644 --- a/include/linux/mfd/max77843-private.h +++ b/include/linux/mfd/max77843-private.h @@ -245,10 +245,13 @@ enum max77843_irq_muic { #define MAX77843_CHG_OVER_CURRENT_BAT (0x06 << 4) /* MAX77843 CHG_CNFG_00 register */ +#define MAX77843_CHG_MODE_MASK 0x0f #define MAX77843_CHG_DISABLE 0x00 #define MAX77843_CHG_ENABLE 0x05 #define MAX77843_CHG_MASK 0x01 +#define MAX77843_CHG_OTG_MASK 0x02 #define MAX77843_CHG_BUCK_MASK 0x04 +#define MAX77843_CHG_BOOST_MASK 0x08 /* MAX77843 CHG_CNFG_01 register */ #define MAX77843_CHG_RESTART_THRESHOLD_100 0x00 -- cgit v1.2.3 From 4a4a87146a07c866ad2ef49cc32296e6583b1cee Mon Sep 17 00:00:00 2001 From: Marek Szyprowski Date: Wed, 18 Oct 2017 11:56:22 +0200 Subject: extcon: max77843: Add support for SmartDock accessory SmartDock uses ADC_RESERVED_ACC_3 (0x10) ADC ID type and provides following features: 1. USB host with embedded USB hub (2-4 ports) for mice, keyboard, etc, 2. MHL for video output, 3. charging. Tested with Unitek Y-2165 MHL+OTG Hub Smart Phone Dock. Signed-off-by: Marek Szyprowski Acked-by: Chanwoo Choi Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/max77843-private.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/max77843-private.h b/include/linux/mfd/max77843-private.h index 0223cd5941c8..b8908bf8d315 100644 --- a/include/linux/mfd/max77843-private.h +++ b/include/linux/mfd/max77843-private.h @@ -350,6 +350,7 @@ enum max77843_irq_muic { /* MAX77843 CONTROL register */ #define MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT 0 #define MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT 3 +#define MAX77843_MUIC_CONTROL1_NOBCCOMP_SHIFT 6 #define MAX77843_MUIC_CONTROL1_IDBEN_SHIFT 7 #define MAX77843_MUIC_CONTROL2_LOWPWR_SHIFT 0 #define MAX77843_MUIC_CONTROL2_ADCEN_SHIFT 1 @@ -366,6 +367,7 @@ enum max77843_irq_muic { #define MAX77843_MUIC_CONTROL1_COMP1SW_MASK (0x7 << MAX77843_MUIC_CONTROL1_COMP1SW_SHIFT) #define MAX77843_MUIC_CONTROL1_COMP2SW_MASK (0x7 << MAX77843_MUIC_CONTROL1_COMP2SW_SHIFT) #define MAX77843_MUIC_CONTROL1_IDBEN_MASK BIT(MAX77843_MUIC_CONTROL1_IDBEN_SHIFT) +#define MAX77843_MUIC_CONTROL1_NOBCCOMP_MASK BIT(MAX77843_MUIC_CONTROL1_NOBCCOMP_SHIFT) #define MAX77843_MUIC_CONTROL2_LOWPWR_MASK BIT(MAX77843_MUIC_CONTROL2_LOWPWR_SHIFT) #define MAX77843_MUIC_CONTROL2_ADCEN_MASK BIT(MAX77843_MUIC_CONTROL2_ADCEN_SHIFT) #define MAX77843_MUIC_CONTROL2_CPEN_MASK BIT(MAX77843_MUIC_CONTROL2_CPEN_SHIFT) -- cgit v1.2.3 From c6cd924efe941ef62eb805c59e4a09e219ac5c6d Mon Sep 17 00:00:00 2001 From: Yanjiang Jin Date: Tue, 24 Oct 2017 14:23:41 +0800 Subject: cpu/hotplug: Remove obsolete notifier macros commit 530e9b76ae8f ("cpu/hotplug: Remove obsolete cpu hotplug register/unregister functions")' removed the below macros: - #define CPU_UP_CANCELED 0x0004 /* CPU (unsigned)v NOT coming up */ - #define CPU_DOWN_PREPARE 0x0005 /* CPU (unsigned)v going down */ - #define CPU_DOWN_FAILED 0x0006 /* CPU (unsigned)v NOT going down */ But "CPU_UP_CANCELED_FROZEN, CPU_DOWN_PREPARE_FROZEN and CPU_DOWN_FAILED_FROZEN" still refer to them, and nobody uses these "FROZEN" macros now, so remove them too. Signed-off-by: Yanjiang Jin Signed-off-by: Thomas Gleixner Cc: peterz@infradead.org Cc: bigeasy@linutronix.de Cc: jinyanjiang@gmail.com Link: https://lkml.kernel.org/r/20171024062341.179678-1-yanjiang.jin@windriver.com --- include/linux/cpu.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index ca73bc1563f4..cd4771b772c0 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -72,9 +72,6 @@ struct notifier_block; #define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) #define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) -#define CPU_UP_CANCELED_FROZEN (CPU_UP_CANCELED | CPU_TASKS_FROZEN) -#define CPU_DOWN_PREPARE_FROZEN (CPU_DOWN_PREPARE | CPU_TASKS_FROZEN) -#define CPU_DOWN_FAILED_FROZEN (CPU_DOWN_FAILED | CPU_TASKS_FROZEN) #define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) #ifdef CONFIG_SMP -- cgit v1.2.3 From e22cdc3fc5991956146b9856d36b4971fe54dcd6 Mon Sep 17 00:00:00 2001 From: Rakib Mullick Date: Mon, 23 Oct 2017 19:01:54 +0600 Subject: sched/isolcpus: Fix "isolcpus=" boot parameter handling when !CONFIG_CPUMASK_OFFSTACK cpulist_parse() uses nr_cpumask_bits as a limit to parse the passed buffer from kernel commandline. What nr_cpumask_bits represents varies depending upon the CONFIG_CPUMASK_OFFSTACK option: - If CONFIG_CPUMASK_OFFSTACK=n, then nr_cpumask_bits is the same as NR_CPUS, which might not represent the # of CPUs that really exist (default 64). So, there's a chance of a gap between nr_cpu_ids and NR_CPUS, which ultimately lead towards invalid cpulist_parse() operation. For example, if isolcpus=9 is passed on an 8 cpu system (CONFIG_CPUMASK_OFFSTACK=n) it doesn't show the error that it's supposed to. This patch fixes this bug by finding the last CPU of the passed isolcpus= list and checking it against nr_cpu_ids. It also fixes the error message where the nr_cpu_ids should be nr_cpu_ids-1, since CPU numbering starts from 0. Signed-off-by: Rakib Mullick Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: adobriyan@gmail.com Cc: akpm@linux-foundation.org Cc: longman@redhat.com Cc: mka@chromium.org Cc: tj@kernel.org Link: http://lkml.kernel.org/r/20171023130154.9050-1-rakib.mullick@gmail.com [ Enhanced the changelog and the kernel message. ] Signed-off-by: Ingo Molnar include/linux/cpumask.h | 16 ++++++++++++++++ kernel/sched/topology.c | 4 ++-- 2 files changed, 18 insertions(+), 2 deletions(-) --- include/linux/cpumask.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpumask.h b/include/linux/cpumask.h index cd415b733c2a..63661de67ad4 100644 --- a/include/linux/cpumask.h +++ b/include/linux/cpumask.h @@ -130,6 +130,11 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return 0; } +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return 0; +} + /* Valid inputs for n are -1 and 0. */ static inline unsigned int cpumask_next(int n, const struct cpumask *srcp) { @@ -178,6 +183,17 @@ static inline unsigned int cpumask_first(const struct cpumask *srcp) return find_first_bit(cpumask_bits(srcp), nr_cpumask_bits); } +/** + * cpumask_last - get the last CPU in a cpumask + * @srcp: - the cpumask pointer + * + * Returns >= nr_cpumask_bits if no CPUs set. + */ +static inline unsigned int cpumask_last(const struct cpumask *srcp) +{ + return find_last_bit(cpumask_bits(srcp), nr_cpumask_bits); +} + unsigned int cpumask_next(int n, const struct cpumask *srcp); /** -- cgit v1.2.3 From 71c02379c762cb616c00fd5c4ed253fbf6bbe11b Mon Sep 17 00:00:00 2001 From: Christoph Paasch Date: Mon, 23 Oct 2017 13:22:23 -0700 Subject: tcp: Configure TFO without cookie per socket and/or per route We already allow to enable TFO without a cookie by using the fastopen-sysctl and setting it to TFO_SERVER_COOKIE_NOT_REQD (or TFO_CLIENT_NO_COOKIE). This is safe to do in certain environments where we know that there isn't a malicous host (aka., data-centers) or when the application-protocol already provides an authentication mechanism in the first flight of data. A server however might be providing multiple services or talking to both sides (public Internet and data-center). So, this server would want to enable cookie-less TFO for certain services and/or for connections that go to the data-center. This patch exposes a socket-option and a per-route attribute to enable such fine-grained configurations. Signed-off-by: Christoph Paasch Reviewed-by: Yuchung Cheng Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 1d2c44e09e31..173a7c2f9636 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -215,7 +215,8 @@ struct tcp_sock { u8 chrono_type:2, /* current chronograph type */ rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ - unused:4; + fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ + unused:3; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, -- cgit v1.2.3 From d15155824c5014803d91b829736d249c500bdda6 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Oct 2017 11:22:46 +0100 Subject: linux/compiler.h: Split into compiler.h and compiler_types.h linux/compiler.h is included indirectly by linux/types.h via uapi/linux/types.h -> uapi/linux/posix_types.h -> linux/stddef.h -> uapi/linux/stddef.h and is needed to provide a proper definition of offsetof. Unfortunately, compiler.h requires a definition of smp_read_barrier_depends() for defining lockless_dereference() and soon for defining READ_ONCE(), which means that all users of READ_ONCE() will need to include asm/barrier.h to avoid splats such as: In file included from include/uapi/linux/stddef.h:1:0, from include/linux/stddef.h:4, from arch/h8300/kernel/asm-offsets.c:11: include/linux/list.h: In function 'list_empty': >> include/linux/compiler.h:343:2: error: implicit declaration of function 'smp_read_barrier_depends' [-Werror=implicit-function-declaration] smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ ^ A better alternative is to include asm/barrier.h in linux/compiler.h, but this requires a type definition for "bool" on some architectures (e.g. x86), which is defined later by linux/types.h. Type "bool" is also used directly in linux/compiler.h, so the whole thing is pretty fragile. This patch splits compiler.h in two: compiler_types.h contains type annotations, definitions and the compiler-specific parts, whereas compiler.h #includes compiler-types.h and additionally defines macros such as {READ,WRITE.ACCESS}_ONCE(). uapi/linux/stddef.h and linux/linkage.h are then moved over to include linux/compiler_types.h, which fixes the build for h8 and blackfin. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-2-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler-clang.h | 2 +- include/linux/compiler-gcc.h | 2 +- include/linux/compiler-intel.h | 2 +- include/linux/compiler.h | 265 +-------------------------------------- include/linux/compiler_types.h | 274 +++++++++++++++++++++++++++++++++++++++++ include/linux/linkage.h | 2 +- 6 files changed, 281 insertions(+), 266 deletions(-) create mode 100644 include/linux/compiler_types.h (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index de179993e039..5947a3e6c0e6 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -1,4 +1,4 @@ -#ifndef __LINUX_COMPILER_H +#ifndef __LINUX_COMPILER_TYPES_H #error "Please don't include directly, include instead." #endif diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 16d41de92ee3..ce8e965646ef 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -1,4 +1,4 @@ -#ifndef __LINUX_COMPILER_H +#ifndef __LINUX_COMPILER_TYPES_H #error "Please don't include directly, include instead." #endif diff --git a/include/linux/compiler-intel.h b/include/linux/compiler-intel.h index d4c71132d07f..e438ac89c692 100644 --- a/include/linux/compiler-intel.h +++ b/include/linux/compiler-intel.h @@ -1,4 +1,4 @@ -#ifndef __LINUX_COMPILER_H +#ifndef __LINUX_COMPILER_TYPES_H #error "Please don't include directly, include instead." #endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index e95a2631e545..08083186e54f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -1,111 +1,12 @@ #ifndef __LINUX_COMPILER_H #define __LINUX_COMPILER_H -#ifndef __ASSEMBLY__ +#include -#ifdef __CHECKER__ -# define __user __attribute__((noderef, address_space(1))) -# define __kernel __attribute__((address_space(0))) -# define __safe __attribute__((safe)) -# define __force __attribute__((force)) -# define __nocast __attribute__((nocast)) -# define __iomem __attribute__((noderef, address_space(2))) -# define __must_hold(x) __attribute__((context(x,1,1))) -# define __acquires(x) __attribute__((context(x,0,1))) -# define __releases(x) __attribute__((context(x,1,0))) -# define __acquire(x) __context__(x,1) -# define __release(x) __context__(x,-1) -# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) -# define __percpu __attribute__((noderef, address_space(3))) -# define __rcu __attribute__((noderef, address_space(4))) -# define __private __attribute__((noderef)) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); -# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) -#else /* __CHECKER__ */ -# ifdef STRUCTLEAK_PLUGIN -# define __user __attribute__((user)) -# else -# define __user -# endif -# define __kernel -# define __safe -# define __force -# define __nocast -# define __iomem -# define __chk_user_ptr(x) (void)0 -# define __chk_io_ptr(x) (void)0 -# define __builtin_warning(x, y...) (1) -# define __must_hold(x) -# define __acquires(x) -# define __releases(x) -# define __acquire(x) (void)0 -# define __release(x) (void)0 -# define __cond_lock(x,c) (c) -# define __percpu -# define __rcu -# define __private -# define ACCESS_PRIVATE(p, member) ((p)->member) -#endif /* __CHECKER__ */ - -/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ -#define ___PASTE(a,b) a##b -#define __PASTE(a,b) ___PASTE(a,b) +#ifndef __ASSEMBLY__ #ifdef __KERNEL__ -#ifdef __GNUC__ -#include -#endif - -#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) -#define notrace __attribute__((hotpatch(0,0))) -#else -#define notrace __attribute__((no_instrument_function)) -#endif - -/* Intel compiler defines __GNUC__. So we will overwrite implementations - * coming from above header files here - */ -#ifdef __INTEL_COMPILER -# include -#endif - -/* Clang compiler defines __GNUC__. So we will overwrite implementations - * coming from above header files here - */ -#ifdef __clang__ -#include -#endif - -/* - * Generic compiler-dependent macros required for kernel - * build go below this comment. Actual compiler/compiler version - * specific implementations come from the above header files - */ - -struct ftrace_branch_data { - const char *func; - const char *file; - unsigned line; - union { - struct { - unsigned long correct; - unsigned long incorrect; - }; - struct { - unsigned long miss; - unsigned long hit; - }; - unsigned long miss_hit[2]; - }; -}; - -struct ftrace_likely_data { - struct ftrace_branch_data data; - unsigned long constant; -}; - /* * Note: DISABLE_BRANCH_PROFILING can be used by special lowlevel code * to disable branch tracing on a per file basis. @@ -332,6 +233,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * with an explicit memory barrier or atomic instruction that provides the * required ordering. */ +#include #define __READ_ONCE(x, check) \ ({ \ @@ -362,167 +264,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #endif /* __ASSEMBLY__ */ -#ifdef __KERNEL__ -/* - * Allow us to mark functions as 'deprecated' and have gcc emit a nice - * warning for each use, in hopes of speeding the functions removal. - * Usage is: - * int __deprecated foo(void) - */ -#ifndef __deprecated -# define __deprecated /* unimplemented */ -#endif - -#ifdef MODULE -#define __deprecated_for_modules __deprecated -#else -#define __deprecated_for_modules -#endif - -#ifndef __must_check -#define __must_check -#endif - -#ifndef CONFIG_ENABLE_MUST_CHECK -#undef __must_check -#define __must_check -#endif -#ifndef CONFIG_ENABLE_WARN_DEPRECATED -#undef __deprecated -#undef __deprecated_for_modules -#define __deprecated -#define __deprecated_for_modules -#endif - -#ifndef __malloc -#define __malloc -#endif - -/* - * Allow us to avoid 'defined but not used' warnings on functions and data, - * as well as force them to be emitted to the assembly file. - * - * As of gcc 3.4, static functions that are not marked with attribute((used)) - * may be elided from the assembly file. As of gcc 3.4, static data not so - * marked will not be elided, but this may change in a future gcc version. - * - * NOTE: Because distributions shipped with a backported unit-at-a-time - * compiler in gcc 3.3, we must define __used to be __attribute__((used)) - * for gcc >=3.3 instead of 3.4. - * - * In prior versions of gcc, such functions and data would be emitted, but - * would be warned about except with attribute((unused)). - * - * Mark functions that are referenced only in inline assembly as __used so - * the code is emitted even though it appears to be unreferenced. - */ -#ifndef __used -# define __used /* unimplemented */ -#endif - -#ifndef __maybe_unused -# define __maybe_unused /* unimplemented */ -#endif - -#ifndef __always_unused -# define __always_unused /* unimplemented */ -#endif - -#ifndef noinline -#define noinline -#endif - -/* - * Rather then using noinline to prevent stack consumption, use - * noinline_for_stack instead. For documentation reasons. - */ -#define noinline_for_stack noinline - -#ifndef __always_inline -#define __always_inline inline -#endif - -#endif /* __KERNEL__ */ - -/* - * From the GCC manual: - * - * Many functions do not examine any values except their arguments, - * and have no effects except the return value. Basically this is - * just slightly more strict class than the `pure' attribute above, - * since function is not allowed to read global memory. - * - * Note that a function that has pointer arguments and examines the - * data pointed to must _not_ be declared `const'. Likewise, a - * function that calls a non-`const' function usually must not be - * `const'. It does not make sense for a `const' function to return - * `void'. - */ -#ifndef __attribute_const__ -# define __attribute_const__ /* unimplemented */ -#endif - -#ifndef __designated_init -# define __designated_init -#endif - -#ifndef __latent_entropy -# define __latent_entropy -#endif - -#ifndef __randomize_layout -# define __randomize_layout __designated_init -#endif - -#ifndef __no_randomize_layout -# define __no_randomize_layout -#endif - -#ifndef randomized_struct_fields_start -# define randomized_struct_fields_start -# define randomized_struct_fields_end -#endif - -/* - * Tell gcc if a function is cold. The compiler will assume any path - * directly leading to the call is unlikely. - */ - -#ifndef __cold -#define __cold -#endif - -/* Simple shorthand for a section definition */ -#ifndef __section -# define __section(S) __attribute__ ((__section__(#S))) -#endif - -#ifndef __visible -#define __visible -#endif - -#ifndef __nostackprotector -# define __nostackprotector -#endif - -/* - * Assume alignment of return value. - */ -#ifndef __assume_aligned -#define __assume_aligned(a, ...) -#endif - - -/* Are two types/vars the same type (ignoring qualifiers)? */ -#ifndef __same_type -# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -#endif - -/* Is this type a native word size -- useful for atomic operations */ -#ifndef __native_word -# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) -#endif - /* Compile time object size, -1 for unknown */ #ifndef __compiletime_object_size # define __compiletime_object_size(obj) -1 diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h new file mode 100644 index 000000000000..6b79a9bba9a7 --- /dev/null +++ b/include/linux/compiler_types.h @@ -0,0 +1,274 @@ +#ifndef __LINUX_COMPILER_TYPES_H +#define __LINUX_COMPILER_TYPES_H + +#ifndef __ASSEMBLY__ + +#ifdef __CHECKER__ +# define __user __attribute__((noderef, address_space(1))) +# define __kernel __attribute__((address_space(0))) +# define __safe __attribute__((safe)) +# define __force __attribute__((force)) +# define __nocast __attribute__((nocast)) +# define __iomem __attribute__((noderef, address_space(2))) +# define __must_hold(x) __attribute__((context(x,1,1))) +# define __acquires(x) __attribute__((context(x,0,1))) +# define __releases(x) __attribute__((context(x,1,0))) +# define __acquire(x) __context__(x,1) +# define __release(x) __context__(x,-1) +# define __cond_lock(x,c) ((c) ? ({ __acquire(x); 1; }) : 0) +# define __percpu __attribute__((noderef, address_space(3))) +# define __rcu __attribute__((noderef, address_space(4))) +# define __private __attribute__((noderef)) +extern void __chk_user_ptr(const volatile void __user *); +extern void __chk_io_ptr(const volatile void __iomem *); +# define ACCESS_PRIVATE(p, member) (*((typeof((p)->member) __force *) &(p)->member)) +#else /* __CHECKER__ */ +# ifdef STRUCTLEAK_PLUGIN +# define __user __attribute__((user)) +# else +# define __user +# endif +# define __kernel +# define __safe +# define __force +# define __nocast +# define __iomem +# define __chk_user_ptr(x) (void)0 +# define __chk_io_ptr(x) (void)0 +# define __builtin_warning(x, y...) (1) +# define __must_hold(x) +# define __acquires(x) +# define __releases(x) +# define __acquire(x) (void)0 +# define __release(x) (void)0 +# define __cond_lock(x,c) (c) +# define __percpu +# define __rcu +# define __private +# define ACCESS_PRIVATE(p, member) ((p)->member) +#endif /* __CHECKER__ */ + +/* Indirect macros required for expanded argument pasting, eg. __LINE__. */ +#define ___PASTE(a,b) a##b +#define __PASTE(a,b) ___PASTE(a,b) + +#ifdef __KERNEL__ + +#ifdef __GNUC__ +#include +#endif + +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) +#define notrace __attribute__((hotpatch(0,0))) +#else +#define notrace __attribute__((no_instrument_function)) +#endif + +/* Intel compiler defines __GNUC__. So we will overwrite implementations + * coming from above header files here + */ +#ifdef __INTEL_COMPILER +# include +#endif + +/* Clang compiler defines __GNUC__. So we will overwrite implementations + * coming from above header files here + */ +#ifdef __clang__ +#include +#endif + +/* + * Generic compiler-dependent macros required for kernel + * build go below this comment. Actual compiler/compiler version + * specific implementations come from the above header files + */ + +struct ftrace_branch_data { + const char *func; + const char *file; + unsigned line; + union { + struct { + unsigned long correct; + unsigned long incorrect; + }; + struct { + unsigned long miss; + unsigned long hit; + }; + unsigned long miss_hit[2]; + }; +}; + +struct ftrace_likely_data { + struct ftrace_branch_data data; + unsigned long constant; +}; + +#endif /* __KERNEL__ */ + +#endif /* __ASSEMBLY__ */ + +#ifdef __KERNEL__ +/* + * Allow us to mark functions as 'deprecated' and have gcc emit a nice + * warning for each use, in hopes of speeding the functions removal. + * Usage is: + * int __deprecated foo(void) + */ +#ifndef __deprecated +# define __deprecated /* unimplemented */ +#endif + +#ifdef MODULE +#define __deprecated_for_modules __deprecated +#else +#define __deprecated_for_modules +#endif + +#ifndef __must_check +#define __must_check +#endif + +#ifndef CONFIG_ENABLE_MUST_CHECK +#undef __must_check +#define __must_check +#endif +#ifndef CONFIG_ENABLE_WARN_DEPRECATED +#undef __deprecated +#undef __deprecated_for_modules +#define __deprecated +#define __deprecated_for_modules +#endif + +#ifndef __malloc +#define __malloc +#endif + +/* + * Allow us to avoid 'defined but not used' warnings on functions and data, + * as well as force them to be emitted to the assembly file. + * + * As of gcc 3.4, static functions that are not marked with attribute((used)) + * may be elided from the assembly file. As of gcc 3.4, static data not so + * marked will not be elided, but this may change in a future gcc version. + * + * NOTE: Because distributions shipped with a backported unit-at-a-time + * compiler in gcc 3.3, we must define __used to be __attribute__((used)) + * for gcc >=3.3 instead of 3.4. + * + * In prior versions of gcc, such functions and data would be emitted, but + * would be warned about except with attribute((unused)). + * + * Mark functions that are referenced only in inline assembly as __used so + * the code is emitted even though it appears to be unreferenced. + */ +#ifndef __used +# define __used /* unimplemented */ +#endif + +#ifndef __maybe_unused +# define __maybe_unused /* unimplemented */ +#endif + +#ifndef __always_unused +# define __always_unused /* unimplemented */ +#endif + +#ifndef noinline +#define noinline +#endif + +/* + * Rather then using noinline to prevent stack consumption, use + * noinline_for_stack instead. For documentation reasons. + */ +#define noinline_for_stack noinline + +#ifndef __always_inline +#define __always_inline inline +#endif + +#endif /* __KERNEL__ */ + +/* + * From the GCC manual: + * + * Many functions do not examine any values except their arguments, + * and have no effects except the return value. Basically this is + * just slightly more strict class than the `pure' attribute above, + * since function is not allowed to read global memory. + * + * Note that a function that has pointer arguments and examines the + * data pointed to must _not_ be declared `const'. Likewise, a + * function that calls a non-`const' function usually must not be + * `const'. It does not make sense for a `const' function to return + * `void'. + */ +#ifndef __attribute_const__ +# define __attribute_const__ /* unimplemented */ +#endif + +#ifndef __designated_init +# define __designated_init +#endif + +#ifndef __latent_entropy +# define __latent_entropy +#endif + +#ifndef __randomize_layout +# define __randomize_layout __designated_init +#endif + +#ifndef __no_randomize_layout +# define __no_randomize_layout +#endif + +#ifndef randomized_struct_fields_start +# define randomized_struct_fields_start +# define randomized_struct_fields_end +#endif + +/* + * Tell gcc if a function is cold. The compiler will assume any path + * directly leading to the call is unlikely. + */ + +#ifndef __cold +#define __cold +#endif + +/* Simple shorthand for a section definition */ +#ifndef __section +# define __section(S) __attribute__ ((__section__(#S))) +#endif + +#ifndef __visible +#define __visible +#endif + +#ifndef __nostackprotector +# define __nostackprotector +#endif + +/* + * Assume alignment of return value. + */ +#ifndef __assume_aligned +#define __assume_aligned(a, ...) +#endif + + +/* Are two types/vars the same type (ignoring qualifiers)? */ +#ifndef __same_type +# define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) +#endif + +/* Is this type a native word size -- useful for atomic operations */ +#ifndef __native_word +# define __native_word(t) (sizeof(t) == sizeof(char) || sizeof(t) == sizeof(short) || sizeof(t) == sizeof(int) || sizeof(t) == sizeof(long)) +#endif + +#endif /* __LINUX_COMPILER_TYPES_H */ diff --git a/include/linux/linkage.h b/include/linux/linkage.h index a6a42dd02466..ebd61b80fed4 100644 --- a/include/linux/linkage.h +++ b/include/linux/linkage.h @@ -1,7 +1,7 @@ #ifndef _LINUX_LINKAGE_H #define _LINUX_LINKAGE_H -#include +#include #include #include #include -- cgit v1.2.3 From 76ebbe78f7390aee075a7f3768af197ded1bdfbb Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Oct 2017 11:22:47 +0100 Subject: locking/barriers: Add implicit smp_read_barrier_depends() to READ_ONCE() In preparation for the removal of lockless_dereference(), which is the same as READ_ONCE() on all architectures other than Alpha, add an implicit smp_read_barrier_depends() to READ_ONCE() so that it can be used to head dependency chains on all architectures. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 08083186e54f..7d7b77da9716 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -242,6 +242,7 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __read_once_size(&(x), __u.__c, sizeof(x)); \ else \ __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ + smp_read_barrier_depends(); /* Enforce dependency ordering from x */ \ __u.__val; \ }) #define READ_ONCE(x) __READ_ONCE(x, 1) -- cgit v1.2.3 From 506458efaf153c1ea480591c5602a5a3ba5a3b76 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Oct 2017 11:22:48 +0100 Subject: locking/barriers: Convert users of lockless_dereference() to READ_ONCE() READ_ONCE() now has an implicit smp_read_barrier_depends() call, so it can be used instead of lockless_dereference() without any change in semantics. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-4-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/rculist.h | 4 ++-- include/linux/rcupdate.h | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 2bea1d5e9930..5ed091c064b2 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -274,7 +274,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ - container_of(lockless_dereference(ptr), type, member) + container_of(READ_ONCE(ptr), type, member) /* * Where are list_empty_rcu() and list_first_entry_rcu()? @@ -367,7 +367,7 @@ static inline void list_splice_tail_init_rcu(struct list_head *list, * example is when items are added to the list, but never deleted. */ #define list_entry_lockless(ptr, type, member) \ - container_of((typeof(ptr))lockless_dereference(ptr), type, member) + container_of((typeof(ptr))READ_ONCE(ptr), type, member) /** * list_for_each_entry_lockless - iterate over rcu list of given type diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 1a9f70d44af9..a6ddc42f87a5 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -346,7 +346,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define __rcu_dereference_check(p, c, space) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(*p) *________p1 = (typeof(*p) *__force)lockless_dereference(p); \ + typeof(*p) *________p1 = (typeof(*p) *__force)READ_ONCE(p); \ RCU_LOCKDEP_WARN(!(c), "suspicious rcu_dereference_check() usage"); \ rcu_dereference_sparse(p, space); \ ((typeof(*p) __force __kernel *)(________p1)); \ @@ -360,7 +360,7 @@ static inline void rcu_preempt_sleep_check(void) { } #define rcu_dereference_raw(p) \ ({ \ /* Dependency order vs. p above. */ \ - typeof(p) ________p1 = lockless_dereference(p); \ + typeof(p) ________p1 = READ_ONCE(p); \ ((typeof(*p) __force __kernel *)(________p1)); \ }) -- cgit v1.2.3 From 59ecbbe7b31cd2d86ff9a9f461a00f7e7533aedc Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 24 Oct 2017 11:22:49 +0100 Subject: locking/barriers: Kill lockless_dereference() lockless_dereference() is a nice idea, but it gained little traction in kernel code since its introduction three years ago. This is partly because it's a pain to type, but also because using READ_ONCE() instead has worked correctly on all architectures apart from Alpha, which is a fully supported but somewhat niche architecture these days. Now that READ_ONCE() has been upgraded to contain an implicit smp_read_barrier_depends() and the few callers of lockless_dereference() have been converted, we can remove lockless_dereference() altogether. Signed-off-by: Will Deacon Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1508840570-22169-5-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 7d7b77da9716..5a1cab48442c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -346,24 +346,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s (volatile typeof(x) *)&(x); }) #define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) -/** - * lockless_dereference() - safely load a pointer for later dereference - * @p: The pointer to load - * - * Similar to rcu_dereference(), but for situations where the pointed-to - * object's lifetime is managed by something other than RCU. That - * "something other" might be reference counting or simple immortality. - * - * The seemingly unused variable ___typecheck_p validates that @p is - * indeed a pointer type by using a pointer to typeof(*p) as the type. - * Taking a pointer to typeof(*p) again is needed in case p is void *. - */ -#define lockless_dereference(p) \ -({ \ - typeof(p) _________p1 = READ_ONCE(p); \ - typeof(*(p)) *___typecheck_p __maybe_unused; \ - smp_read_barrier_depends(); /* Dependency order vs. p above. */ \ - (_________p1); \ -}) - #endif /* __LINUX_COMPILER_H */ -- cgit v1.2.3 From 08395c7f4d9f5808b5754a0dbed969f378bde0c3 Mon Sep 17 00:00:00 2001 From: Julien Thierry Date: Fri, 13 Oct 2017 12:26:44 +0100 Subject: irqdesc: Add function to identify percpu_devid irqs irq_is_percpu indicates whether an irq should only target a single cpu. PERCPU_DEVID flag indicates that an irq can be configured differently on each cpu it can target. Provide a function to check whether an irq is PERCPU_DEVID. Reviewed-by: Thomas Gleixner Reviewed-by: Marc Zyngier Signed-off-by: Will Deacon Signed-off-by: Julien Thierry --- include/linux/irqdesc.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 3e90a094798d..93960cf36e23 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -244,6 +244,14 @@ static inline int irq_is_percpu(unsigned int irq) return desc->status_use_accessors & IRQ_PER_CPU; } +static inline int irq_is_percpu_devid(unsigned int irq) +{ + struct irq_desc *desc; + + desc = irq_to_desc(irq); + return desc->status_use_accessors & IRQ_PER_CPU_DEVID; +} + static inline void irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) { -- cgit v1.2.3 From e87c6bc3852b981e71c757be20771546ce9f76f3 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Mon, 23 Oct 2017 23:53:08 -0700 Subject: bpf: permit multiple bpf attachments for a single perf event This patch enables multiple bpf attachments for a kprobe/uprobe/tracepoint single trace event. Each trace_event keeps a list of attached perf events. When an event happens, all attached bpf programs will be executed based on the order of attachment. A global bpf_event_mutex lock is introduced to protect prog_array attaching and detaching. An alternative will be introduce a mutex lock in every trace_event_call structure, but it takes a lot of extra memory. So a global bpf_event_mutex lock is a good compromise. The bpf prog detachment involves allocation of memory. If the allocation fails, a dummy do-nothing program will replace to-be-detached program in-place. Signed-off-by: Yonghong Song Acked-by: Alexei Starovoitov Acked-by: Martin KaFai Lau Signed-off-by: David S. Miller --- include/linux/bpf.h | 30 +++++++++++++++++++++++++----- include/linux/trace_events.h | 43 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 64 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1e334b248ff6..172be7faf7ba 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -273,18 +273,38 @@ int bpf_prog_array_length(struct bpf_prog_array __rcu *progs); int bpf_prog_array_copy_to_user(struct bpf_prog_array __rcu *progs, __u32 __user *prog_ids, u32 cnt); -#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ +void bpf_prog_array_delete_safe(struct bpf_prog_array __rcu *progs, + struct bpf_prog *old_prog); +int bpf_prog_array_copy(struct bpf_prog_array __rcu *old_array, + struct bpf_prog *exclude_prog, + struct bpf_prog *include_prog, + struct bpf_prog_array **new_array); + +#define __BPF_PROG_RUN_ARRAY(array, ctx, func, check_non_null) \ ({ \ - struct bpf_prog **_prog; \ + struct bpf_prog **_prog, *__prog; \ + struct bpf_prog_array *_array; \ u32 _ret = 1; \ rcu_read_lock(); \ - _prog = rcu_dereference(array)->progs; \ - for (; *_prog; _prog++) \ - _ret &= func(*_prog, ctx); \ + _array = rcu_dereference(array); \ + if (unlikely(check_non_null && !_array))\ + goto _out; \ + _prog = _array->progs; \ + while ((__prog = READ_ONCE(*_prog))) { \ + _ret &= func(__prog, ctx); \ + _prog++; \ + } \ +_out: \ rcu_read_unlock(); \ _ret; \ }) +#define BPF_PROG_RUN_ARRAY(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, false) + +#define BPF_PROG_RUN_ARRAY_CHECK(array, ctx, func) \ + __BPF_PROG_RUN_ARRAY(array, ctx, func, true) + #ifdef CONFIG_BPF_SYSCALL DECLARE_PER_CPU(int, bpf_prog_active); diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 2e0f22298fe9..fc6aeca945db 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -271,14 +271,37 @@ struct trace_event_call { #ifdef CONFIG_PERF_EVENTS int perf_refcount; struct hlist_head __percpu *perf_events; - struct bpf_prog *prog; - struct perf_event *bpf_prog_owner; + struct bpf_prog_array __rcu *prog_array; int (*perf_perm)(struct trace_event_call *, struct perf_event *); #endif }; +#ifdef CONFIG_PERF_EVENTS +static inline bool bpf_prog_array_valid(struct trace_event_call *call) +{ + /* + * This inline function checks whether call->prog_array + * is valid or not. The function is called in various places, + * outside rcu_read_lock/unlock, as a heuristic to speed up execution. + * + * If this function returns true, and later call->prog_array + * becomes false inside rcu_read_lock/unlock region, + * we bail out then. If this function return false, + * there is a risk that we might miss a few events if the checking + * were delayed until inside rcu_read_lock/unlock region and + * call->prog_array happened to become non-NULL then. + * + * Here, READ_ONCE() is used instead of rcu_access_pointer(). + * rcu_access_pointer() requires the actual definition of + * "struct bpf_prog_array" while READ_ONCE() only needs + * a declaration of the same type. + */ + return !!READ_ONCE(call->prog_array); +} +#endif + static inline const char * trace_event_name(struct trace_event_call *call) { @@ -435,12 +458,23 @@ trace_trigger_soft_disabled(struct trace_event_file *file) } #ifdef CONFIG_BPF_EVENTS -unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx); +unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx); +int perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog); +void perf_event_detach_bpf_prog(struct perf_event *event); #else -static inline unsigned int trace_call_bpf(struct bpf_prog *prog, void *ctx) +static inline unsigned int trace_call_bpf(struct trace_event_call *call, void *ctx) { return 1; } + +static inline int +perf_event_attach_bpf_prog(struct perf_event *event, struct bpf_prog *prog) +{ + return -EOPNOTSUPP; +} + +static inline void perf_event_detach_bpf_prog(struct perf_event *event) { } + #endif enum { @@ -511,6 +545,7 @@ perf_trace_buf_submit(void *raw_data, int size, int rctx, u16 type, { perf_tp_event(type, count, raw_data, size, regs, head, rctx, task, event); } + #endif #endif /* _LINUX_TRACE_EVENT_H */ -- cgit v1.2.3 From 4df714be4dcf40bfb0d4af0f851a6e1977afa02e Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Thu, 12 Oct 2017 13:20:48 +0100 Subject: locking/atomic: Add atomic_cond_read_acquire() smp_cond_load_acquire() provides a way to spin on a variable with acquire semantics until some conditional expression involving the variable is satisfied. Architectures such as arm64 can potentially enter a low-power state, waking up only when the value of the variable changes, which reduces the system impact of tight polling loops. This patch makes the same interface available to users of atomic_t, atomic64_t and atomic_long_t, rather than require messy accesses to the structure internals. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra Cc: Boqun Feng Cc: Jeremy.Linton@arm.com Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Thomas Gleixner Cc: Waiman Long Cc: linux-arm-kernel@lists.infradead.org Link: http://lkml.kernel.org/r/1507810851-306-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 40d6bfec0e0d..0aeb2b3f4578 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -653,6 +653,8 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +#define atomic_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) + #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif @@ -1072,6 +1074,8 @@ static inline long long atomic64_fetch_andnot_release(long long i, atomic64_t *v } #endif +#define atomic64_cond_read_acquire(v, c) smp_cond_load_acquire(&(v)->counter, (c)) + #include #endif /* _LINUX_ATOMIC_H */ -- cgit v1.2.3 From 66702eb59064f1077e89cce8e7e3cd48ec4b486c Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:14 -0700 Subject: locking/atomics, fs/dcache: Convert ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't currently harmful. However, for some features it is necessary to instrument reads and writes separately, which is not possible with ACCESS_ONCE(). This distinction is critical to correct operation. It's possible to transform the bulk of kernel code using the Coccinelle script below. However, this doesn't handle comments, leaving references to ACCESS_ONCE() instances which have been removed. As a preparatory step, this patch converts the dcache code and comments to use {READ,WRITE}_ONCE() consistently. ---- virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: Al Viro Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-4-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/dcache.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index ed1a7cf6923a..1d8f5818f647 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -519,7 +519,7 @@ static inline struct inode *d_inode(const struct dentry *dentry) } /** - * d_inode_rcu - Get the actual inode of this dentry with ACCESS_ONCE() + * d_inode_rcu - Get the actual inode of this dentry with READ_ONCE() * @dentry: The dentry to query * * This is the helper normal filesystems should use to get at their own inodes @@ -527,7 +527,7 @@ static inline struct inode *d_inode(const struct dentry *dentry) */ static inline struct inode *d_inode_rcu(const struct dentry *dentry) { - return ACCESS_ONCE(dentry->d_inode); + return READ_ONCE(dentry->d_inode); } /** -- cgit v1.2.3 From 14cd5d4a0125f643350e7fa12f5384f1fc2d3e9d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:17 -0700 Subject: locking/atomics, net/netlink/netfilter: Convert ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't currently harmful. However, for some features it is necessary to instrument reads and writes separately, which is not possible with ACCESS_ONCE(). This distinction is critical to correct operation. It's possible to transform the bulk of kernel code using the Coccinelle script below. However, this doesn't handle comments, leaving references to ACCESS_ONCE() instances which have been removed. As a preparatory step, this patch converts netlink and netfilter code and comments to use {READ,WRITE}_ONCE() consistently. ---- virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: David S. Miller Cc: Florian Westphal Cc: Jozsef Kadlecsik Cc: Linus Torvalds Cc: Pablo Neira Ayuso Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-7-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/genetlink.h | 2 +- include/linux/netfilter/nfnetlink.h | 2 +- include/linux/rtnetlink.h | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index a4c61cbce777..0e694cf62414 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -30,7 +30,7 @@ extern wait_queue_head_t genl_sk_destructing_waitq; * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * both the smp_read_barrier_depends() and the READ_ONCE(), because * caller holds genl mutex. */ #define genl_dereference(p) \ diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 41d04e9d088a..0f47a4aa7fc4 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -66,7 +66,7 @@ static inline bool lockdep_nfnl_is_held(__u8 subsys_id) * @ss: The nfnetlink subsystem ID * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * both the smp_read_barrier_depends() and the READ_ONCE(), because * caller holds the NFNL subsystem mutex. */ #define nfnl_dereference(p, ss) \ diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index dea59c8eec54..765f7b915475 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -67,7 +67,7 @@ static inline bool lockdep_rtnl_is_held(void) * @p: The pointer to read, prior to dereferencing * * Return the value of the specified RCU-protected pointer, but omit - * both the smp_read_barrier_depends() and the ACCESS_ONCE(), because + * both the smp_read_barrier_depends() and the READ_ONCE(), because * caller holds RTNL. */ #define rtnl_dereference(p) \ -- cgit v1.2.3 From ef4d9af62f47e3070b00c3307a4d8eb5092bb9a2 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:19 -0700 Subject: locking/atomics, net/average: Convert ACCESS_ONCE() to READ_ONCE()/WRITE_ONCE() For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't currently harmful. However, for some features it is necessary to instrument reads and writes separately, which is not possible with ACCESS_ONCE(). This distinction is critical to correct operation. It's possible to transform the bulk of kernel code using the Coccinelle script below. However, this doesn't pick up some uses, including those in . As a preparatory step, this patch converts the file to use {READ,WRITE}_ONCE() consistently. At the same time, this patch addds missing includes necessary for {READ,WRITE}_ONCE(), *BUG_ON*(), and ilog2(). ---- virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Reviewed-by: Johannes Berg Cc: David S. Miller Cc: Johannes Berg Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-9-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/average.h | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/average.h b/include/linux/average.h index 7ddaf340d2ac..3f462292269c 100644 --- a/include/linux/average.h +++ b/include/linux/average.h @@ -1,6 +1,10 @@ #ifndef _LINUX_AVERAGE_H #define _LINUX_AVERAGE_H +#include +#include +#include + /* * Exponentially weighted moving average (EWMA) * @@ -48,7 +52,7 @@ static inline void ewma_##name##_add(struct ewma_##name *e, \ unsigned long val) \ { \ - unsigned long internal = ACCESS_ONCE(e->internal); \ + unsigned long internal = READ_ONCE(e->internal); \ unsigned long weight_rcp = ilog2(_weight_rcp); \ unsigned long precision = _precision; \ \ @@ -57,10 +61,10 @@ BUILD_BUG_ON((_precision) > 30); \ BUILD_BUG_ON_NOT_POWER_OF_2(_weight_rcp); \ \ - ACCESS_ONCE(e->internal) = internal ? \ + WRITE_ONCE(e->internal, internal ? \ (((internal << weight_rcp) - internal) + \ (val << precision)) >> weight_rcp : \ - (val << precision); \ + (val << precision)); \ } #endif /* _LINUX_AVERAGE_H */ -- cgit v1.2.3 From 6aa7de059173a986114ac43b8f50b297a86f09a8 Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 23 Oct 2017 14:07:29 -0700 Subject: locking/atomics: COCCINELLE/treewide: Convert trivial ACCESS_ONCE() patterns to READ_ONCE()/WRITE_ONCE() Please do not apply this to mainline directly, instead please re-run the coccinelle script shown below and apply its output. For several reasons, it is desirable to use {READ,WRITE}_ONCE() in preference to ACCESS_ONCE(), and new code is expected to use one of the former. So far, there's been no reason to change most existing uses of ACCESS_ONCE(), as these aren't harmful, and changing them results in churn. However, for some features, the read/write distinction is critical to correct operation. To distinguish these cases, separate read/write accessors must be used. This patch migrates (most) remaining ACCESS_ONCE() instances to {READ,WRITE}_ONCE(), using the following coccinelle script: ---- // Convert trivial ACCESS_ONCE() uses to equivalent READ_ONCE() and // WRITE_ONCE() // $ make coccicheck COCCI=/home/mark/once.cocci SPFLAGS="--include-headers" MODE=patch virtual patch @ depends on patch @ expression E1, E2; @@ - ACCESS_ONCE(E1) = E2 + WRITE_ONCE(E1, E2) @ depends on patch @ expression E; @@ - ACCESS_ONCE(E) + READ_ONCE(E) ---- Signed-off-by: Mark Rutland Signed-off-by: Paul E. McKenney Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: davem@davemloft.net Cc: linux-arch@vger.kernel.org Cc: mpe@ellerman.id.au Cc: shuah@kernel.org Cc: snitzer@redhat.com Cc: thor.thayer@linux.intel.com Cc: tj@kernel.org Cc: viro@zeniv.linux.org.uk Cc: will.deacon@arm.com Link: http://lkml.kernel.org/r/1508792849-3115-19-git-send-email-paulmck@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/bitops.h | 4 ++-- include/linux/dynamic_queue_limits.h | 2 +- include/linux/huge_mm.h | 2 +- include/linux/if_team.h | 2 +- include/linux/llist.h | 2 +- include/linux/pm_runtime.h | 2 +- 6 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 8fbe259b197c..0a7ce668f8e0 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -236,7 +236,7 @@ static inline unsigned long __ffs64(u64 word) typeof(*ptr) old, new; \ \ do { \ - old = ACCESS_ONCE(*ptr); \ + old = READ_ONCE(*ptr); \ new = (old & ~mask) | bits; \ } while (cmpxchg(ptr, old, new) != old); \ \ @@ -251,7 +251,7 @@ static inline unsigned long __ffs64(u64 word) typeof(*ptr) old, new; \ \ do { \ - old = ACCESS_ONCE(*ptr); \ + old = READ_ONCE(*ptr); \ new = old & ~clear; \ } while (!(old & test) && \ cmpxchg(ptr, old, new) != old); \ diff --git a/include/linux/dynamic_queue_limits.h b/include/linux/dynamic_queue_limits.h index a4be70398ce1..36dd4ffb5715 100644 --- a/include/linux/dynamic_queue_limits.h +++ b/include/linux/dynamic_queue_limits.h @@ -88,7 +88,7 @@ static inline void dql_queued(struct dql *dql, unsigned int count) /* Returns how many objects can be queued, < 0 indicates over limit. */ static inline int dql_avail(const struct dql *dql) { - return ACCESS_ONCE(dql->adj_limit) - ACCESS_ONCE(dql->num_queued); + return READ_ONCE(dql->adj_limit) - READ_ONCE(dql->num_queued); } /* Record number of completed objects and recalculate the limit. */ diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 14bc21c2ee7f..785a00ca4628 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -221,7 +221,7 @@ extern struct page *huge_zero_page; static inline bool is_huge_zero_page(struct page *page) { - return ACCESS_ONCE(huge_zero_page) == page; + return READ_ONCE(huge_zero_page) == page; } static inline bool is_huge_zero_pmd(pmd_t pmd) diff --git a/include/linux/if_team.h b/include/linux/if_team.h index 30294603526f..d95cae09dea0 100644 --- a/include/linux/if_team.h +++ b/include/linux/if_team.h @@ -247,7 +247,7 @@ static inline struct team_port *team_get_port_by_index(struct team *team, static inline int team_num_to_port_index(struct team *team, unsigned int num) { - int en_port_count = ACCESS_ONCE(team->en_port_count); + int en_port_count = READ_ONCE(team->en_port_count); if (unlikely(!en_port_count)) return 0; diff --git a/include/linux/llist.h b/include/linux/llist.h index 1957635e6d5f..85abc2915e8d 100644 --- a/include/linux/llist.h +++ b/include/linux/llist.h @@ -198,7 +198,7 @@ static inline void init_llist_head(struct llist_head *list) */ static inline bool llist_empty(const struct llist_head *head) { - return ACCESS_ONCE(head->first) == NULL; + return READ_ONCE(head->first) == NULL; } static inline struct llist_node *llist_next(struct llist_node *node) diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h index 2efb08a60e63..f0fc4700b6ff 100644 --- a/include/linux/pm_runtime.h +++ b/include/linux/pm_runtime.h @@ -105,7 +105,7 @@ static inline bool pm_runtime_callbacks_present(struct device *dev) static inline void pm_runtime_mark_last_busy(struct device *dev) { - ACCESS_ONCE(dev->power.last_busy) = jiffies; + WRITE_ONCE(dev->power.last_busy, jiffies); } static inline bool pm_runtime_is_irq_safe(struct device *dev) -- cgit v1.2.3 From 24efd94bc38290dc1d9775a1e767ed4685d8a79b Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Fri, 20 Oct 2017 16:31:27 +0200 Subject: gpio: mmio: Make pin2mask() a private business The vtable call pin2mask() was introducing a vtable function call in every gpiochip callback for a generic MMIO GPIO chip. This was not exactly efficient. (Maybe link-time optimization could get rid of it, I don't know.) After removing all external calls into this API we can make it a boolean flag in the struct gpio_chip call and sink the function into the gpio-mmio driver yielding encapsulation and potential speedups. Cc: Anton Vorontsov Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index bda95a9b7b8c..ed04fa2a00a8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -67,9 +67,9 @@ struct module; * registers. * @read_reg: reader function for generic GPIO * @write_reg: writer function for generic GPIO - * @pin2mask: some generic GPIO controllers work with the big-endian bits - * notation, e.g. in a 8-bits register, GPIO7 is the least significant - * bit. This callback assigns the right bit mask. + * @be_bits: if the generic GPIO has big endian bit order (bit 31 is representing + * line 0, bit 30 is line 1 ... bit 0 is line 31) this is set to true by the + * generic GPIO core. It is for internal housekeeping only. * @reg_dat: data (in) register for generic GPIO * @reg_set: output set register (out=high) for generic GPIO * @reg_clr: output clear register (out=low) for generic GPIO @@ -151,7 +151,7 @@ struct gpio_chip { #if IS_ENABLED(CONFIG_GPIO_GENERIC) unsigned long (*read_reg)(void __iomem *reg); void (*write_reg)(void __iomem *reg, unsigned long data); - unsigned long (*pin2mask)(struct gpio_chip *gc, unsigned int pin); + bool be_bits; void __iomem *reg_dat; void __iomem *reg_set; void __iomem *reg_clr; -- cgit v1.2.3 From 6f0397d7e100f3b3978d6ebb6b2dea29ee7c4a95 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 25 Oct 2017 17:55:58 +0900 Subject: locking/lockdep: Provide empty lockdep_map structure for !CONFIG_LOCKDEP After this patch the lockdep_map structure takes no space if lockdep is disabled, reducing the number of #ifdefs in unrelated kernel code. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: amir73il@gmail.com Cc: axboe@kernel.dk Cc: darrick.wong@oracle.com Cc: david@fromorbit.com Cc: hch@infradead.org Cc: idryomov@gmail.com Cc: johan@kernel.org Cc: johannes.berg@intel.com Cc: kernel-team@lge.com Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1508921765-15396-3-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index bfa8e0b0d6f1..b6662d05bcda 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -527,6 +527,11 @@ static inline void lockdep_on(void) */ struct lock_class_key { }; +/* + * The lockdep_map takes no space if lockdep is disabled: + */ +struct lockdep_map { }; + #define lockdep_depth(tsk) (0) #define lockdep_is_held_type(l, r) (1) -- cgit v1.2.3 From 24208435e343679b21502fb90786084dfaf15369 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 25 Oct 2017 17:55:59 +0900 Subject: locking/lockdep, sched/completions: Change the prefix of lock name for completion variables CONFIG_LOCKDEP_COMPLETIONS uses "(complete)" as a prefix of lock name for completion variable. However, what we should use here is a noun - so use "(completion)" instead. Suggested-by: Ingo Molnar Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: amir73il@gmail.com Cc: axboe@kernel.dk Cc: darrick.wong@oracle.com Cc: david@fromorbit.com Cc: hch@infradead.org Cc: idryomov@gmail.com Cc: johan@kernel.org Cc: johannes.berg@intel.com Cc: kernel-team@lge.com Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1508921765-15396-4-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index cae5400022a3..91218036b1c8 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -53,7 +53,7 @@ static inline void complete_release_commit(struct completion *x) do { \ static struct lock_class_key __key; \ lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ - "(complete)" #x, \ + "(completion)" #x, \ &__key, 0); \ __init_completion(x); \ } while (0) @@ -67,7 +67,7 @@ static inline void complete_release_commit(struct completion *x) {} #ifdef CONFIG_LOCKDEP_COMPLETIONS #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ - STATIC_CROSS_LOCKDEP_MAP_INIT("(complete)" #work, &(work)) } + STATIC_CROSS_LOCKDEP_MAP_INIT("(completion)" #work, &(work)) } #else #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } -- cgit v1.2.3 From a7967bc31584bd282682981295861e7bcba19e65 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 25 Oct 2017 17:56:03 +0900 Subject: sched/completions: Add support for initializing completions with lockdep_map Sometimes we want to initialize completions with sparate lockdep maps to assign lock classes as desired. For example, the workqueue code needs to directly manage lockdep maps, since only the code is aware of how to classify lockdep maps properly. Provide additional macros initializing completions in that way. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: amir73il@gmail.com Cc: axboe@kernel.dk Cc: darrick.wong@oracle.com Cc: david@fromorbit.com Cc: hch@infradead.org Cc: idryomov@gmail.com Cc: johan@kernel.org Cc: johannes.berg@intel.com Cc: kernel-team@lge.com Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1508921765-15396-8-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/completion.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 91218036b1c8..4da49916ef3f 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -49,6 +49,13 @@ static inline void complete_release_commit(struct completion *x) lock_commit_crosslock((struct lockdep_map *)&x->map); } +#define init_completion_map(x, m) \ +do { \ + lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ + (m)->name, (m)->key, 0); \ + __init_completion(x); \ +} while (0) + #define init_completion(x) \ do { \ static struct lock_class_key __key; \ @@ -58,6 +65,7 @@ do { \ __init_completion(x); \ } while (0) #else +#define init_completion_map(x, m) __init_completion(x) #define init_completion(x) __init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} @@ -73,6 +81,9 @@ static inline void complete_release_commit(struct completion *x) {} { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } #endif +#define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ + (*({ init_completion_map(&(work), &(map)); &(work); })) + #define COMPLETION_INITIALIZER_ONSTACK(work) \ (*({ init_completion(&work); &work; })) @@ -102,8 +113,11 @@ static inline void complete_release_commit(struct completion *x) {} #ifdef CONFIG_LOCKDEP # define DECLARE_COMPLETION_ONSTACK(work) \ struct completion work = COMPLETION_INITIALIZER_ONSTACK(work) +# define DECLARE_COMPLETION_ONSTACK_MAP(work, map) \ + struct completion work = COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) #else # define DECLARE_COMPLETION_ONSTACK(work) DECLARE_COMPLETION(work) +# define DECLARE_COMPLETION_ONSTACK_MAP(work, map) DECLARE_COMPLETION(work) #endif /** -- cgit v1.2.3 From fd1a5b04dfb899f84ddeb8acdaea6b98283df1e5 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 25 Oct 2017 17:56:04 +0900 Subject: workqueue: Remove now redundant lock acquisitions wrt. workqueue flushes The workqueue code added manual lock acquisition annotations to catch deadlocks. After lockdepcrossrelease was introduced, some of those became redundant, since wait_for_completion() already does the acquisition and tracking. Remove the duplicate annotations. Signed-off-by: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: amir73il@gmail.com Cc: axboe@kernel.dk Cc: darrick.wong@oracle.com Cc: david@fromorbit.com Cc: hch@infradead.org Cc: idryomov@gmail.com Cc: johan@kernel.org Cc: johannes.berg@intel.com Cc: kernel-team@lge.com Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1508921765-15396-9-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/workqueue.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 1c49431f3121..c8a572cb49be 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -218,7 +218,7 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } \ __init_work((_work), _onstack); \ (_work)->data = (atomic_long_t) WORK_DATA_INIT(); \ - lockdep_init_map(&(_work)->lockdep_map, #_work, &__key, 0); \ + lockdep_init_map(&(_work)->lockdep_map, "(work_completion)"#_work, &__key, 0); \ INIT_LIST_HEAD(&(_work)->entry); \ (_work)->func = (_func); \ } while (0) @@ -398,7 +398,7 @@ __alloc_workqueue_key(const char *fmt, unsigned int flags, int max_active, static struct lock_class_key __key; \ const char *__lock_name; \ \ - __lock_name = #fmt#args; \ + __lock_name = "(wq_completion)"#fmt#args; \ \ __alloc_workqueue_key((fmt), (flags), (max_active), \ &__key, __lock_name, ##args); \ -- cgit v1.2.3 From ccc8708790273811db24676223b040710793cba7 Mon Sep 17 00:00:00 2001 From: Noa Osherovich Date: Tue, 17 Oct 2017 18:01:13 +0300 Subject: IB/mlx5: Allow creation of a multi-packet RQ Allow creation of a multi-packet receive queue. In order to create a multi-packet RQ, the following fields in the mlx5_ib_rwq should be set: - log_num_strides: Log of number of strides per WQE - single_stride_log_num_of_bytes: Log of a single stride size - two_byte_shift_en: When enabled, hardware pads 2 bytes of zeros before writing the message to memory (e.g. for the IP alignment). Signed-off-by: Noa Osherovich Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69772347f866..db655db45b77 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -744,6 +744,7 @@ enum { MLX5_WQ_TYPE_LINKED_LIST = 0x0, MLX5_WQ_TYPE_CYCLIC = 0x1, MLX5_WQ_TYPE_LINKED_LIST_STRIDING_RQ = 0x2, + MLX5_WQ_TYPE_CYCLIC_STRIDING_RQ = 0x3, }; enum { -- cgit v1.2.3 From 0ff8e79ca7c81fafa3f5c91a1b58efc85cbc2302 Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:51 +0300 Subject: IB/mlx5: Add 128B CQE compression and padding HW bits Adding new bits in mlx5_ifc_cmd_hca_cap to get the hardware capabilities for: - compression_128: Support 128B CQE compression - cqe_128_always: Support 128B CQE padding Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index db655db45b77..d49928c314ed 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1047,7 +1047,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 num_of_uars_per_page[0x20]; u8 reserved_at_540[0x40]; - u8 reserved_at_580[0x3f]; + u8 reserved_at_580[0x3d]; + u8 cqe_128_always[0x1]; + u8 cqe_compression_128[0x1]; u8 cqe_compression[0x1]; u8 cqe_compression_timeout[0x10]; -- cgit v1.2.3 From 7a0c8f4244e9ec7a630563d294b211342b46223d Mon Sep 17 00:00:00 2001 From: Guy Levi Date: Thu, 19 Oct 2017 08:25:53 +0300 Subject: IB/mlx5: Support padded 128B CQE feature In some benchmarks and some CPU architectures, writing the CQE on a full cache line size improves performance by saving memory access operations (read-modify-write) relative to partial cache line change. This patch lets the user to configure the device to pad the CQE up to 128B in case its content is less than 128B. Currently the driver supports only padding for a CQE size of 128B. Signed-off-by: Guy Levi Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/cq.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index 95898847c7d4..cc718e245b1e 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -125,11 +125,13 @@ struct mlx5_cq_modify_params { enum { CQE_SIZE_64 = 0, CQE_SIZE_128 = 1, + CQE_SIZE_128_PAD = 2, }; -static inline int cqe_sz_to_mlx_sz(u8 size) +static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { - return size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; + return padding_128_en ? CQE_SIZE_128_PAD : + size == 64 ? CQE_SIZE_64 : CQE_SIZE_128; } static inline void mlx5_cq_set_ci(struct mlx5_core_cq *cq) -- cgit v1.2.3 From 4d350f1f89ee8ea84be1ef09717bf392fa5a3b45 Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 19 Oct 2017 08:25:54 +0300 Subject: IB/mlx5: Update tunnel offloads bits This patch updates the mlx5_ifc with the following: - Fix tunnel_stateless_gre typo. - max_geneve_opt_len - Maximum geneve options length. - tunnel_stateless_geneve_rx - If set, receive Stateless Offloads for Geneve tunneled (inner) packets are supported. Signed-off-by: Maor Gottlieb Reviewed-by: Mark Bloch Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/mlx5_ifc.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d49928c314ed..d4c29c183d28 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -614,7 +614,9 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 swp[0x1]; u8 swp_csum[0x1]; u8 swp_lso[0x1]; - u8 reserved_at_23[0x1d]; + u8 reserved_at_23[0x1b]; + u8 max_geneve_opt_len[0x1]; + u8 tunnel_stateless_geneve_rx[0x1]; u8 reserved_at_40[0x10]; u8 lro_min_mss_size[0x10]; -- cgit v1.2.3 From 8ac0d9a81edf2ef4a2268b65b802a6b856dc77e6 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 25 Oct 2017 12:35:02 -0600 Subject: elevator: allow name aliases Since we now lookup elevator types with the appropriate multiqueue capability, allow schedulers to register with an alias alongside the real name. This is in preparation for allowing 'mq-deadline' to register an alias of 'deadline' as well. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/elevator.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 5bc8f8682a3e..6df8b14f1f6a 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -144,6 +144,7 @@ struct elevator_type size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; + const char *elevator_alias; struct module *elevator_owner; bool uses_mq; #ifdef CONFIG_BLK_DEBUG_FS -- cgit v1.2.3 From 8bb705e3e79d84e77edd4499e74483dd96a4626c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 24 Oct 2017 14:40:26 -0500 Subject: PCI: Add pci_resize_resource() for resizing BARs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a pci_resize_resource() interface to allow device drivers to resize BARs of their devices. This is useful for devices with large local storage, e.g., graphics devices. These devices often only expose 256MB BARs initially to be compatible with 32-bit systems. This function only tries to reprogram the windows of the bridge directly above the requesting device and only the BAR of the same type (usually mem, 64bit, prefetchable). This is done to avoid disturbing other drivers by changing the BARs of their devices. Drivers should use the following sequence to resize their BARs: 1. Disable memory decoding of the device using the PCI cfg dword. 2. Use pci_release_resource() to release all BARs which can move during the resize, including the one you want to resize. 3. Call pci_resize_resource() for each BAR you want to resize. 4. Call pci_assign_unassigned_bus_resources() to reassign new locations for all BARs which are not resized, but could move. 5. If everything worked as expected, enable memory decoding in the device again using the PCI cfg dword. Signed-off-by: Christian König Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f4f8ee5a7362..55e9fe6dd577 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1106,6 +1106,8 @@ void pci_reset_bridge_secondary_bus(struct pci_dev *dev); void pci_update_resource(struct pci_dev *dev, int resno); int __must_check pci_assign_resource(struct pci_dev *dev, int i); int __must_check pci_reassign_resource(struct pci_dev *dev, int i, resource_size_t add_size, resource_size_t align); +void pci_release_resource(struct pci_dev *dev, int resno); +int __must_check pci_resize_resource(struct pci_dev *dev, int i, int size); int pci_select_bars(struct pci_dev *dev, unsigned long flags); bool pci_device_is_present(struct pci_dev *pdev); void pci_ignore_hotplug(struct pci_dev *dev); @@ -1185,6 +1187,7 @@ void pci_assign_unassigned_resources(void); void pci_assign_unassigned_bridge_resources(struct pci_dev *bridge); void pci_assign_unassigned_bus_resources(struct pci_bus *bus); void pci_assign_unassigned_root_bus_resources(struct pci_bus *bus); +int pci_reassign_bridge_resources(struct pci_dev *bridge, unsigned long type); void pdev_enable_device(struct pci_dev *); int pci_enable_resources(struct pci_dev *, int mask); void pci_assign_irq(struct pci_dev *dev); -- cgit v1.2.3 From 88ca59d1aaf28c25b47a9f933090e480ba6dc92a Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Wed, 25 Oct 2017 12:26:43 -0700 Subject: macvlan: remove unused fields in struct macvlan_dev commit 635b8c8ecdd2 ("tap: Renaming tap related APIs, data structures, macros") captured all the tap related fields into a new struct tap_dev. However, it failed to remove those fields from struct macvlan_dev. Those fields are currently unused and must be removed. While there I moved the comment for MAX_TAP_QUEUES to the right place. Fixes: 635b8c8ecdd27142 (tap: Renaming tap related APIs, data structures, macros) Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- include/linux/if_macvlan.h | 15 --------------- include/linux/if_tap.h | 4 ++++ 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/if_macvlan.h b/include/linux/if_macvlan.h index 10e319f41fb1..e13b369df02b 100644 --- a/include/linux/if_macvlan.h +++ b/include/linux/if_macvlan.h @@ -10,13 +10,6 @@ #include struct macvlan_port; -struct macvtap_queue; - -/* - * Maximum times a macvtap device can be opened. This can be used to - * configure the number of receive queue, e.g. for multiqueue virtio. - */ -#define MAX_TAP_QUEUES 256 #define MACVLAN_MC_FILTER_BITS 8 #define MACVLAN_MC_FILTER_SZ (1 << MACVLAN_MC_FILTER_BITS) @@ -35,14 +28,6 @@ struct macvlan_dev { netdev_features_t set_features; enum macvlan_mode mode; u16 flags; - /* This array tracks active taps. */ - struct tap_queue __rcu *taps[MAX_TAP_QUEUES]; - /* This list tracks all taps (both enabled and disabled) */ - struct list_head queue_list; - int numvtaps; - int numqueues; - netdev_features_t tap_features; - int minor; int nest_level; #ifdef CONFIG_NET_POLL_CONTROLLER struct netpoll *netpoll; diff --git a/include/linux/if_tap.h b/include/linux/if_tap.h index 4837157da0dc..d1b5173ad8f0 100644 --- a/include/linux/if_tap.h +++ b/include/linux/if_tap.h @@ -22,6 +22,10 @@ static inline struct skb_array *tap_get_skb_array(struct file *f) #include #include +/* + * Maximum times a tap device can be opened. This can be used to + * configure the number of receive queue, e.g. for multiqueue virtio. + */ #define MAX_TAP_QUEUES 256 struct tap_queue; -- cgit v1.2.3 From e319e1fbd9d42420ab6eec0bfd75eb9ad7ca63b1 Mon Sep 17 00:00:00 2001 From: Byungchul Park Date: Wed, 25 Oct 2017 17:56:05 +0900 Subject: block, locking/lockdep: Assign a lock_class per gendisk used for wait_for_completion() Darrick posted the following warning and Dave Chinner analyzed it: > ====================================================== > WARNING: possible circular locking dependency detected > 4.14.0-rc1-fixes #1 Tainted: G W > ------------------------------------------------------ > loop0/31693 is trying to acquire lock: > (&(&ip->i_mmaplock)->mr_lock){++++}, at: [] xfs_ilock+0x23c/0x330 [xfs] > > but now in release context of a crosslock acquired at the following: > ((complete)&ret.event){+.+.}, at: [] submit_bio_wait+0x7f/0xb0 > > which lock already depends on the new lock. > > the existing dependency chain (in reverse order) is: > > -> #2 ((complete)&ret.event){+.+.}: > lock_acquire+0xab/0x200 > wait_for_completion_io+0x4e/0x1a0 > submit_bio_wait+0x7f/0xb0 > blkdev_issue_zeroout+0x71/0xa0 > xfs_bmapi_convert_unwritten+0x11f/0x1d0 [xfs] > xfs_bmapi_write+0x374/0x11f0 [xfs] > xfs_iomap_write_direct+0x2ac/0x430 [xfs] > xfs_file_iomap_begin+0x20d/0xd50 [xfs] > iomap_apply+0x43/0xe0 > dax_iomap_rw+0x89/0xf0 > xfs_file_dax_write+0xcc/0x220 [xfs] > xfs_file_write_iter+0xf0/0x130 [xfs] > __vfs_write+0xd9/0x150 > vfs_write+0xc8/0x1c0 > SyS_write+0x45/0xa0 > entry_SYSCALL_64_fastpath+0x1f/0xbe > > -> #1 (&xfs_nondir_ilock_class){++++}: > lock_acquire+0xab/0x200 > down_write_nested+0x4a/0xb0 > xfs_ilock+0x263/0x330 [xfs] > xfs_setattr_size+0x152/0x370 [xfs] > xfs_vn_setattr+0x6b/0x90 [xfs] > notify_change+0x27d/0x3f0 > do_truncate+0x5b/0x90 > path_openat+0x237/0xa90 > do_filp_open+0x8a/0xf0 > do_sys_open+0x11c/0x1f0 > entry_SYSCALL_64_fastpath+0x1f/0xbe > > -> #0 (&(&ip->i_mmaplock)->mr_lock){++++}: > up_write+0x1c/0x40 > xfs_iunlock+0x1d0/0x310 [xfs] > xfs_file_fallocate+0x8a/0x310 [xfs] > loop_queue_work+0xb7/0x8d0 > kthread_worker_fn+0xb9/0x1f0 > > Chain exists of: > &(&ip->i_mmaplock)->mr_lock --> &xfs_nondir_ilock_class --> (complete)&ret.event > > Possible unsafe locking scenario by crosslock: > > CPU0 CPU1 > ---- ---- > lock(&xfs_nondir_ilock_class); > lock((complete)&ret.event); > lock(&(&ip->i_mmaplock)->mr_lock); > unlock((complete)&ret.event); > > *** DEADLOCK *** The warning is a false positive, caused by the fact that all wait_for_completion()s in submit_bio_wait() are waiting with the same lock class. However, some bios have nothing to do with others, for example in the case of loop devices, there's no direct connection between the bios of an upper device and the bios of a lower device(=loop device). The safest way to assign different lock classes to different devices is to do it for each gendisk. In other words, this patch assigns a lockdep_map per gendisk and uses it when initializing completion in submit_bio_wait(). Analyzed-by: Dave Chinner Reported-by: Darrick J. Wong Signed-off-by: Byungchul Park Reviewed-by: Jens Axboe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: amir73il@gmail.com Cc: axboe@kernel.dk Cc: david@fromorbit.com Cc: hch@infradead.org Cc: idryomov@gmail.com Cc: johan@kernel.org Cc: johannes.berg@intel.com Cc: kernel-team@lge.com Cc: linux-block@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: linux-mm@kvack.org Cc: linux-xfs@vger.kernel.org Cc: oleg@redhat.com Cc: tj@kernel.org Link: http://lkml.kernel.org/r/1508921765-15396-10-git-send-email-byungchul.park@lge.com Signed-off-by: Ingo Molnar --- include/linux/genhd.h | 22 ++++++++++++++++++++-- 1 file changed, 20 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ea652bfcd675..19d18710546a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -206,6 +206,7 @@ struct gendisk { #endif /* CONFIG_BLK_DEV_INTEGRITY */ int node_id; struct badblocks *bb; + struct lockdep_map lockdep_map; }; static inline struct gendisk *part_to_disk(struct hd_struct *part) @@ -590,8 +591,7 @@ extern void __delete_partition(struct percpu_ref *); extern void delete_partition(struct gendisk *, int); extern void printk_all_partitions(void); -extern struct gendisk *alloc_disk_node(int minors, int node_id); -extern struct gendisk *alloc_disk(int minors); +extern struct gendisk *__alloc_disk_node(int minors, int node_id); extern struct kobject *get_disk(struct gendisk *disk); extern void put_disk(struct gendisk *disk); extern void blk_register_region(dev_t devt, unsigned long range, @@ -615,6 +615,24 @@ extern ssize_t part_fail_store(struct device *dev, const char *buf, size_t count); #endif /* CONFIG_FAIL_MAKE_REQUEST */ +#define alloc_disk_node(minors, node_id) \ +({ \ + static struct lock_class_key __key; \ + const char *__name; \ + struct gendisk *__disk; \ + \ + __name = "(gendisk_completion)"#minors"("#node_id")"; \ + \ + __disk = __alloc_disk_node(minors, node_id); \ + \ + if (__disk) \ + lockdep_init_map(&__disk->lockdep_map, __name, &__key, 0); \ + \ + __disk; \ +}) + +#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) + static inline int hd_ref_init(struct hd_struct *part) { if (percpu_ref_init(&part->ref, __delete_partition, 0, -- cgit v1.2.3 From f1d981eaecf8ace68ec1d15bf05f28a4887ea6fb Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 23 Oct 2017 10:32:08 +0900 Subject: PM / devfreq: Use the available min/max frequency The commit a76caf55e5b35 ("thermal: Add devfreq cooling") is able to disable OPP as a cooling device. In result, both update_devfreq() and {min|max}_freq_show() have to consider the 'opp->available' status of each OPP. So, this patch adds the 'scaling_{min|max}_freq' to struct devfreq in order to indicate the available mininum and maximum frequency by adjusting OPP interface such as dev_pm_opp_{disable|enable}(). The 'scaling_{min|max}_freq' are used for on both update_devfreq() and {min|max}_freq_show(). Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 597294e0cc40..997a9eb34191 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -120,6 +120,8 @@ struct devfreq_dev_profile { * touch this. * @min_freq: Limit minimum frequency requested by user (0: none) * @max_freq: Limit maximum frequency requested by user (0: none) + * @scaling_min_freq: Limit minimum frequency requested by OPP interface + * @scaling_max_freq: Limit maximum frequency requested by OPP interface * @stop_polling: devfreq polling status of a device. * @total_trans: Number of devfreq transitions * @trans_table: Statistics of devfreq transitions @@ -153,6 +155,8 @@ struct devfreq { unsigned long min_freq; unsigned long max_freq; + unsigned long scaling_min_freq; + unsigned long scaling_max_freq; bool stop_polling; /* information for device frequency transition */ -- cgit v1.2.3 From 416b46a2627ae8de1466f90787dede6f9c5a1bfa Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 23 Oct 2017 10:32:10 +0900 Subject: PM / devfreq: Show the all available frequencies The commit a76caf55e5b35 ("thermal: Add devfreq cooling") allows the devfreq device to use the cooling device. When the cooling down are required, the devfreq_cooling.c disables the OPP entry with the dev_pm_opp_disable(). In result, 'available_frequencies'[1] sysfs node never came to show the all available frequencies. [1] /sys/class/devfreq/.../available_frequencies So, this patch uses the 'freq_table' in the 'struct devfreq_dev_profile' in order to show the all available frequencies. - If 'freq_table' is NULL, devfreq core initializes them by using OPP values. - If 'freq_table' is initialized, devfreq core just uses the 'freq_table'. And this patch adds some comment about the sort way of 'freq_table'. Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham --- include/linux/devfreq.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 997a9eb34191..19520625ea94 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -84,8 +84,9 @@ struct devfreq_dev_status { * from devfreq_remove_device() call. If the user * has registered devfreq->nb at a notifier-head, * this is the time to unregister it. - * @freq_table: Optional list of frequencies to support statistics. - * @max_state: The size of freq_table. + * @freq_table: Optional list of frequencies to support statistics + * and freq_table must be generated in ascending order. + * @max_state: The size of freq_table. */ struct devfreq_dev_profile { unsigned long initial_freq; -- cgit v1.2.3 From aa7c352f9841ab3fee5bf1de127a45e6310124a6 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Mon, 23 Oct 2017 10:32:12 +0900 Subject: PM / devfreq: Define the constant governor name Prior to that, the devfreq device uses the governor name when adding the itself. In order to prevent the mistake used the wrong governor name, this patch defines the governor name as a constant and then uses them instead of using the string directly. Signed-off-by: Chanwoo Choi Signed-off-by: MyungJoo Ham Cc: Kukjin Kim Cc: Krzysztof Kozlowski Cc: linux-samsung-soc@vger.kernel.org Cc: linux-arm-kernel@lists.infradead.org Cc: linux-kernel@vger.kernel.org --- include/linux/devfreq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h index 19520625ea94..3aae5b3af87c 100644 --- a/include/linux/devfreq.h +++ b/include/linux/devfreq.h @@ -19,6 +19,13 @@ #define DEVFREQ_NAME_LEN 16 +/* DEVFREQ governor name */ +#define DEVFREQ_GOV_SIMPLE_ONDEMAND "simple_ondemand" +#define DEVFREQ_GOV_PERFORMANCE "performance" +#define DEVFREQ_GOV_POWERSAVE "powersave" +#define DEVFREQ_GOV_USERSPACE "userspace" +#define DEVFREQ_GOV_PASSIVE "passive" + /* DEVFREQ notifier interface */ #define DEVFREQ_TRANSITION_NOTIFIER (0) -- cgit v1.2.3 From 60e2a7780793bae0debc275a9ccd57f7da0cf195 Mon Sep 17 00:00:00 2001 From: Ursula Braun Date: Wed, 25 Oct 2017 11:01:45 +0200 Subject: tcp: TCP experimental option for SMC The SMC protocol [1] relies on the use of a new TCP experimental option [2, 3]. With this option, SMC capabilities are exchanged between peers during the TCP three way handshake. This patch adds support for this experimental option to TCP. References: [1] SMC-R Informational RFC: http://www.rfc-editor.org/info/rfc7609 [2] Shared Use of TCP Experimental Options RFC 6994: https://tools.ietf.org/rfc/rfc6994.txt [3] IANA ExID SMCR: http://www.iana.org/assignments/tcp-parameters/tcp-parameters.xhtml#tcp-exids Signed-off-by: Ursula Braun Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 173a7c2f9636..8c431385b272 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -98,7 +98,8 @@ struct tcp_options_received { tstamp_ok : 1, /* TIMESTAMP seen on SYN packet */ dsack : 1, /* D-SACK is scheduled */ wscale_ok : 1, /* Wscale seen on SYN packet */ - sack_ok : 4, /* SACK seen on SYN packet */ + sack_ok : 3, /* SACK seen on SYN packet */ + smc_ok : 1, /* SMC seen on SYN packet */ snd_wscale : 4, /* Window scaling received from sender */ rcv_wscale : 4; /* Window scaling to send to receiver */ u8 num_sacks; /* Number of SACK blocks */ @@ -110,6 +111,9 @@ static inline void tcp_clear_options(struct tcp_options_received *rx_opt) { rx_opt->tstamp_ok = rx_opt->sack_ok = 0; rx_opt->wscale_ok = rx_opt->snd_wscale = 0; +#if IS_ENABLED(CONFIG_SMC) + rx_opt->smc_ok = 0; +#endif } /* This is the max number of SACKS that we'll generate and process. It's safe @@ -229,7 +233,8 @@ struct tcp_sock { syn_fastopen_ch:1, /* Active TFO re-enabling probe */ syn_data_acked:1,/* data in SYN is acked by SYN-ACK */ save_syn:1, /* Save headers of SYN packet */ - is_cwnd_limited:1;/* forward progress limited by snd_cwnd? */ + is_cwnd_limited:1,/* forward progress limited by snd_cwnd? */ + syn_smc:1; /* SYN includes SMC */ u32 tlp_high_seq; /* snd_nxt at the time of TLP retransmit. */ /* RTT measurement */ -- cgit v1.2.3 From d41bf8c9deaed1a90b18d3ffc5639d4c19f0259a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 23 Oct 2017 16:18:27 -0700 Subject: cgroup, sched: Move basic cpu stats from cgroup.stat to cpu.stat The basic cpu stat is currently shown with "cpu." prefix in cgroup.stat, and the same information is duplicated in cpu.stat when cpu controller is enabled. This is ugly and not very scalable as we want to expand the coverage of stat information which is always available. This patch makes cgroup core always create "cpu.stat" file and show the basic cpu stat there and calls the cpu controller to show the extra stats when enabled. This ensures that the same information isn't presented in multiple places and makes future expansion of basic stats easier. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra (Intel) --- include/linux/cgroup-defs.h | 2 ++ include/linux/cgroup.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3e55bbd31ad1..ada6df7b1f55 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -569,6 +569,8 @@ struct cgroup_subsys { void (*css_released)(struct cgroup_subsys_state *css); void (*css_free)(struct cgroup_subsys_state *css); void (*css_reset)(struct cgroup_subsys_state *css); + int (*css_extra_stat_show)(struct seq_file *seq, + struct cgroup_subsys_state *css); int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 328a70ce0e23..03cad08b09d1 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -703,8 +703,6 @@ static inline void cpuacct_account_field(struct task_struct *tsk, int index, u64 val) {} #endif -void cgroup_stat_show_cputime(struct seq_file *seq, const char *prefix); - void __cgroup_account_cputime(struct cgroup *cgrp, u64 delta_exec); void __cgroup_account_cputime_field(struct cgroup *cgrp, enum cpu_usage_stat index, u64 delta_exec); -- cgit v1.2.3 From 032cfd66afcc2dd2c7be89c71b020fcb15bcc37d Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 25 Oct 2017 03:53:59 -0700 Subject: drivers/net: wan/sdla: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Allen Pais Cc: "David S. Miller" Cc: Tobias Klauser Cc: netdev@vger.kernel.org Signed-off-by: Kees Cook Signed-off-by: David S. Miller --- include/linux/if_frad.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/if_frad.h b/include/linux/if_frad.h index 46df7e565d6f..82a1b4e93570 100644 --- a/include/linux/if_frad.h +++ b/include/linux/if_frad.h @@ -83,6 +83,7 @@ struct frad_local /* fields that are used by the Sangoma SDLA cards */ struct timer_list timer; + struct net_device *dev; int type; /* adapter type */ int state; /* state of the S502/8 control latch */ int buffer; /* current buffer for S508 firmware */ -- cgit v1.2.3 From ecad0d2cb8a7997afdc95031ee3328b997aba5c4 Mon Sep 17 00:00:00 2001 From: James Smart Date: Mon, 23 Oct 2017 15:11:36 -0700 Subject: nvme-fc: remove NVME_FC_MAX_SEGMENTS The define is an arbitrary limit to the io size on the initiator, capping the io to 1MB-4KB. Remove the define from the transport. I/O size will solely be limited by the LLDD sg limits. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 4ea03b9a5c8c..2be4db353937 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -102,8 +102,6 @@ enum nvmefc_fcp_datadir { }; -#define NVME_FC_MAX_SEGMENTS 256 - /** * struct nvmefc_fcp_req - Request structure passed from NVME-FC transport * to LLDD in order to perform a NVME FCP IO operation. -- cgit v1.2.3 From 7863406143d8bbbbda07a61285c5f4c217908dfd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:28 +0200 Subject: sched/isolation: Move housekeeping related code to its own file The housekeeping code is currently tied to the NOHZ code. As we are planning to make housekeeping independent from it, start with moving the relevant code to its own file. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Acked-by: Paul E. McKenney Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-2-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 56 +++++++++++++++++++++++++++++++++++++++++ include/linux/tick.h | 37 --------------------------- 2 files changed, 56 insertions(+), 37 deletions(-) create mode 100644 include/linux/sched/isolation.h (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h new file mode 100644 index 000000000000..b7cfbc46286c --- /dev/null +++ b/include/linux/sched/isolation.h @@ -0,0 +1,56 @@ +#ifndef _LINUX_SCHED_ISOLATION_H +#define _LINUX_SCHED_ISOLATION_H + +#include +#include +#include + +#ifdef CONFIG_NO_HZ_FULL +extern cpumask_var_t housekeeping_mask; + +static inline int housekeeping_any_cpu(void) +{ + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +} + +extern void __init housekeeping_init(void); + +#else + +static inline int housekeeping_any_cpu(void) +{ + return smp_processor_id(); +} + +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ + + +static inline const struct cpumask *housekeeping_cpumask(void) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return housekeeping_mask; +#endif + return cpu_possible_mask; +} + +static inline bool is_housekeeping_cpu(int cpu) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_test_cpu(cpu, housekeeping_mask); +#endif + return true; +} + +static inline void housekeeping_affine(struct task_struct *t) +{ +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + set_cpus_allowed_ptr(t, housekeeping_mask); + +#endif +} + +#endif /* _LINUX_SCHED_ISOLATION_H */ diff --git a/include/linux/tick.h b/include/linux/tick.h index fe01e68bf520..68afc09aa8ac 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -137,7 +137,6 @@ static inline u64 get_cpu_iowait_time_us(int cpu, u64 *unused) { return -1; } #ifdef CONFIG_NO_HZ_FULL extern bool tick_nohz_full_running; extern cpumask_var_t tick_nohz_full_mask; -extern cpumask_var_t housekeeping_mask; static inline bool tick_nohz_full_enabled(void) { @@ -161,11 +160,6 @@ static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) cpumask_or(mask, mask, tick_nohz_full_mask); } -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void tick_nohz_dep_set(enum tick_dep_bits bit); extern void tick_nohz_dep_clear(enum tick_dep_bits bit); extern void tick_nohz_dep_set_cpu(int cpu, enum tick_dep_bits bit); @@ -235,10 +229,6 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); #else -static inline int housekeeping_any_cpu(void) -{ - return smp_processor_id(); -} static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } static inline void tick_nohz_full_add_cpus_to(struct cpumask *mask) { } @@ -260,33 +250,6 @@ static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } #endif -static inline const struct cpumask *housekeeping_cpumask(void) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif - return cpu_possible_mask; -} - -static inline bool is_housekeeping_cpu(int cpu) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); -#endif - return true; -} - -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - static inline void tick_nohz_task_switch(void) { if (tick_nohz_full_enabled()) -- cgit v1.2.3 From 9f0ca2d97ef0b5e966be2cfef26c7c094ec14e41 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:30 +0200 Subject: sched/isolation: Provide a dynamic off-case to housekeeping_any_cpu() housekeeping_any_cpu() doesn't handle correctly the case where CONFIG_NO_HZ_FULL=y and no CPU is in nohz_full mode. So far no caller needs this but let's prepare to avoid any future surprise. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-4-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index b7cfbc46286c..040df04fa78a 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -7,25 +7,20 @@ #ifdef CONFIG_NO_HZ_FULL extern cpumask_var_t housekeeping_mask; - -static inline int housekeeping_any_cpu(void) -{ - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -} - extern void __init housekeeping_init(void); - #else +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ static inline int housekeeping_any_cpu(void) { +#ifdef CONFIG_NO_HZ_FULL + if (tick_nohz_full_enabled()) + return cpumask_any_and(housekeeping_mask, cpu_online_mask); +#endif return smp_processor_id(); } -static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ - - static inline const struct cpumask *housekeeping_cpumask(void) { #ifdef CONFIG_NO_HZ_FULL -- cgit v1.2.3 From 7e56a1cf4b28f5739526877b8dbad623fae2e4e7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:31 +0200 Subject: sched/isolation: Make the housekeeping cpumask private Nobody needs to access this detail. housekeeping_cpumask() already takes care of it. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-5-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 31 ++++++++++--------------------- 1 file changed, 10 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 040df04fa78a..ed935ffc6ffa 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -6,46 +6,35 @@ #include #ifdef CONFIG_NO_HZ_FULL -extern cpumask_var_t housekeeping_mask; +extern int housekeeping_any_cpu(void); +extern const struct cpumask *housekeeping_cpumask(void); +extern void housekeeping_affine(struct task_struct *t); +extern bool housekeeping_test_cpu(int cpu); extern void __init housekeeping_init(void); + #else -static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ static inline int housekeeping_any_cpu(void) { -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return cpumask_any_and(housekeeping_mask, cpu_online_mask); -#endif return smp_processor_id(); } static inline const struct cpumask *housekeeping_cpumask(void) { -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - return housekeeping_mask; -#endif return cpu_possible_mask; } +static inline void housekeeping_affine(struct task_struct *t) { } +static inline void housekeeping_init(void) { } +#endif /* CONFIG_NO_HZ_FULL */ + static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (tick_nohz_full_enabled()) - return cpumask_test_cpu(cpu, housekeeping_mask); + return housekeeping_test_cpu(cpu); #endif return true; } -static inline void housekeeping_affine(struct task_struct *t) -{ -#ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) - set_cpus_allowed_ptr(t, housekeeping_mask); - -#endif -} - #endif /* _LINUX_SCHED_ISOLATION_H */ -- cgit v1.2.3 From e179f5a04ba46ee5c5439480c2bfd68c358168b7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:32 +0200 Subject: sched/isolation: Use its own static key Housekeeping code still depends on the nohz_full static key. Since we want to decouple housekeeping from NOHZ, let's create a housekeeping specific static key. It's mostly relevant for calls to is_housekeeping_cpu() from the scheduler. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-6-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index ed935ffc6ffa..194c586fbb12 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -6,6 +6,7 @@ #include #ifdef CONFIG_NO_HZ_FULL +DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); extern int housekeeping_any_cpu(void); extern const struct cpumask *housekeeping_cpumask(void); extern void housekeeping_affine(struct task_struct *t); @@ -31,7 +32,7 @@ static inline void housekeeping_init(void) { } static inline bool is_housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL - if (tick_nohz_full_enabled()) + if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_test_cpu(cpu); #endif return true; -- cgit v1.2.3 From 204c083a009378dfa751175b5fcddc75988bab6c Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:33 +0200 Subject: sched/isolation: Rename is_housekeeping_cpu() to housekeeping_cpu() Fit it into the housekeeping_*() namespace. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-7-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 194c586fbb12..ad0f5d986a2e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -29,7 +29,7 @@ static inline void housekeeping_affine(struct task_struct *t) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_NO_HZ_FULL */ -static inline bool is_housekeeping_cpu(int cpu) +static inline bool housekeeping_cpu(int cpu) { #ifdef CONFIG_NO_HZ_FULL if (static_branch_unlikely(&housekeeping_overriden)) -- cgit v1.2.3 From 5c4991e24c69737bd41fc2737b1e3980abbf73f9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:34 +0200 Subject: sched/isolation: Split out new CONFIG_CPU_ISOLATION=y config from CONFIG_NO_HZ_FULL Split the housekeeping config from CONFIG_NO_HZ_FULL. This way we finally separate the isolation code from NOHZ. Although a dependency to CONFIG_NO_HZ_FULL remains for now, while the housekeeping code still deals with NOHZ internals. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-8-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index ad0f5d986a2e..93ac2367a520 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,7 +5,7 @@ #include #include -#ifdef CONFIG_NO_HZ_FULL +#ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); extern int housekeeping_any_cpu(void); extern const struct cpumask *housekeeping_cpumask(void); @@ -27,11 +27,11 @@ static inline const struct cpumask *housekeeping_cpumask(void) static inline void housekeeping_affine(struct task_struct *t) { } static inline void housekeeping_init(void) { } -#endif /* CONFIG_NO_HZ_FULL */ +#endif /* CONFIG_CPU_ISOLATION */ static inline bool housekeeping_cpu(int cpu) { -#ifdef CONFIG_NO_HZ_FULL +#ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overriden)) return housekeeping_test_cpu(cpu); #endif -- cgit v1.2.3 From de201559df872f83d0c08fb4effe3efd28e6cbc8 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:35 +0200 Subject: sched/isolation: Introduce housekeeping flags Before we implement isolcpus under housekeeping, we need the isolation features to be more finegrained. For example some people want NOHZ_FULL without the full scheduler isolation, others want full scheduler isolation without NOHZ_FULL. So let's cut all these isolation features piecewise, at the risk of overcutting it right now. We can still merge some flags later if they always make sense together. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-9-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 26 +++++++++++++++++--------- 1 file changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 93ac2367a520..9bb753eece3b 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -5,35 +5,43 @@ #include #include +enum hk_flags { + HK_FLAG_TIMER = 1, + HK_FLAG_RCU = (1 << 1), + HK_FLAG_MISC = (1 << 2), + HK_FLAG_SCHED = (1 << 3), +}; + #ifdef CONFIG_CPU_ISOLATION DECLARE_STATIC_KEY_FALSE(housekeeping_overriden); -extern int housekeeping_any_cpu(void); -extern const struct cpumask *housekeeping_cpumask(void); -extern void housekeeping_affine(struct task_struct *t); -extern bool housekeeping_test_cpu(int cpu); +extern int housekeeping_any_cpu(enum hk_flags flags); +extern const struct cpumask *housekeeping_cpumask(enum hk_flags flags); +extern void housekeeping_affine(struct task_struct *t, enum hk_flags flags); +extern bool housekeeping_test_cpu(int cpu, enum hk_flags flags); extern void __init housekeeping_init(void); #else -static inline int housekeeping_any_cpu(void) +static inline int housekeeping_any_cpu(enum hk_flags flags) { return smp_processor_id(); } -static inline const struct cpumask *housekeeping_cpumask(void) +static inline const struct cpumask *housekeeping_cpumask(enum hk_flags flags) { return cpu_possible_mask; } -static inline void housekeeping_affine(struct task_struct *t) { } +static inline void housekeeping_affine(struct task_struct *t, + enum hk_flags flags) { } static inline void housekeeping_init(void) { } #endif /* CONFIG_CPU_ISOLATION */ -static inline bool housekeeping_cpu(int cpu) +static inline bool housekeeping_cpu(int cpu, enum hk_flags flags) { #ifdef CONFIG_CPU_ISOLATION if (static_branch_unlikely(&housekeeping_overriden)) - return housekeeping_test_cpu(cpu); + return housekeeping_test_cpu(cpu, flags); #endif return true; } -- cgit v1.2.3 From 6f1982fedd59856bcc42a9b521be4c3ffd2f60a7 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:36 +0200 Subject: sched/isolation: Handle the nohz_full= parameter We want to centralize the isolation management, done by the housekeeping subsystem. Therefore we need to handle the nohz_full= parameter from there. Since nohz_full= so far has involved unbound timers, watchdog, RCU and tilegx NAPI isolation, we keep that default behaviour. nohz_full= will be deprecated in the future. We want to control the isolation features from the isolcpus= parameter. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-10-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched/isolation.h | 1 + include/linux/tick.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index 9bb753eece3b..e53cfa96e91e 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -10,6 +10,7 @@ enum hk_flags { HK_FLAG_RCU = (1 << 1), HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), + HK_FLAG_TICK = (1 << 4), }; #ifdef CONFIG_CPU_ISOLATION diff --git a/include/linux/tick.h b/include/linux/tick.h index 68afc09aa8ac..e2a163a9f96c 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -228,6 +228,7 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, extern void tick_nohz_full_kick_cpu(int cpu); extern void __tick_nohz_task_switch(void); +extern void __init tick_nohz_full_setup(cpumask_var_t cpumask); #else static inline bool tick_nohz_full_enabled(void) { return false; } static inline bool tick_nohz_full_cpu(int cpu) { return false; } @@ -248,6 +249,7 @@ static inline void tick_dep_clear_signal(struct signal_struct *signal, static inline void tick_nohz_full_kick_cpu(int cpu) { } static inline void __tick_nohz_task_switch(void) { } +static inline void tick_nohz_full_setup(cpumask_var_t cpumask) { } #endif static inline void tick_nohz_task_switch(void) -- cgit v1.2.3 From edb9382175c3ebdced8ffdb3e0f20052ad9fdbe9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Fri, 27 Oct 2017 04:42:37 +0200 Subject: sched/isolation: Move isolcpus= handling to the housekeeping code We want to centralize the isolation features, to be done by the housekeeping subsystem and scheduler domain isolation is a significant part of it. No intended behaviour change, we just reuse the housekeeping cpumask and core code. Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: Chris Metcalf Cc: Christoph Lameter Cc: Linus Torvalds Cc: Luiz Capitulino Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Wanpeng Li Link: http://lkml.kernel.org/r/1509072159-31808-11-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- include/linux/sched/isolation.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 0f897dfc195e..1b0cc0d6df8d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -165,8 +165,6 @@ struct task_group; /* Task command name length: */ #define TASK_COMM_LEN 16 -extern cpumask_var_t cpu_isolated_map; - extern void scheduler_tick(void); #define MAX_SCHEDULE_TIMEOUT LONG_MAX diff --git a/include/linux/sched/isolation.h b/include/linux/sched/isolation.h index e53cfa96e91e..d849431c8060 100644 --- a/include/linux/sched/isolation.h +++ b/include/linux/sched/isolation.h @@ -11,6 +11,7 @@ enum hk_flags { HK_FLAG_MISC = (1 << 2), HK_FLAG_SCHED = (1 << 3), HK_FLAG_TICK = (1 << 4), + HK_FLAG_DOMAIN = (1 << 5), }; #ifdef CONFIG_CPU_ISOLATION -- cgit v1.2.3 From 7d9285e82db5defca4d9674ba089429eeca0c697 Mon Sep 17 00:00:00 2001 From: Yonghong Song Date: Thu, 5 Oct 2017 09:19:19 -0700 Subject: perf/bpf: Extend the perf_event_read_local() interface, a.k.a. "bpf: perf event change needed for subsequent bpf helpers" eBPF programs would like access to the (perf) event enabled and running times along with the event value, such that they can deal with event multiplexing (among other things). This patch extends the interface; a future eBPF patch will utilize the new functionality. [ Note, there's a same-content commit with a poor changelog and a meaningless title in the networking tree as well - but we need this change for subsequent perf work, so apply it here as well, with a proper changelog. Hopefully Git will be able to sort out this somewhat messy workflow, if there are no other, conflicting changes to these files. ] Signed-off-by: Yonghong Song [ Rewrote the changelog. ] Signed-off-by: Peter Zijlstra (Intel) Cc: Cc: Cc: Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: David S. Miller Link: http://lkml.kernel.org/r/20171005161923.332790-2-yhs@fb.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 8e22f24ded6a..79b18a20cf5d 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -806,6 +806,7 @@ struct perf_output_handle { struct bpf_perf_event_data_kern { struct pt_regs *regs; struct perf_sample_data *data; + struct perf_event *event; }; #ifdef CONFIG_CGROUP_PERF @@ -884,7 +885,8 @@ perf_event_create_kernel_counter(struct perf_event_attr *attr, void *context); extern void perf_pmu_migrate_context(struct pmu *pmu, int src_cpu, int dst_cpu); -int perf_event_read_local(struct perf_event *event, u64 *value); +int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running); extern u64 perf_event_read_value(struct perf_event *event, u64 *enabled, u64 *running); @@ -1286,7 +1288,8 @@ static inline const struct perf_event_attr *perf_event_attrs(struct perf_event * { return ERR_PTR(-EINVAL); } -static inline int perf_event_read_local(struct perf_event *event, u64 *value) +static inline int perf_event_read_local(struct perf_event *event, u64 *value, + u64 *enabled, u64 *running) { return -EINVAL; } -- cgit v1.2.3 From 8ca2bd41c7d1c135e9ac6f25970c2d491865088a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Sep 2017 14:12:35 +0200 Subject: perf/core: Rename 'enum perf_event_active_state' Its a weird name, active is one of the states, it should not be part of the name, also, its too long. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 79b18a20cf5d..b7532650de47 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -485,9 +485,9 @@ struct perf_addr_filters_head { }; /** - * enum perf_event_active_state - the states of a event + * enum perf_event_state - the states of a event */ -enum perf_event_active_state { +enum perf_event_state { PERF_EVENT_STATE_DEAD = -4, PERF_EVENT_STATE_EXIT = -3, PERF_EVENT_STATE_ERROR = -2, @@ -578,7 +578,7 @@ struct perf_event { struct pmu *pmu; void *pmu_private; - enum perf_event_active_state state; + enum perf_event_state state; unsigned int attach_state; local64_t count; atomic64_t child_count; -- cgit v1.2.3 From 0d3d73aac2ff05c78387aa9dcc2c8aa3804405e7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 5 Sep 2017 14:16:28 +0200 Subject: perf/core: Rewrite event timekeeping The current even timekeeping, which computes enabled and running times, uses 3 distinct timestamps to reflect the various event states: OFF (stopped), INACTIVE (enabled) and ACTIVE (running). Furthermore, the update rules are such that even INACTIVE events need their timestamps updated. This is undesirable because we'd like to not touch INACTIVE events if at all possible, this makes event scheduling (much) more expensive than needed. Rewrite the timekeeping to directly use event->state, this greatly simplifies the code and results in only having to update things when we change state, or an up-to-date value is requested (read). Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 19 +------------------ 1 file changed, 1 insertion(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b7532650de47..874b71a70058 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -588,26 +588,10 @@ struct perf_event { * has been enabled (i.e. eligible to run, and the task has * been scheduled in, if this is a per-task event) * and running (scheduled onto the CPU), respectively. - * - * They are computed from tstamp_enabled, tstamp_running and - * tstamp_stopped when the event is in INACTIVE or ACTIVE state. */ u64 total_time_enabled; u64 total_time_running; - - /* - * These are timestamps used for computing total_time_enabled - * and total_time_running when the event is in INACTIVE or - * ACTIVE state, measured in nanoseconds from an arbitrary point - * in time. - * tstamp_enabled: the notional time when the event was enabled - * tstamp_running: the notional time when the event was scheduled on - * tstamp_stopped: in INACTIVE state, the notional time when the - * event was scheduled off. - */ - u64 tstamp_enabled; - u64 tstamp_running; - u64 tstamp_stopped; + u64 tstamp; /* * timestamp shadows the actual context timing but it can @@ -699,7 +683,6 @@ struct perf_event { #ifdef CONFIG_CGROUP_PERF struct perf_cgroup *cgrp; /* cgroup event is attach to */ - int cgrp_defer_enabled; #endif struct list_head sb_list; -- cgit v1.2.3 From 035226b964c820f65e201cdf123705a8f1d7c670 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Thu, 26 Oct 2017 01:47:42 +0000 Subject: bpf: remove tail_call and get_stackid helper declarations from bpf.h commit afdb09c720b6 ("security: bpf: Add LSM hooks for bpf object related syscall") included linux/bpf.h in linux/security.h. As a result, bpf programs including bpf_helpers.h and some other header that ends up pulling in also security.h, such as several examples under samples/bpf, fail to compile because bpf_tail_call and bpf_get_stackid are now "redefined as different kind of symbol". >From bpf.h: u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); Whereas in bpf_helpers.h they are: static void (*bpf_tail_call)(void *ctx, void *map, int index); static int (*bpf_get_stackid)(void *ctx, void *map, int flags); Fix this by removing the unused declaration of bpf_tail_call and moving the declaration of bpf_get_stackid in bpf_trace.c, which is the only place where it's needed. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 172be7faf7ba..520aeebe0d93 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -231,9 +231,6 @@ struct bpf_event_entry { struct rcu_head rcu; }; -u64 bpf_tail_call(u64 ctx, u64 r2, u64 index, u64 r4, u64 r5); -u64 bpf_get_stackid(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); - bool bpf_prog_array_compatible(struct bpf_array *array, const struct bpf_prog *fp); int bpf_prog_calc_tag(struct bpf_prog *fp); -- cgit v1.2.3 From 728fe6cef27444b6575c5e7ab5de13274610488b Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 11 Oct 2017 11:41:19 +0200 Subject: i2c: Allow overriding dev_name through board_info For devices not instantiated through ACPI the i2c-client's device-name gets set to - by default, e.g. "0-0022" this means that the device-name is dependent on the order in which the i2c-busses are enumerated. In some cases having a predictable constant device-name is desirable, for example on non device-tree platforms the link between a regulator and its consumers is specified by the platform code by setting regulator_init_data.consumers. This array identifies the regulator's consumers by dev_name and supply(-name). Which requires a constant dev_name. This commit adds a dev_name field to i2c_board_info allowing platform code to set a contstant dev_name so that the device can be identified by its dev_name in other platform code. Signed-off-by: Hans de Goede Acked-by: Mark Brown (live at ELCE17) Acked-by: Andy Shevchenko (live at ELCE17) Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index d501d3956f13..0f774406fad0 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -304,6 +304,7 @@ static inline bool i2c_detect_slave_mode(struct device *dev) { return false; } * @type: chip type, to initialize i2c_client.name * @flags: to initialize i2c_client.flags * @addr: stored in i2c_client.addr + * @dev_name: Overrides the default - dev_name if set * @platform_data: stored in i2c_client.dev.platform_data * @archdata: copied into i2c_client.dev.archdata * @of_node: pointer to OpenFirmware device node @@ -328,6 +329,7 @@ struct i2c_board_info { char type[I2C_NAME_SIZE]; unsigned short flags; unsigned short addr; + const char *dev_name; void *platform_data; struct dev_archdata *archdata; struct device_node *of_node; -- cgit v1.2.3 From 356c3e9afac0cc19c3d3b0cbc67106ce8efa0743 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Thu, 26 Oct 2017 11:00:48 +0200 Subject: net: dsa: lan9303: Move struct lan9303 to include/linux/dsa/lan9303.h The next patch require net/dsa/tag_lan9303.c to access struct lan9303. Therefore move struct lan9303 definitions from drivers/net/dsa/lan9303.h to new file include/linux/dsa/lan9303.h. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/dsa/lan9303.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) create mode 100644 include/linux/dsa/lan9303.h (limited to 'include/linux') diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h new file mode 100644 index 000000000000..05d8d136baab --- /dev/null +++ b/include/linux/dsa/lan9303.h @@ -0,0 +1,36 @@ +/* Included by drivers/net/dsa/lan9303.h and net/dsa/tag_lan9303.c */ +#include + +struct lan9303; + +struct lan9303_phy_ops { + /* PHY 1 and 2 access*/ + int (*phy_read)(struct lan9303 *chip, int port, int regnum); + int (*phy_write)(struct lan9303 *chip, int port, + int regnum, u16 val); +}; + +#define LAN9303_NUM_ALR_RECORDS 512 +struct lan9303_alr_cache_entry { + u8 mac_addr[ETH_ALEN]; + u8 port_map; /* Bitmap of ports. Zero if unused entry */ + u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ +}; + +struct lan9303 { + struct device *dev; + struct regmap *regmap; + struct regmap_irq_chip_data *irq_data; + struct gpio_desc *reset_gpio; + u32 reset_duration; /* in [ms] */ + bool phy_addr_sel_strap; + struct dsa_switch *ds; + struct mutex indirect_mutex; /* protect indexed register access */ + const struct lan9303_phy_ops *ops; + bool is_bridged; /* true if port 1 and 2 are bridged */ + u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ + /* LAN9303 do not offer reading specific ALR entry. Cache all + * static entries in a flat table + **/ + struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; +}; -- cgit v1.2.3 From 3d0bd028ffb4a4915cb64cfa0d2cee1578cc0321 Mon Sep 17 00:00:00 2001 From: Vinicius Costa Gomes Date: Mon, 16 Oct 2017 18:01:27 -0700 Subject: net/sched: Add support for HW offloading for CBS This adds support for offloading the CBS algorithm to the controller, if supported. Drivers wanting to support CBS offload must implement the .ndo_setup_tc callback and handle the TC_SETUP_CBS (introduced here) type. Signed-off-by: Vinicius Costa Gomes Tested-by: Henrik Austad Signed-off-by: Jeff Kirsher --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6c7960c8338a..5e02f79b2110 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -776,6 +776,7 @@ enum tc_setup_type { TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, + TC_SETUP_CBS, }; /* These structures hold the attributes of xdp state that are being passed -- cgit v1.2.3 From 9b9f2b8bc2ac98d91da714660c53d1cdac999e09 Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 24 Aug 2017 17:31:01 +0800 Subject: i2c: i2c-smbus: Use threaded irq for smbalert Prior to this commit the smbalert_irq was handling in the hard irq context. This change switch to using a thread irq which avoids the need for the work thread. Using threaded irq also removes the need for the edge_triggered flag as the enabling / disabling of the hard irq for level triggered interrupts will be handled by the irq core. Without this change have an irq connected to something like an i2c gpio resulted in a null ptr deferences. Specifically handle_nested_irq calls the threaded irq handler. There are currently 3 in tree drivers affected by this change. i2c-parport driver calls i2c_handle_smbus_alert in a hard irq context. This driver use edge trigger interrupts which skip the enable / disable calls. But it still need to handle the smbus transaction on a thread. So the work thread is kept for this driver. i2c-parport-light & i2c-thunderx-pcidrv provide the irq number in the setup which will result in the thread irq being used. i2c-parport-light is edge trigger so the enable / disable call was skipped as well. i2c-thunderx-pcidrv is getting the edge / level trigger setting from of data and was setting the flag as required. However the irq core should handle this automatically. Signed-off-by: Phil Reid Reviewed-by: Benjamin Tissoires Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index a1385023a29b..19efbd14e812 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -42,7 +42,6 @@ * properly set. */ struct i2c_smbus_alert_setup { - unsigned int alert_edge_triggered:1; int irq; }; -- cgit v1.2.3 From 69d17246ab255dda8e71c8d65396b4aa6121b7ad Mon Sep 17 00:00:00 2001 From: Phil Reid Date: Thu, 24 Aug 2017 17:31:03 +0800 Subject: i2c: i2c-smbus: add of_i2c_setup_smbus_alert This commit adds of_i2c_setup_smbus_alert which allows the smbalert driver to be attached to an i2c adapter via the device tree. Signed-off-by: Phil Reid Acked-by: Rob Herring Signed-off-by: Wolfram Sang --- include/linux/i2c-smbus.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c-smbus.h b/include/linux/i2c-smbus.h index 19efbd14e812..fb0e040b1abb 100644 --- a/include/linux/i2c-smbus.h +++ b/include/linux/i2c-smbus.h @@ -49,4 +49,13 @@ struct i2c_client *i2c_setup_smbus_alert(struct i2c_adapter *adapter, struct i2c_smbus_alert_setup *setup); int i2c_handle_smbus_alert(struct i2c_client *ara); +#if IS_ENABLED(CONFIG_I2C_SMBUS) && IS_ENABLED(CONFIG_OF) +int of_i2c_setup_smbus_alert(struct i2c_adapter *adap); +#else +static inline int of_i2c_setup_smbus_alert(struct i2c_adapter *adap) +{ + return 0; +} +#endif + #endif /* _LINUX_I2C_SMBUS_H */ -- cgit v1.2.3 From 186731145f920fb1514200043bcaf9c689693857 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 10 Sep 2017 11:44:46 +0200 Subject: hwmon: (sht15) Root out platform data After finding out there are active users of this sensor I noticed: - It has a single PXA27x board file using the platform data - The platform data is only used to carry two GPIO pins, all other fields are unused - The driver does not use GPIO descriptors but the legacy GPIO API I saw we can swiftly fix this by: - Killing off the platform data entirely - Define a GPIO descriptor lookup table in the board file - Use the standard devm_gpiod_get() to grab the GPIO descriptors from either the device tree or the board file table. This compiles, but needs testing. Cc: arm@kernel.org Cc: Marco Franchi Cc: Davide Hug Cc: Jonathan Cameron Signed-off-by: Linus Walleij Acked-by: Arnd Bergmann Tested-by: Marco Franchi Acked-by: Arnd Bergmann Signed-off-by: Guenter Roeck --- include/linux/platform_data/sht15.h | 38 ------------------------------------- 1 file changed, 38 deletions(-) delete mode 100644 include/linux/platform_data/sht15.h (limited to 'include/linux') diff --git a/include/linux/platform_data/sht15.h b/include/linux/platform_data/sht15.h deleted file mode 100644 index 12289c1e9413..000000000000 --- a/include/linux/platform_data/sht15.h +++ /dev/null @@ -1,38 +0,0 @@ -/* - * sht15.h - support for the SHT15 Temperature and Humidity Sensor - * - * Copyright (c) 2009 Jonathan Cameron - * - * Copyright (c) 2007 Wouter Horre - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * For further information, see the Documentation/hwmon/sht15 file. - */ - -#ifndef _PDATA_SHT15_H -#define _PDATA_SHT15_H - -/** - * struct sht15_platform_data - sht15 connectivity info - * @gpio_data: no. of gpio to which bidirectional data line is - * connected. - * @gpio_sck: no. of gpio to which the data clock is connected. - * @supply_mv: supply voltage in mv. Overridden by regulator if - * available. - * @checksum: flag to indicate the checksum should be validated. - * @no_otp_reload: flag to indicate no reload from OTP. - * @low_resolution: flag to indicate the temp/humidity resolution to use. - */ -struct sht15_platform_data { - int gpio_data; - int gpio_sck; - int supply_mv; - bool checksum; - bool no_otp_reload; - bool low_resolution; -}; - -#endif /* _PDATA_SHT15_H */ -- cgit v1.2.3 From ef7a612415958de1f9afd86235d38b14975d0b7c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Tue, 26 Sep 2017 01:09:05 +0200 Subject: hwmon: (gpio-fan) Localize platform data There is not a single user of the platform data header in . We can conclude that all current users are probing from the device tree, so start simplifying the code by pulling the header into the driver. Convert "unsigned" to "unsigned int" in the process to make checkpatch happy. Signed-off-by: Linus Walleij Signed-off-by: Guenter Roeck --- include/linux/gpio-fan.h | 36 ------------------------------------ 1 file changed, 36 deletions(-) delete mode 100644 include/linux/gpio-fan.h (limited to 'include/linux') diff --git a/include/linux/gpio-fan.h b/include/linux/gpio-fan.h deleted file mode 100644 index 096659169215..000000000000 --- a/include/linux/gpio-fan.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * include/linux/gpio-fan.h - * - * Platform data structure for GPIO fan driver - * - * This file is licensed under the terms of the GNU General Public - * License version 2. This program is licensed "as is" without any - * warranty of any kind, whether express or implied. - */ - -#ifndef __LINUX_GPIO_FAN_H -#define __LINUX_GPIO_FAN_H - -struct gpio_fan_alarm { - unsigned gpio; - unsigned active_low; -}; - -struct gpio_fan_speed { - int rpm; - int ctrl_val; -}; - -struct gpio_fan_platform_data { - int num_ctrl; - unsigned *ctrl; /* fan control GPIOs. */ - struct gpio_fan_alarm *alarm; /* fan alarm GPIO. */ - /* - * Speed conversion array: rpm from/to GPIO bit field. - * This array _must_ be sorted in ascending rpm order. - */ - int num_speed; - struct gpio_fan_speed *speed; -}; - -#endif /* __LINUX_GPIO_FAN_H */ -- cgit v1.2.3 From b2e63555592f81331c8da3afaa607d8cf83e8138 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 10 Sep 2017 01:30:46 +0200 Subject: i2c: gpio: Convert to use descriptors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This converts the GPIO-based I2C-driver to using GPIO descriptors instead of the old global numberspace-based GPIO interface. We: - Convert the driver to unconditionally grab two GPIOs from the device by index 0 (SDA) and 1 (SCL) which will work fine with device tree and descriptor tables. The existing device trees will continue to work just like before, but without any roundtrip through the global numberspace. - Brutally convert all boardfiles still passing global GPIOs by registering descriptor tables associated with the devices instead so this driver does not need to keep supporting passing any GPIO numbers as platform data. There is no stepwise approach as elegant as this, I strongly prefer this big hammer over any antsteps for this conversion. This way the old GPIO numbers go away and NEVER COME BACK. Special conversion for the different boards utilizing I2C-GPIO: - EP93xx (arch/arm/mach-ep93xx): pretty straight forward as all boards were using the same two GPIO lines, just define these two in a lookup table for "i2c-gpio" and register these along with the device. None of them define any other platform data so just pass NULL as platform data. This platform selects GPIOLIB so all should be smooth. The pins appear on a gpiochip for bank "G" as pins 1 (SDA) and 0 (SCL). - IXP4 (arch/arm/mach-ixp4): descriptor tables have to be registered for each board separately. They all use "IXP4XX_GPIO_CHIP" so it is pretty straight forward. Most board define no other platform data than SCL/SDA so they can drop the #include of and assign NULL to platform data. The "goramo_mlr" (Goramo Multilink Router) board is a bit worrisome: it implements its own I2C bit-banging in the board file, and optionally registers an I2C serial port, but claims the same GPIO lines for itself in the board file. This is not going to work: there will be competition for the GPIO lines, so delete the optional extra I2C bus instead, no I2C devices are registered on it anyway, there are just hints that it may contain an EEPROM that may be accessed from userspace. This needs to be fixed up properly by the serial clock using I2C emulation so drop a note in the code. - KS8695 board acs5k (arch/arm/mach-ks8695/board-acs5.c) has some platform data in addition to the pins so it needs to be kept around sans GPIO lines. Its GPIO chip is named "KS8695" and the arch selects GPIOLIB. - PXA boards (arch/arm/mach-pxa/*) use some of the platform data so it needs to be preserved here. The viper board even registers two GPIO I2Cs. The gpiochip is named "gpio-pxa" and the arch selects GPIOLIB. - SA1100 Simpad (arch/arm/mach-sa1100/simpad.c) defines a GPIO I2C bus, and the arch selects GPIOLIB. - Blackfin boards (arch/blackfin/bf533 etc) for these I assume their I2C GPIOs refer to the local gpiochip defined in arch/blackfin/kernel/bfin_gpio.c names "BFIN-GPIO". The arch selects GPIOLIB. The boards get spiked with IF_ENABLED(I2C_GPIO) but that is a side effect of it being like that already (I would just have Kconfig select I2C_GPIO and get rid of them all.) I also delete any platform data set to 0 as it will get that value anyway from static declartions of platform data. - The MIPS selects GPIOLIB and the Alchemy machine is using two local GPIO chips, one of them has a GPIO I2C. We need to adjust the local offset from the global number space here. The ATH79 has a proper GPIO driver in drivers/gpio/gpio-ath79.c and AFAICT the chip is named "ath79-gpio" and the PB44 PCF857x expander spawns from this on GPIO 1 and 0. The latter board only use the platform data to specify pins so it can be cut altogether after this. - The MFD Silicon Motion SM501 is a special case. It dynamically spawns an I2C bus off the MFD using sm501_create_subdev(). We use an approach to dynamically create a machine descriptor table and attach this to the "SM501-LOW" or "SM501-HIGH" gpiochip. We use chip-local offsets to grab the right lines. We can get rid of two local static inline helpers as part of this refactoring. Cc: Steven Miao Cc: Ralf Baechle Cc: Guenter Roeck Cc: Ville Syrjälä Cc: Magnus Damm Cc: Ben Dooks Cc: Heiko Schocher Acked-by: Wu, Aaron Acked-by: Olof Johansson Acked-by: Lee Jones Acked-by: Ralf Baechle Tested-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/i2c-gpio.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/i2c-gpio.h b/include/linux/i2c-gpio.h index c1bcb1f1d73b..352c1426fd4d 100644 --- a/include/linux/i2c-gpio.h +++ b/include/linux/i2c-gpio.h @@ -12,8 +12,6 @@ /** * struct i2c_gpio_platform_data - Platform-dependent data for i2c-gpio - * @sda_pin: GPIO pin ID to use for SDA - * @scl_pin: GPIO pin ID to use for SCL * @udelay: signal toggle delay. SCL frequency is (500 / udelay) kHz * @timeout: clock stretching timeout in jiffies. If the slave keeps * SCL low for longer than this, the transfer will time out. @@ -26,8 +24,6 @@ * @scl_is_output_only: SCL output drivers cannot be turned off. */ struct i2c_gpio_platform_data { - unsigned int sda_pin; - unsigned int scl_pin; int udelay; int timeout; unsigned int sda_is_open_drain:1; -- cgit v1.2.3 From f926dfc112bc6cf41d7068ee5e3f261e13a5bec8 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 10 Sep 2017 19:26:22 +0200 Subject: gpio: Make it possible for consumers to enforce open drain Some busses, like I2C, strictly need to have the line handled as open drain, i.e. not actively driven high. For this reason the i2c-gpio.c bit-banged I2C driver is reimplementing open drain handling outside of gpiolib. This is not very optimal. Instead make it possible for a consumer to explcitly express that the line must be handled as open drain instead of allowing local hacks papering over this issue. The descriptor tables, whether DT, ACPI or board files, should of course have flagged these lines as open drain. E.g.: enum gpio_lookup_flags GPIO_OPEN_DRAIN for a board file, or gpios = <&foo 42 GPIO_ACTIVE_HIGH|GPIO_OPEN_DRAIN>; in a device tree using But more often than not, these descriptors are wrong. So we need to make it possible for consumers to enforce this open drain behaviour. We now have two new enumerated GPIO descriptor config flags: GPIOD_OUT_LOW_OPEN_DRAIN and GPIOD_OUT_HIGH_OPEN_DRAIN that will set up the lined enforced as open drain as output low or high, using open drain (if the driver supports it) or using open drain emulation (setting the line as input to drive it high) from the gpiolib core. Cc: linux-gpio@vger.kernel.org Tested-by: Geert Uytterhoeven Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 8f702fcbe485..5f72a49d1aa3 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -28,6 +28,7 @@ struct gpio_descs { #define GPIOD_FLAGS_BIT_DIR_SET BIT(0) #define GPIOD_FLAGS_BIT_DIR_OUT BIT(1) #define GPIOD_FLAGS_BIT_DIR_VAL BIT(2) +#define GPIOD_FLAGS_BIT_OPEN_DRAIN BIT(3) /** * Optional flags that can be passed to one of gpiod_* to configure direction @@ -39,6 +40,11 @@ enum gpiod_flags { GPIOD_OUT_LOW = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT, GPIOD_OUT_HIGH = GPIOD_FLAGS_BIT_DIR_SET | GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL, + GPIOD_OUT_LOW_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET | + GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_OPEN_DRAIN, + GPIOD_OUT_HIGH_OPEN_DRAIN = GPIOD_FLAGS_BIT_DIR_SET | + GPIOD_FLAGS_BIT_DIR_OUT | GPIOD_FLAGS_BIT_DIR_VAL | + GPIOD_FLAGS_BIT_OPEN_DRAIN, }; #ifdef CONFIG_GPIOLIB -- cgit v1.2.3 From 79ea73b05aaa83b7451a3aee48a5e835fb0f3864 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Fri, 15 Sep 2017 20:13:28 +0200 Subject: mmc: sdhci-pci: remove outdated declaration The function was removed half a year ago, so this declaration can go, too. Fixes: 51ced59cc02e0d ("mmc: sdhci-pci: Use ACPI DSM to get driver strength for some Intel devices") Signed-off-by: Wolfram Sang Acked-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/sdhci-pci-data.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci-pci-data.h b/include/linux/mmc/sdhci-pci-data.h index fda15b6d4135..618f90d6e1ba 100644 --- a/include/linux/mmc/sdhci-pci-data.h +++ b/include/linux/mmc/sdhci-pci-data.h @@ -14,7 +14,4 @@ struct sdhci_pci_data { extern struct sdhci_pci_data *(*sdhci_pci_get_data)(struct pci_dev *pdev, int slotno); - -extern int sdhci_pci_spt_drive_strength; - #endif -- cgit v1.2.3 From 6c0cedd1ef9527ef13e66875746570e76a3188a7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 22 Sep 2017 15:36:51 +0300 Subject: mmc: core: Introduce host claiming by context Currently the host can be claimed by a task. Change this so that the host can be claimed by a context that may or may not be a task. This provides for the host to be claimed by a block driver queue to support blk-mq, while maintaining compatibility with the existing use of mmc_claim_host(). Signed-off-by: Adrian Hunter Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 9a43763a68ad..443f7a8cdfe5 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -255,6 +255,10 @@ struct mmc_supply { struct regulator *vqmmc; /* Optional Vccq supply */ }; +struct mmc_ctx { + struct task_struct *task; +}; + struct mmc_host { struct device *parent; struct device class_dev; @@ -388,8 +392,9 @@ struct mmc_host { struct mmc_card *card; /* device attached to this host */ wait_queue_head_t wq; - struct task_struct *claimer; /* task that has host claimed */ + struct mmc_ctx *claimer; /* context that has host claimed */ int claim_cnt; /* "claim" nesting count */ + struct mmc_ctx default_ctx; /* default context */ struct delayed_work detect; int detect_change; /* card detect flag */ -- cgit v1.2.3 From 72a5af554df837e373efb0d6c8fc68c568f9a7ac Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Fri, 22 Sep 2017 15:36:52 +0300 Subject: mmc: core: Add support for handling CQE requests Add core support for handling CQE requests, including starting, completing and recovering. Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 443f7a8cdfe5..c296f4351c1d 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -474,6 +474,8 @@ void mmc_detect_change(struct mmc_host *, unsigned long delay); void mmc_request_done(struct mmc_host *, struct mmc_request *); void mmc_command_done(struct mmc_host *host, struct mmc_request *mrq); +void mmc_cqe_request_done(struct mmc_host *host, struct mmc_request *mrq); + static inline void mmc_signal_sdio_irq(struct mmc_host *host) { host->ops->enable_sdio_irq(host, 0); -- cgit v1.2.3 From 563be8b60324f8947e9c71ec81a8cb67ea2f2127 Mon Sep 17 00:00:00 2001 From: rui_feng Date: Fri, 22 Sep 2017 16:07:35 +0800 Subject: mmc: rtsx: fix tuning fail on gen3 PCI-Express On gen3 PCI-Express we should send command one by one. If sending many commands in one packet will lead to a failure. Signed-off-by: rui_feng Signed-off-by: Ulf Hansson --- include/linux/mfd/rtsx_pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 116816fb9110..7815d8db7eca 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -334,6 +334,7 @@ #define DCM_DRP_RD_DATA_H 0xFC29 #define SD_VPCLK0_CTL 0xFC2A #define SD_VPCLK1_CTL 0xFC2B +#define PHASE_SELECT_MASK 0x1F #define SD_DCMPS0_CTL 0xFC2C #define SD_DCMPS1_CTL 0xFC2D #define SD_VPTX_CTL SD_VPCLK0_CTL -- cgit v1.2.3 From 6186d06c519e217351fac95b545f334f8582af90 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sun, 15 Oct 2017 14:46:14 +0200 Subject: mmc: parse new binding for eMMC fixed driver type Parse the new binding and store it in the host struct after doing some sanity checks. The code is designed to support fixed SD driver type if we ever need that. Signed-off-by: Wolfram Sang Reviewed-by: Simon Horman Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index c296f4351c1d..e7743eca1021 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -354,6 +354,8 @@ struct mmc_host { #define MMC_CAP2_CQE (1 << 23) /* Has eMMC command queue engine */ #define MMC_CAP2_CQE_DCMD (1 << 24) /* CQE can issue a direct command */ + int fixed_drv_type; /* fixed driver type for non-removable media */ + mmc_pm_flag_t pm_caps; /* supported pm features */ /* host specific block data */ -- cgit v1.2.3 From 0f295b0650c90362b4111f46d7f9149a0a4191be Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 13 Oct 2017 11:54:33 -0600 Subject: rtc: Allow rtc drivers to specify the tv_nsec value for ntp ntp is currently hardwired to try and call the rtc set when wall clock tv_nsec is 0.5 seconds. This historical behaviour works well with certain PC RTCs, but is not universal to all rtc hardware. Change how this works by introducing the driver specific concept of set_offset_nsec, the delay between current wall clock time and the target time to set (with a 0 tv_nsecs). For x86-style CMOS set_offset_nsec should be -0.5 s which causes the last second to be written 0.5 s after it has started. For compat with the old rtc_set_ntp_time, the value is defaulted to + 0.5 s, which causes the next second to be written 0.5s before it starts, as things were before this patch. Testing shows many non-x86 RTCs would like set_offset_nsec ~= 0, so ultimately each RTC driver should set the set_offset_nsec according to its needs, and non x86 architectures should stop using update_persistent_clock64 in order to access this feature. Future patches will revise the drivers as needed. Since CMOS and RTC now have very different handling they are split into two dedicated code paths, sharing the support code, and ifdefs are replaced with IS_ENABLED. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Jason Gunthorpe Signed-off-by: John Stultz --- include/linux/rtc.h | 43 ++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 42 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rtc.h b/include/linux/rtc.h index e6d0f9c1cafd..5b13fa029fd6 100644 --- a/include/linux/rtc.h +++ b/include/linux/rtc.h @@ -135,6 +135,14 @@ struct rtc_device { /* Some hardware can't support UIE mode */ int uie_unsupported; + /* Number of nsec it takes to set the RTC clock. This influences when + * the set ops are called. An offset: + * - of 0.5 s will call RTC set for wall clock time 10.0 s at 9.5 s + * - of 1.5 s will call RTC set for wall clock time 10.0 s at 8.5 s + * - of -0.5 s will call RTC set for wall clock time 10.0 s at 10.5 s + */ + long set_offset_nsec; + bool registered; struct nvmem_config *nvmem_config; @@ -172,7 +180,7 @@ extern void devm_rtc_device_unregister(struct device *dev, extern int rtc_read_time(struct rtc_device *rtc, struct rtc_time *tm); extern int rtc_set_time(struct rtc_device *rtc, struct rtc_time *tm); -extern int rtc_set_ntp_time(struct timespec64 now); +extern int rtc_set_ntp_time(struct timespec64 now, unsigned long *target_nsec); int __rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alarm); extern int rtc_read_alarm(struct rtc_device *rtc, struct rtc_wkalrm *alrm); @@ -221,6 +229,39 @@ static inline bool is_leap_year(unsigned int year) return (!(year % 4) && (year % 100)) || !(year % 400); } +/* Determine if we can call to driver to set the time. Drivers can only be + * called to set a second aligned time value, and the field set_offset_nsec + * specifies how far away from the second aligned time to call the driver. + * + * This also computes 'to_set' which is the time we are trying to set, and has + * a zero in tv_nsecs, such that: + * to_set - set_delay_nsec == now +/- FUZZ + * + */ +static inline bool rtc_tv_nsec_ok(s64 set_offset_nsec, + struct timespec64 *to_set, + const struct timespec64 *now) +{ + /* Allowed error in tv_nsec, arbitarily set to 5 jiffies in ns. */ + const unsigned long TIME_SET_NSEC_FUZZ = TICK_NSEC * 5; + struct timespec64 delay = {.tv_sec = 0, + .tv_nsec = set_offset_nsec}; + + *to_set = timespec64_add(*now, delay); + + if (to_set->tv_nsec < TIME_SET_NSEC_FUZZ) { + to_set->tv_nsec = 0; + return true; + } + + if (to_set->tv_nsec > NSEC_PER_SEC - TIME_SET_NSEC_FUZZ) { + to_set->tv_sec++; + to_set->tv_nsec = 0; + return true; + } + return false; +} + #define rtc_register_device(device) \ __rtc_register_device(THIS_MODULE, device) -- cgit v1.2.3 From e0956dcc4ba74ec4b17e32fc9a156fcba1ef6610 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:44 +0200 Subject: timekeeping: Consolidate timekeeping_inject_offset code The code to check the adjtimex() or clock_adjtime() arguments is spread out across multiple files for presumably only historic reasons. As a preparatation for a rework to get rid of the use of 'struct timeval' and 'struct timespec' in there, this moves all the portions into kernel/time/timekeeping.c and marks them as 'static'. The warp_clock() function here is not as closely related as the others, but I feel it still makes sense to move it here in order to consolidate all callers of timekeeping_inject_offset(). Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann [jstultz: Whitespace fixup] Signed-off-by: John Stultz --- include/linux/time.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 9bc1f945777c..c0fbad08448f 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -134,32 +134,6 @@ static inline bool timeval_valid(const struct timeval *tv) extern struct timespec timespec_trunc(struct timespec t, unsigned gran); -/* - * Validates if a timespec/timeval used to inject a time offset is valid. - * Offsets can be postive or negative. The value of the timeval/timespec - * is the sum of its fields, but *NOTE*: the field tv_usec/tv_nsec must - * always be non-negative. - */ -static inline bool timeval_inject_offset_valid(const struct timeval *tv) -{ - /* We don't check the tv_sec as it can be positive or negative */ - - /* Can't have more microseconds then a second */ - if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) - return false; - return true; -} - -static inline bool timespec_inject_offset_valid(const struct timespec *ts) -{ - /* We don't check the tv_sec as it can be positive or negative */ - - /* Can't have more nanoseconds then a second */ - if (ts->tv_nsec < 0 || ts->tv_nsec >= NSEC_PER_SEC) - return false; - return true; -} - /* Some architectures do not supply their own clocksource. * This is mainly the case in architectures that get their * inter-tick times by reading the counter on their interval -- cgit v1.2.3 From 85bf19e7df2479140eff2348a4e6a9c19b5c3960 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:46 +0200 Subject: time: Remove unused functions The (slow but) ongoing work on conversion from timespec to timespec64 has led some timespec based helper functions to become unused. No new code should use them, so we can remove the functions entirely. I'm planning to obsolete additional interfaces next and remove more of these. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/time.h | 18 ------------------ include/linux/time64.h | 28 ---------------------------- 2 files changed, 46 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index c0fbad08448f..0e8a80918484 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -39,15 +39,6 @@ static inline int timespec_compare(const struct timespec *lhs, const struct time return lhs->tv_nsec - rhs->tv_nsec; } -static inline int timeval_compare(const struct timeval *lhs, const struct timeval *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_usec - rhs->tv_usec; -} - extern time64_t mktime64(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); @@ -65,15 +56,6 @@ static inline unsigned long mktime(const unsigned int year, extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec); -/* - * timespec_add_safe assumes both values are positive and checks - * for overflow. It will return TIME_T_MAX if the reutrn would be - * smaller then either of the arguments. - */ -extern struct timespec timespec_add_safe(const struct timespec lhs, - const struct timespec rhs); - - static inline struct timespec timespec_add(struct timespec lhs, struct timespec rhs) { diff --git a/include/linux/time64.h b/include/linux/time64.h index 980c71b3001a..402b595c76d2 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -53,16 +53,6 @@ static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) return ts; } -static inline struct itimerspec itimerspec64_to_itimerspec(struct itimerspec64 *its64) -{ - return *its64; -} - -static inline struct itimerspec64 itimerspec_to_itimerspec64(struct itimerspec *its) -{ - return *its; -} - # define timespec64_equal timespec_equal # define timespec64_compare timespec_compare # define set_normalized_timespec64 set_normalized_timespec @@ -94,24 +84,6 @@ static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) return ret; } -static inline struct itimerspec itimerspec64_to_itimerspec(struct itimerspec64 *its64) -{ - struct itimerspec ret; - - ret.it_interval = timespec64_to_timespec(its64->it_interval); - ret.it_value = timespec64_to_timespec(its64->it_value); - return ret; -} - -static inline struct itimerspec64 itimerspec_to_itimerspec64(struct itimerspec *its) -{ - struct itimerspec64 ret; - - ret.it_interval = timespec_to_timespec64(its->it_interval); - ret.it_value = timespec_to_timespec64(its->it_value); - return ret; -} - static inline int timespec64_equal(const struct timespec64 *a, const struct timespec64 *b) { -- cgit v1.2.3 From 5dbf20127f8cca8588ad0b0e3e8ded587ac7afa0 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:47 +0200 Subject: time: Move time_t based interfaces to time32.h Interfaces based on 'struct timespec' or 'struct timeval' should no longer be used for new code, which can use either ktime_t or 'struct timespec64' instead. To make this a little clearer, this moves the various helpers into a new time32.h header. For the moment, this gets included by the normal time.h, but we may be able to separate it entirely when most users of time32.h are gone. Individual helpers in the new file can get removed once they become unused in the future. Since the contents of time32.h look a lot like what's in time64.h, I'm reordering them during the move to make them more similar, and to allow a follow-up patch to redirect the 'timespec' based functions to thei 'timespec64' based counterparts on 64-bit architectures later. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann [jstultz: Whitespace & checkpatch fixups] Signed-off-by: John Stultz --- include/linux/time.h | 163 +-------------------------------------------- include/linux/time32.h | 176 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 177 insertions(+), 162 deletions(-) create mode 100644 include/linux/time32.h (limited to 'include/linux') diff --git a/include/linux/time.h b/include/linux/time.h index 0e8a80918484..c375f54a678d 100644 --- a/include/linux/time.h +++ b/include/linux/time.h @@ -17,105 +17,10 @@ int get_itimerspec64(struct itimerspec64 *it, int put_itimerspec64(const struct itimerspec64 *it, struct itimerspec __user *uit); -#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) - -static inline int timespec_equal(const struct timespec *a, - const struct timespec *b) -{ - return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); -} - -/* - * lhs < rhs: return <0 - * lhs == rhs: return 0 - * lhs > rhs: return >0 - */ -static inline int timespec_compare(const struct timespec *lhs, const struct timespec *rhs) -{ - if (lhs->tv_sec < rhs->tv_sec) - return -1; - if (lhs->tv_sec > rhs->tv_sec) - return 1; - return lhs->tv_nsec - rhs->tv_nsec; -} - extern time64_t mktime64(const unsigned int year, const unsigned int mon, const unsigned int day, const unsigned int hour, const unsigned int min, const unsigned int sec); -/** - * Deprecated. Use mktime64(). - */ -static inline unsigned long mktime(const unsigned int year, - const unsigned int mon, const unsigned int day, - const unsigned int hour, const unsigned int min, - const unsigned int sec) -{ - return mktime64(year, mon, day, hour, min, sec); -} - -extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec); - -static inline struct timespec timespec_add(struct timespec lhs, - struct timespec rhs) -{ - struct timespec ts_delta; - set_normalized_timespec(&ts_delta, lhs.tv_sec + rhs.tv_sec, - lhs.tv_nsec + rhs.tv_nsec); - return ts_delta; -} - -/* - * sub = lhs - rhs, in normalized form - */ -static inline struct timespec timespec_sub(struct timespec lhs, - struct timespec rhs) -{ - struct timespec ts_delta; - set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec, - lhs.tv_nsec - rhs.tv_nsec); - return ts_delta; -} - -/* - * Returns true if the timespec is norm, false if denorm: - */ -static inline bool timespec_valid(const struct timespec *ts) -{ - /* Dates before 1970 are bogus */ - if (ts->tv_sec < 0) - return false; - /* Can't have more nanoseconds then a second */ - if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) - return false; - return true; -} - -static inline bool timespec_valid_strict(const struct timespec *ts) -{ - if (!timespec_valid(ts)) - return false; - /* Disallow values that could overflow ktime_t */ - if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) - return false; - return true; -} - -static inline bool timeval_valid(const struct timeval *tv) -{ - /* Dates before 1970 are bogus */ - if (tv->tv_sec < 0) - return false; - - /* Can't have more microseconds then a second */ - if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) - return false; - - return true; -} - -extern struct timespec timespec_trunc(struct timespec t, unsigned gran); - /* Some architectures do not supply their own clocksource. * This is mainly the case in architectures that get their * inter-tick times by reading the counter on their interval @@ -164,73 +69,7 @@ struct tm { void time64_to_tm(time64_t totalsecs, int offset, struct tm *result); -/** - * time_to_tm - converts the calendar time to local broken-down time - * - * @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970, - * Coordinated Universal Time (UTC). - * @offset offset seconds adding to totalsecs. - * @result pointer to struct tm variable to receive broken-down time - */ -static inline void time_to_tm(time_t totalsecs, int offset, struct tm *result) -{ - time64_to_tm(totalsecs, offset, result); -} - -/** - * timespec_to_ns - Convert timespec to nanoseconds - * @ts: pointer to the timespec variable to be converted - * - * Returns the scalar nanosecond representation of the timespec - * parameter. - */ -static inline s64 timespec_to_ns(const struct timespec *ts) -{ - return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; -} - -/** - * timeval_to_ns - Convert timeval to nanoseconds - * @ts: pointer to the timeval variable to be converted - * - * Returns the scalar nanosecond representation of the timeval - * parameter. - */ -static inline s64 timeval_to_ns(const struct timeval *tv) -{ - return ((s64) tv->tv_sec * NSEC_PER_SEC) + - tv->tv_usec * NSEC_PER_USEC; -} - -/** - * ns_to_timespec - Convert nanoseconds to timespec - * @nsec: the nanoseconds value to be converted - * - * Returns the timespec representation of the nsec parameter. - */ -extern struct timespec ns_to_timespec(const s64 nsec); - -/** - * ns_to_timeval - Convert nanoseconds to timeval - * @nsec: the nanoseconds value to be converted - * - * Returns the timeval representation of the nsec parameter. - */ -extern struct timeval ns_to_timeval(const s64 nsec); - -/** - * timespec_add_ns - Adds nanoseconds to a timespec - * @a: pointer to timespec to be incremented - * @ns: unsigned nanoseconds value to be added - * - * This must always be inlined because its used from the x86-64 vdso, - * which cannot call other kernel functions. - */ -static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) -{ - a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); - a->tv_nsec = ns; -} +# include static inline bool itimerspec64_valid(const struct itimerspec64 *its) { diff --git a/include/linux/time32.h b/include/linux/time32.h new file mode 100644 index 000000000000..9b9c43f0d39b --- /dev/null +++ b/include/linux/time32.h @@ -0,0 +1,176 @@ +#ifndef _LINUX_TIME32_H +#define _LINUX_TIME32_H +/* + * These are all interfaces based on the old time_t definition + * that overflows in 2038 on 32-bit architectures. New code + * should use the replacements based on time64_t and timespec64. + * + * Any interfaces in here that become unused as we migrate + * code to time64_t should get removed. + */ + +#include + +#define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) + +static inline int timespec_equal(const struct timespec *a, + const struct timespec *b) +{ + return (a->tv_sec == b->tv_sec) && (a->tv_nsec == b->tv_nsec); +} + +/* + * lhs < rhs: return <0 + * lhs == rhs: return 0 + * lhs > rhs: return >0 + */ +static inline int timespec_compare(const struct timespec *lhs, const struct timespec *rhs) +{ + if (lhs->tv_sec < rhs->tv_sec) + return -1; + if (lhs->tv_sec > rhs->tv_sec) + return 1; + return lhs->tv_nsec - rhs->tv_nsec; +} + +extern void set_normalized_timespec(struct timespec *ts, time_t sec, s64 nsec); + +static inline struct timespec timespec_add(struct timespec lhs, + struct timespec rhs) +{ + struct timespec ts_delta; + + set_normalized_timespec(&ts_delta, lhs.tv_sec + rhs.tv_sec, + lhs.tv_nsec + rhs.tv_nsec); + return ts_delta; +} + +/* + * sub = lhs - rhs, in normalized form + */ +static inline struct timespec timespec_sub(struct timespec lhs, + struct timespec rhs) +{ + struct timespec ts_delta; + + set_normalized_timespec(&ts_delta, lhs.tv_sec - rhs.tv_sec, + lhs.tv_nsec - rhs.tv_nsec); + return ts_delta; +} + +/* + * Returns true if the timespec is norm, false if denorm: + */ +static inline bool timespec_valid(const struct timespec *ts) +{ + /* Dates before 1970 are bogus */ + if (ts->tv_sec < 0) + return false; + /* Can't have more nanoseconds then a second */ + if ((unsigned long)ts->tv_nsec >= NSEC_PER_SEC) + return false; + return true; +} + +static inline bool timespec_valid_strict(const struct timespec *ts) +{ + if (!timespec_valid(ts)) + return false; + /* Disallow values that could overflow ktime_t */ + if ((unsigned long long)ts->tv_sec >= KTIME_SEC_MAX) + return false; + return true; +} + +/** + * timespec_to_ns - Convert timespec to nanoseconds + * @ts: pointer to the timespec variable to be converted + * + * Returns the scalar nanosecond representation of the timespec + * parameter. + */ +static inline s64 timespec_to_ns(const struct timespec *ts) +{ + return ((s64) ts->tv_sec * NSEC_PER_SEC) + ts->tv_nsec; +} + +/** + * ns_to_timespec - Convert nanoseconds to timespec + * @nsec: the nanoseconds value to be converted + * + * Returns the timespec representation of the nsec parameter. + */ +extern struct timespec ns_to_timespec(const s64 nsec); + +/** + * timespec_add_ns - Adds nanoseconds to a timespec + * @a: pointer to timespec to be incremented + * @ns: unsigned nanoseconds value to be added + * + * This must always be inlined because its used from the x86-64 vdso, + * which cannot call other kernel functions. + */ +static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) +{ + a->tv_sec += __iter_div_u64_rem(a->tv_nsec + ns, NSEC_PER_SEC, &ns); + a->tv_nsec = ns; +} + +/** + * time_to_tm - converts the calendar time to local broken-down time + * + * @totalsecs the number of seconds elapsed since 00:00:00 on January 1, 1970, + * Coordinated Universal Time (UTC). + * @offset offset seconds adding to totalsecs. + * @result pointer to struct tm variable to receive broken-down time + */ +static inline void time_to_tm(time_t totalsecs, int offset, struct tm *result) +{ + time64_to_tm(totalsecs, offset, result); +} + +static inline unsigned long mktime(const unsigned int year, + const unsigned int mon, const unsigned int day, + const unsigned int hour, const unsigned int min, + const unsigned int sec) +{ + return mktime64(year, mon, day, hour, min, sec); +} + +static inline bool timeval_valid(const struct timeval *tv) +{ + /* Dates before 1970 are bogus */ + if (tv->tv_sec < 0) + return false; + + /* Can't have more microseconds then a second */ + if (tv->tv_usec < 0 || tv->tv_usec >= USEC_PER_SEC) + return false; + + return true; +} + +extern struct timespec timespec_trunc(struct timespec t, unsigned int gran); + +/** + * timeval_to_ns - Convert timeval to nanoseconds + * @ts: pointer to the timeval variable to be converted + * + * Returns the scalar nanosecond representation of the timeval + * parameter. + */ +static inline s64 timeval_to_ns(const struct timeval *tv) +{ + return ((s64) tv->tv_sec * NSEC_PER_SEC) + + tv->tv_usec * NSEC_PER_USEC; +} + +/** + * ns_to_timeval - Convert nanoseconds to timeval + * @nsec: the nanoseconds value to be converted + * + * Returns the timeval representation of the nsec parameter. + */ +extern struct timeval ns_to_timeval(const s64 nsec); + +#endif -- cgit v1.2.3 From abc8f96e3eb846fcf6333395ee1f6ed4a734576c Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:48 +0200 Subject: time: Move time_t conversion helpers to time32.h On 64-bit architectures, the timespec64 based helpers in linux/time.h are defined as macros pointing to their timespec based counterparts. This made sense when they were first introduced, but as we are migrating away from timespec in general, it's much less intuitive now. This changes the macros to work in the exact opposite way: we always provide the timespec64 based helpers and define the old interfaces as macros for them. Now we can move those macros into linux/time32.h, which already contains the respective helpers for 32-bit architectures. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/time32.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ include/linux/time64.h | 50 +------------------------------------------------- 2 files changed, 46 insertions(+), 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/time32.h b/include/linux/time32.h index 9b9c43f0d39b..65b1de25198d 100644 --- a/include/linux/time32.h +++ b/include/linux/time32.h @@ -13,6 +13,49 @@ #define TIME_T_MAX (time_t)((1UL << ((sizeof(time_t) << 3) - 1)) - 1) +#if __BITS_PER_LONG == 64 + +/* timespec64 is defined as timespec here */ +static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) +{ + return ts64; +} + +static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) +{ + return ts; +} + +# define timespec_equal timespec64_equal +# define timespec_compare timespec64_compare +# define set_normalized_timespec set_normalized_timespec64 +# define timespec_add timespec64_add +# define timespec_sub timespec64_sub +# define timespec_valid timespec64_valid +# define timespec_valid_strict timespec64_valid_strict +# define timespec_to_ns timespec64_to_ns +# define ns_to_timespec ns_to_timespec64 +# define timespec_add_ns timespec64_add_ns + +#else +static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) +{ + struct timespec ret; + + ret.tv_sec = (time_t)ts64.tv_sec; + ret.tv_nsec = ts64.tv_nsec; + return ret; +} + +static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) +{ + struct timespec64 ret; + + ret.tv_sec = ts.tv_sec; + ret.tv_nsec = ts.tv_nsec; + return ret; +} + static inline int timespec_equal(const struct timespec *a, const struct timespec *b) { @@ -116,6 +159,8 @@ static __always_inline void timespec_add_ns(struct timespec *a, u64 ns) a->tv_nsec = ns; } +#endif + /** * time_to_tm - converts the calendar time to local broken-down time * diff --git a/include/linux/time64.h b/include/linux/time64.h index 402b595c76d2..ec1888cf5378 100644 --- a/include/linux/time64.h +++ b/include/linux/time64.h @@ -7,11 +7,8 @@ typedef __s64 time64_t; typedef __u64 timeu64_t; -/* - * This wants to go into uapi/linux/time.h once we agreed about the - * userspace interfaces. - */ #if __BITS_PER_LONG == 64 +/* this trick allows us to optimize out timespec64_to_timespec */ # define timespec64 timespec #define itimerspec64 itimerspec #else @@ -41,49 +38,6 @@ struct itimerspec64 { #define KTIME_MAX ((s64)~((u64)1 << 63)) #define KTIME_SEC_MAX (KTIME_MAX / NSEC_PER_SEC) -#if __BITS_PER_LONG == 64 - -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - return ts64; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - return ts; -} - -# define timespec64_equal timespec_equal -# define timespec64_compare timespec_compare -# define set_normalized_timespec64 set_normalized_timespec -# define timespec64_add timespec_add -# define timespec64_sub timespec_sub -# define timespec64_valid timespec_valid -# define timespec64_valid_strict timespec_valid_strict -# define timespec64_to_ns timespec_to_ns -# define ns_to_timespec64 ns_to_timespec -# define timespec64_add_ns timespec_add_ns - -#else - -static inline struct timespec timespec64_to_timespec(const struct timespec64 ts64) -{ - struct timespec ret; - - ret.tv_sec = (time_t)ts64.tv_sec; - ret.tv_nsec = ts64.tv_nsec; - return ret; -} - -static inline struct timespec64 timespec_to_timespec64(const struct timespec ts) -{ - struct timespec64 ret; - - ret.tv_sec = ts.tv_sec; - ret.tv_nsec = ts.tv_nsec; - return ret; -} - static inline int timespec64_equal(const struct timespec64 *a, const struct timespec64 *b) { @@ -185,8 +139,6 @@ static __always_inline void timespec64_add_ns(struct timespec64 *a, u64 ns) a->tv_nsec = ns; } -#endif - /* * timespec64_add_safe assumes both values are positive and checks for * overflow. It will return TIME64_MAX in case of overflow. -- cgit v1.2.3 From 6546911ed369af8d747215ff8b6144618e91c6ab Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 19 Oct 2017 13:14:49 +0200 Subject: time: Move old timekeeping interfaces to timekeeping32.h The interfaces based on 'struct timespec' and 'unsigned long' seconds are no longer recommended for new code, and we are trying to migrate to ktime_t based interfaces and other y2038-safe variants. This moves all the legacy interfaces from linux/timekeeping.h into a new timekeeping32.h to better document this. Cc: Thomas Gleixner Cc: Ingo Molnar Cc: Miroslav Lichvar Cc: Richard Cochran Cc: Prarit Bhargava Cc: Stephen Boyd Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/ktime.h | 1 + include/linux/timekeeping.h | 137 +------------------------------------- include/linux/timekeeping32.h | 151 ++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 154 insertions(+), 135 deletions(-) create mode 100644 include/linux/timekeeping32.h (limited to 'include/linux') diff --git a/include/linux/ktime.h b/include/linux/ktime.h index 0c8bd45c8206..5b9fddbaac41 100644 --- a/include/linux/ktime.h +++ b/include/linux/ktime.h @@ -270,5 +270,6 @@ static inline ktime_t ms_to_ktime(u64 ms) } # include +# include #endif diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ddc229ff6d1e..405beea4e71b 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -15,27 +15,16 @@ extern void xtime_update(unsigned long ticks); /* * Get and set timeofday */ -extern void do_gettimeofday(struct timeval *tv); extern int do_settimeofday64(const struct timespec64 *ts); extern int do_sys_settimeofday64(const struct timespec64 *tv, const struct timezone *tz); /* * Kernel time accessors */ -unsigned long get_seconds(void); struct timespec64 current_kernel_time64(void); -/* does not take xtime_lock */ -struct timespec __current_kernel_time(void); - -static inline struct timespec current_kernel_time(void) -{ - struct timespec64 now = current_kernel_time64(); - - return timespec64_to_timespec(now); -} /* - * timespec based interfaces + * timespec64 based interfaces */ struct timespec64 get_monotonic_coarse64(void); extern void getrawmonotonic64(struct timespec64 *ts); @@ -47,116 +36,6 @@ extern int __getnstimeofday64(struct timespec64 *tv); extern void getnstimeofday64(struct timespec64 *tv); extern void getboottime64(struct timespec64 *ts); -#if BITS_PER_LONG == 64 -/** - * Deprecated. Use do_settimeofday64(). - */ -static inline int do_settimeofday(const struct timespec *ts) -{ - return do_settimeofday64(ts); -} - -static inline int __getnstimeofday(struct timespec *ts) -{ - return __getnstimeofday64(ts); -} - -static inline void getnstimeofday(struct timespec *ts) -{ - getnstimeofday64(ts); -} - -static inline void ktime_get_ts(struct timespec *ts) -{ - ktime_get_ts64(ts); -} - -static inline void ktime_get_real_ts(struct timespec *ts) -{ - getnstimeofday64(ts); -} - -static inline void getrawmonotonic(struct timespec *ts) -{ - getrawmonotonic64(ts); -} - -static inline struct timespec get_monotonic_coarse(void) -{ - return get_monotonic_coarse64(); -} - -static inline void getboottime(struct timespec *ts) -{ - return getboottime64(ts); -} -#else -/** - * Deprecated. Use do_settimeofday64(). - */ -static inline int do_settimeofday(const struct timespec *ts) -{ - struct timespec64 ts64; - - ts64 = timespec_to_timespec64(*ts); - return do_settimeofday64(&ts64); -} - -static inline int __getnstimeofday(struct timespec *ts) -{ - struct timespec64 ts64; - int ret = __getnstimeofday64(&ts64); - - *ts = timespec64_to_timespec(ts64); - return ret; -} - -static inline void getnstimeofday(struct timespec *ts) -{ - struct timespec64 ts64; - - getnstimeofday64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void ktime_get_ts(struct timespec *ts) -{ - struct timespec64 ts64; - - ktime_get_ts64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void ktime_get_real_ts(struct timespec *ts) -{ - struct timespec64 ts64; - - getnstimeofday64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline void getrawmonotonic(struct timespec *ts) -{ - struct timespec64 ts64; - - getrawmonotonic64(&ts64); - *ts = timespec64_to_timespec(ts64); -} - -static inline struct timespec get_monotonic_coarse(void) -{ - return timespec64_to_timespec(get_monotonic_coarse64()); -} - -static inline void getboottime(struct timespec *ts) -{ - struct timespec64 ts64; - - getboottime64(&ts64); - *ts = timespec64_to_timespec(ts64); -} -#endif - #define ktime_get_real_ts64(ts) getnstimeofday64(ts) /* @@ -241,23 +120,13 @@ extern u64 ktime_get_raw_fast_ns(void); extern u64 ktime_get_boot_fast_ns(void); /* - * Timespec interfaces utilizing the ktime based ones + * timespec64 interfaces utilizing the ktime based ones */ -static inline void get_monotonic_boottime(struct timespec *ts) -{ - *ts = ktime_to_timespec(ktime_get_boottime()); -} - static inline void get_monotonic_boottime64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_boottime()); } -static inline void timekeeping_clocktai(struct timespec *ts) -{ - *ts = ktime_to_timespec(ktime_get_clocktai()); -} - static inline void timekeeping_clocktai64(struct timespec64 *ts) { *ts = ktime_to_timespec64(ktime_get_clocktai()); @@ -340,10 +209,8 @@ extern void ktime_get_snapshot(struct system_time_snapshot *systime_snapshot); */ extern int persistent_clock_is_local; -extern void read_persistent_clock(struct timespec *ts); extern void read_persistent_clock64(struct timespec64 *ts); extern void read_boot_clock64(struct timespec64 *ts); -extern int update_persistent_clock(struct timespec now); extern int update_persistent_clock64(struct timespec64 now); diff --git a/include/linux/timekeeping32.h b/include/linux/timekeeping32.h new file mode 100644 index 000000000000..af4114d5dc17 --- /dev/null +++ b/include/linux/timekeeping32.h @@ -0,0 +1,151 @@ +#ifndef _LINUX_TIMEKEEPING32_H +#define _LINUX_TIMEKEEPING32_H +/* + * These interfaces are all based on the old timespec type + * and should get replaced with the timespec64 based versions + * over time so we can remove the file here. + */ + +extern void do_gettimeofday(struct timeval *tv); +unsigned long get_seconds(void); + +/* does not take xtime_lock */ +struct timespec __current_kernel_time(void); + +static inline struct timespec current_kernel_time(void) +{ + struct timespec64 now = current_kernel_time64(); + + return timespec64_to_timespec(now); +} + +#if BITS_PER_LONG == 64 +/** + * Deprecated. Use do_settimeofday64(). + */ +static inline int do_settimeofday(const struct timespec *ts) +{ + return do_settimeofday64(ts); +} + +static inline int __getnstimeofday(struct timespec *ts) +{ + return __getnstimeofday64(ts); +} + +static inline void getnstimeofday(struct timespec *ts) +{ + getnstimeofday64(ts); +} + +static inline void ktime_get_ts(struct timespec *ts) +{ + ktime_get_ts64(ts); +} + +static inline void ktime_get_real_ts(struct timespec *ts) +{ + getnstimeofday64(ts); +} + +static inline void getrawmonotonic(struct timespec *ts) +{ + getrawmonotonic64(ts); +} + +static inline struct timespec get_monotonic_coarse(void) +{ + return get_monotonic_coarse64(); +} + +static inline void getboottime(struct timespec *ts) +{ + return getboottime64(ts); +} +#else +/** + * Deprecated. Use do_settimeofday64(). + */ +static inline int do_settimeofday(const struct timespec *ts) +{ + struct timespec64 ts64; + + ts64 = timespec_to_timespec64(*ts); + return do_settimeofday64(&ts64); +} + +static inline int __getnstimeofday(struct timespec *ts) +{ + struct timespec64 ts64; + int ret = __getnstimeofday64(&ts64); + + *ts = timespec64_to_timespec(ts64); + return ret; +} + +static inline void getnstimeofday(struct timespec *ts) +{ + struct timespec64 ts64; + + getnstimeofday64(&ts64); + *ts = timespec64_to_timespec(ts64); +} + +static inline void ktime_get_ts(struct timespec *ts) +{ + struct timespec64 ts64; + + ktime_get_ts64(&ts64); + *ts = timespec64_to_timespec(ts64); +} + +static inline void ktime_get_real_ts(struct timespec *ts) +{ + struct timespec64 ts64; + + getnstimeofday64(&ts64); + *ts = timespec64_to_timespec(ts64); +} + +static inline void getrawmonotonic(struct timespec *ts) +{ + struct timespec64 ts64; + + getrawmonotonic64(&ts64); + *ts = timespec64_to_timespec(ts64); +} + +static inline struct timespec get_monotonic_coarse(void) +{ + return timespec64_to_timespec(get_monotonic_coarse64()); +} + +static inline void getboottime(struct timespec *ts) +{ + struct timespec64 ts64; + + getboottime64(&ts64); + *ts = timespec64_to_timespec(ts64); +} +#endif + +/* + * Timespec interfaces utilizing the ktime based ones + */ +static inline void get_monotonic_boottime(struct timespec *ts) +{ + *ts = ktime_to_timespec(ktime_get_boottime()); +} + +static inline void timekeeping_clocktai(struct timespec *ts) +{ + *ts = ktime_to_timespec(ktime_get_clocktai()); +} + +/* + * Persistent clock related interfaces + */ +extern void read_persistent_clock(struct timespec *ts); +extern int update_persistent_clock(struct timespec now); + +#endif -- cgit v1.2.3 From d82bd359972a7fe71a778396cf287bc9f9f3b981 Mon Sep 17 00:00:00 2001 From: Avaneesh Kumar Dwivedi Date: Tue, 24 Oct 2017 21:22:24 +0530 Subject: firmware: scm: Add new SCM call API for switching memory ownership Two different processors on a SOC need to switch memory ownership during load/unload. To enable this, second level memory map table need to be updated, which is done by secure layer. This patch adds the interface for making secure monitor call for memory ownership switching request. Acked-by: Andy Gross Signed-off-by: Avaneesh Kumar Dwivedi [bjorn: Minor style and kerneldoc updates] Signed-off-by: Bjorn Andersson --- include/linux/qcom_scm.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index e5380471c2cd..6f8da9e182f9 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -23,6 +23,19 @@ struct qcom_scm_hdcp_req { u32 val; }; +struct qcom_scm_vmperm { + int vmid; + int perm; +}; + +#define QCOM_SCM_VMID_HLOS 0x3 +#define QCOM_SCM_VMID_MSS_MSA 0xF +#define QCOM_SCM_PERM_READ 0x4 +#define QCOM_SCM_PERM_WRITE 0x2 +#define QCOM_SCM_PERM_EXEC 0x1 +#define QCOM_SCM_PERM_RW (QCOM_SCM_PERM_READ | QCOM_SCM_PERM_WRITE) +#define QCOM_SCM_PERM_RWX (QCOM_SCM_PERM_RW | QCOM_SCM_PERM_EXEC) + #if IS_ENABLED(CONFIG_QCOM_SCM) extern int qcom_scm_set_cold_boot_addr(void *entry, const cpumask_t *cpus); extern int qcom_scm_set_warm_boot_addr(void *entry, const cpumask_t *cpus); @@ -37,6 +50,9 @@ extern int qcom_scm_pas_mem_setup(u32 peripheral, phys_addr_t addr, phys_addr_t size); extern int qcom_scm_pas_auth_and_reset(u32 peripheral); extern int qcom_scm_pas_shutdown(u32 peripheral); +extern int qcom_scm_assign_mem(phys_addr_t mem_addr, size_t mem_sz, + unsigned int *src, struct qcom_scm_vmperm *newvm, + int dest_cnt); extern void qcom_scm_cpu_power_down(u32 flags); extern u32 qcom_scm_get_version(void); extern int qcom_scm_set_remote_state(u32 state, u32 id); -- cgit v1.2.3 From 707ce9eac5fc3b68f98c887dddea3911a8fc4f9f Mon Sep 17 00:00:00 2001 From: James Ban Date: Mon, 30 Oct 2017 11:32:38 +0900 Subject: regulator: da9211: update for supporting da9223/4/5 This is update for supporting additional devices da9223/4/5. Only device strings is added because only package type is different. Signed-off-by: James Ban Signed-off-by: Mark Brown --- include/linux/regulator/da9211.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/regulator/da9211.h b/include/linux/regulator/da9211.h index 80cb40b7c88d..f2fd2d3bf58f 100644 --- a/include/linux/regulator/da9211.h +++ b/include/linux/regulator/da9211.h @@ -1,6 +1,6 @@ /* * da9211.h - Regulator device driver for DA9211/DA9212 - * /DA9213/DA9214/DA9215 + * /DA9213/DA9223/DA9214/DA9224/DA9215/DA9225 * Copyright (C) 2015 Dialog Semiconductor Ltd. * * This program is free software; you can redistribute it and/or @@ -25,8 +25,11 @@ enum da9211_chip_id { DA9211, DA9212, DA9213, + DA9223, DA9214, + DA9224, DA9215, + DA9225, }; struct da9211_pdata { -- cgit v1.2.3 From 6981fbf3780366093858c5d2dcdaadcd1fbb04be Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Sun, 29 Oct 2017 11:33:40 -0700 Subject: Drivers: hv: vmbus: Expose per-channel interrupts and events counters When investigating performance, it is useful to be able to look at the number of host and guest events per-channel. This is equivalent to per-device interrupt statistics. Signed-off-by: Stephen Hemminger Signed-off-by: K. Y. Srinivasan Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index ea6b5586ad77..f3e97c5f94c9 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -719,6 +719,10 @@ struct vmbus_channel { struct vmbus_close_msg close_msg; + /* Statistics */ + u64 interrupts; /* Host to Guest interrupts */ + u64 sig_events; /* Guest to Host events */ + /* Channel callback's invoked in softirq context */ struct tasklet_struct callback_event; void (*onchannel_callback)(void *context); -- cgit v1.2.3 From 3974320ca6aa68d479051f208d5c95afd1e47a4c Mon Sep 17 00:00:00 2001 From: Bob Peterson Date: Thu, 16 Feb 2017 10:27:16 -0500 Subject: GFS2: Implement iomap for block_map This patch implements iomap for block mapping, and switches the block_map function to use it under the covers. The additional IOMAP_F_BOUNDARY iomap flag indicates when iomap has reached a "metadata boundary" and fetching the next mapping is likely to incur an additional I/O. This flag is used for setting the bh buffer boundary flag. Signed-off-by: Bob Peterson Signed-off-by: Andreas Gruenbacher --- include/linux/iomap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index 2b0790dbd6ea..a61be86710b5 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -21,7 +21,8 @@ struct vm_fault; /* * Flags for all iomap mappings: */ -#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ +#define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ +#define IOMAP_F_BOUNDARY 0x02 /* mapping ends at metadata boundary */ /* * Flags that only need to be reported for IOMAP_REPORT requests: -- cgit v1.2.3 From b2f270e8747387335d80428c576118e7d87f69cc Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Oct 2017 19:04:41 -0700 Subject: module: Prepare to convert all module_param_call() prototypes After actually converting all module_param_call() function prototypes, we no longer need to do a tricky sizeof(func(thing)) type-check. Remove it. Signed-off-by: Kees Cook Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 12 ++---------- 1 file changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 1ee7b30dafec..037a90a522a5 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -228,18 +228,10 @@ struct kparam_array /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ - static const struct kernel_param_ops __param_ops_##name = \ + static const struct kernel_param_ops __param_ops_##name = \ { .flags = 0, (void *)set, (void *)get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ - name, &__param_ops_##name, arg, \ - (perm) + sizeof(__check_old_set_param(set))*0, -1, 0) - -/* We don't get oldget: it's often a new-style param_get_uint, etc. */ -static inline int -__check_old_set_param(int (*oldset)(const char *, struct kernel_param *)) -{ - return 0; -} + name, &__param_ops_##name, arg, perm, -1, 0) #ifdef CONFIG_SYSFS extern void kernel_param_lock(struct module *mod); -- cgit v1.2.3 From ece1996a21eeb344b49200e627c6660111009c10 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Oct 2017 19:04:43 -0700 Subject: module: Do not paper over type mismatches in module_param_call() The module_param_call() macro was explicitly casting the .set and .get function prototypes away. This can lead to hard-to-find type mismatches. Now that all the function prototypes have been fixed tree-wide, we can drop these casts, and use named initializers too. Signed-off-by: Kees Cook Signed-off-by: Jessica Yu --- include/linux/moduleparam.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 037a90a522a5..20386252fe3e 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -227,9 +227,9 @@ struct kparam_array VERIFY_OCTAL_PERMISSIONS(perm), level, flags, { arg } } /* Obsolete - use module_param_cb() */ -#define module_param_call(name, set, get, arg, perm) \ +#define module_param_call(name, _set, _get, arg, perm) \ static const struct kernel_param_ops __param_ops_##name = \ - { .flags = 0, (void *)set, (void *)get }; \ + { .flags = 0, .set = _set, .get = _get }; \ __module_param_call(MODULE_PARAM_PREFIX, \ name, &__param_ops_##name, arg, perm, -1, 0) -- cgit v1.2.3 From 7761daa6a1599fa5479b8da367470f632a1927e0 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 13:26:01 +0300 Subject: fsnotify: convert fsnotify_group.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable fsnotify_group.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index c6c69318752b..20a57bac38f2 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -17,6 +17,7 @@ #include #include #include +#include /* * IN_* from inotfy.h lines up EXACTLY with FS_*, this is so we can easily @@ -135,7 +136,7 @@ struct fsnotify_group { * inotify_init() and the refcnt will hit 0 only when that fd has been * closed. */ - atomic_t refcnt; /* things with interest in this group */ + refcount_t refcnt; /* things with interest in this group */ const struct fsnotify_ops *ops; /* how this group handles things */ -- cgit v1.2.3 From 6685df31255493c3f0e9e0b8bf885e4c9762fc5d Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 30 Oct 2017 21:14:56 +0100 Subject: fanotify: clean up CONFIG_FANOTIFY_ACCESS_PERMISSIONS ifdefs The only negative from this patch should be an addition of 32bytes to 'struct fsnotify_group' if CONFIG_FANOTIFY_ACCESS_PERMISSIONS is not defined. Reviewed-by: Amir Goldstein Signed-off-by: Miklos Szeredi Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 20a57bac38f2..744e2b9969fc 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -183,11 +183,9 @@ struct fsnotify_group { #endif #ifdef CONFIG_FANOTIFY struct fanotify_group_private_data { -#ifdef CONFIG_FANOTIFY_ACCESS_PERMISSIONS /* allows a group to block waiting for a userspace response */ struct list_head access_list; wait_queue_head_t access_waitq; -#endif /* CONFIG_FANOTIFY_ACCESS_PERMISSIONS */ int f_flags; unsigned int max_marks; struct user_struct *user; -- cgit v1.2.3 From ab97f87325e28b7ef7717e6cb62e8da14a7176e1 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 13:26:02 +0300 Subject: fsnotify: convert fsnotify_mark.refcnt from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable fsnotify_mark.refcnt is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Jan Kara --- include/linux/fsnotify_backend.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fsnotify_backend.h b/include/linux/fsnotify_backend.h index 744e2b9969fc..9bcb43953f4e 100644 --- a/include/linux/fsnotify_backend.h +++ b/include/linux/fsnotify_backend.h @@ -242,7 +242,7 @@ struct fsnotify_mark { __u32 mask; /* We hold one for presence in g_list. Also one ref for each 'thing' * in kernel that found and may be using this mark. */ - atomic_t refcnt; + refcount_t refcnt; /* Group this mark is for. Set on mark creation, stable until last ref * is dropped */ struct fsnotify_group *group; -- cgit v1.2.3 From aa4bf44dc851c6bdd4f7b61b5f2c56c84dfe2ff0 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 25 Oct 2017 00:04:40 +0200 Subject: userns: use union in {g,u}idmap struct - Add a struct containing two pointer to extents and wrap both the static extent array and the struct into a union. This is done in preparation for bumping the {g,u}idmap limits for user namespaces. - Add brackets around anonymous union when using designated initializers to initialize members in order to please gcc <= 4.4. Signed-off-by: Christian Brauner Signed-off-by: Eric W. Biederman --- include/linux/user_namespace.h | 18 +++++++++++++----- 1 file changed, 13 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index c18e01252346..7c83d7f6289b 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -12,13 +12,21 @@ #define UID_GID_MAP_MAX_EXTENTS 5 +struct uid_gid_extent { + u32 first; + u32 lower_first; + u32 count; +}; + struct uid_gid_map { /* 64 bytes -- 1 cache line */ u32 nr_extents; - struct uid_gid_extent { - u32 first; - u32 lower_first; - u32 count; - } extent[UID_GID_MAP_MAX_EXTENTS]; + union { + struct uid_gid_extent extent[UID_GID_MAP_MAX_EXTENTS]; + struct { + struct uid_gid_extent *forward; + struct uid_gid_extent *reverse; + }; + }; }; #define USERNS_SETGROUPS_ALLOWED 1UL -- cgit v1.2.3 From 6397fac4915ab3002dc15aae751455da1a852f25 Mon Sep 17 00:00:00 2001 From: Christian Brauner Date: Wed, 25 Oct 2017 00:04:41 +0200 Subject: userns: bump idmap limits to 340 There are quite some use cases where users run into the current limit for {g,u}id mappings. Consider a user requesting us to map everything but 999, and 1001 for a given range of 1000000000 with a sub{g,u}id layout of: some-user:100000:1000000000 some-user:999:1 some-user:1000:1 some-user:1001:1 some-user:1002:1 This translates to: MAPPING-TYPE | CONTAINER | HOST | RANGE | -------------|-----------|---------|-----------| uid | 999 | 999 | 1 | uid | 1001 | 1001 | 1 | uid | 0 | 1000000 | 999 | uid | 1000 | 1001000 | 1 | uid | 1002 | 1001002 | 999998998 | ------------------------------------------------ gid | 999 | 999 | 1 | gid | 1001 | 1001 | 1 | gid | 0 | 1000000 | 999 | gid | 1000 | 1001000 | 1 | gid | 1002 | 1001002 | 999998998 | which is already the current limit. As discussed at LPC simply bumping the number of limits is not going to work since this would mean that struct uid_gid_map won't fit into a single cache-line anymore thereby regressing performance for the base-cases. The same problem seems to arise when using a single pointer. So the idea is to use struct uid_gid_extent { u32 first; u32 lower_first; u32 count; }; struct uid_gid_map { /* 64 bytes -- 1 cache line */ u32 nr_extents; union { struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; }; }; }; For the base cases we will only use the struct uid_gid_extent extent member. If we go over UID_GID_MAP_MAX_BASE_EXTENTS mappings we perform a single 4k kmalloc() which means we can have a maximum of 340 mappings (340 * size(struct uid_gid_extent) = 4080). For the latter case we use two pointers "forward" and "reverse". The forward pointer points to an array sorted by "first" and the reverse pointer points to an array sorted by "lower_first". We can then perform binary search on those arrays. Performance Testing: When Eric introduced the extent-based struct uid_gid_map approach he measured the performanc impact of his idmap changes: > My benchmark consisted of going to single user mode where nothing else was > running. On an ext4 filesystem opening 1,000,000 files and looping through all > of the files 1000 times and calling fstat on the individuals files. This was > to ensure I was benchmarking stat times where the inodes were in the kernels > cache, but the inode values were not in the processors cache. My results: > v3.4-rc1: ~= 156ns (unmodified v3.4-rc1 with user namespace support disabled) > v3.4-rc1-userns-: ~= 155ns (v3.4-rc1 with my user namespace patches and user namespace support disabled) > v3.4-rc1-userns+: ~= 164ns (v3.4-rc1 with my user namespace patches and user namespace support enabled) I used an identical approach on my laptop. Here's a thorough description of what I did. I built a 4.14.0-rc4 mainline kernel with my new idmap patches applied. I booted into single user mode and used an ext4 filesystem to open/create 1,000,000 files. Then I looped through all of the files calling fstat() on each of them 1000 times and calculated the mean fstat() time for a single file. (The test program can be found below.) Here are the results. For fun, I compared the first version of my patch which scaled linearly with the new version of the patch: | # MAPPINGS | PATCH-V1 | PATCH-NEW | |--------------|------------|-----------| | 0 mappings | 158 ns | 158 ns | | 1 mappings | 164 ns | 157 ns | | 2 mappings | 170 ns | 158 ns | | 3 mappings | 175 ns | 161 ns | | 5 mappings | 187 ns | 165 ns | | 10 mappings | 218 ns | 199 ns | | 50 mappings | 528 ns | 218 ns | | 100 mappings | 980 ns | 229 ns | | 200 mappings | 1880 ns | 239 ns | | 300 mappings | 2760 ns | 240 ns | | 340 mappings | not tested | 248 ns | Here's the test program I used. I asked Eric what he did and this is a more "advanced" implementation of the idea. It's pretty straight-forward: #define __GNU_SOURCE #define __STDC_FORMAT_MACROS #include #include #include #include #include #include #include #include #include #include #include int main(int argc, char *argv[]) { int ret; size_t i, k; int fd[1000000]; int times[1000]; char pathname[4096]; struct stat st; struct timeval t1, t2; uint64_t time_in_mcs; uint64_t sum = 0; if (argc != 2) { fprintf(stderr, "Please specify a directory where to create " "the test files\n"); exit(EXIT_FAILURE); } for (i = 0; i < sizeof(fd) / sizeof(fd[0]); i++) { sprintf(pathname, "%s/idmap_test_%zu", argv[1], i); fd[i]= open(pathname, O_RDWR | O_CREAT, S_IXUSR | S_IXGRP | S_IXOTH); if (fd[i] < 0) { ssize_t j; for (j = i; j >= 0; j--) close(fd[j]); exit(EXIT_FAILURE); } } for (k = 0; k < 1000; k++) { ret = gettimeofday(&t1, NULL); if (ret < 0) goto close_all; for (i = 0; i < sizeof(fd) / sizeof(fd[0]); i++) { ret = fstat(fd[i], &st); if (ret < 0) goto close_all; } ret = gettimeofday(&t2, NULL); if (ret < 0) goto close_all; time_in_mcs = (1000000 * t2.tv_sec + t2.tv_usec) - (1000000 * t1.tv_sec + t1.tv_usec); printf("Total time in micro seconds: %" PRIu64 "\n", time_in_mcs); printf("Total time in nanoseconds: %" PRIu64 "\n", time_in_mcs * 1000); printf("Time per file in nanoseconds: %" PRIu64 "\n", (time_in_mcs * 1000) / 1000000); times[k] = (time_in_mcs * 1000) / 1000000; } close_all: for (i = 0; i < sizeof(fd) / sizeof(fd[0]); i++) close(fd[i]); if (ret < 0) exit(EXIT_FAILURE); for (k = 0; k < 1000; k++) { sum += times[k]; } printf("Mean time per file in nanoseconds: %" PRIu64 "\n", sum / 1000); exit(EXIT_SUCCESS);; } Signed-off-by: Christian Brauner CC: Serge Hallyn CC: Eric Biederman Signed-off-by: Eric W. Biederman --- include/linux/user_namespace.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/user_namespace.h b/include/linux/user_namespace.h index 7c83d7f6289b..1c1046a60fb4 100644 --- a/include/linux/user_namespace.h +++ b/include/linux/user_namespace.h @@ -10,7 +10,8 @@ #include #include -#define UID_GID_MAP_MAX_EXTENTS 5 +#define UID_GID_MAP_MAX_BASE_EXTENTS 5 +#define UID_GID_MAP_MAX_EXTENTS 340 struct uid_gid_extent { u32 first; @@ -18,10 +19,10 @@ struct uid_gid_extent { u32 count; }; -struct uid_gid_map { /* 64 bytes -- 1 cache line */ +struct uid_gid_map { /* 64 bytes -- 1 cache line */ u32 nr_extents; union { - struct uid_gid_extent extent[UID_GID_MAP_MAX_EXTENTS]; + struct uid_gid_extent extent[UID_GID_MAP_MAX_BASE_EXTENTS]; struct { struct uid_gid_extent *forward; struct uid_gid_extent *reverse; -- cgit v1.2.3 From 638f5b90d46016372a8e3e0a434f199cc5e12b8c Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 31 Oct 2017 18:16:05 -0700 Subject: bpf: reduce verifier memory consumption the verifier got progressively smarter over time and size of its internal state grew as well. Time to reduce the memory consumption. Before: sizeof(struct bpf_verifier_state) = 6520 After: sizeof(struct bpf_verifier_state) = 896 It's done by observing that majority of BPF programs use little to no stack whereas verifier kept all of 512 stack slots ready always. Instead dynamically reallocate struct verifier state when stack access is detected. Runtime difference before vs after is within a noise. The number of processed instructions stays the same. Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index feeaea93d959..3b0976aaac75 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -88,14 +88,19 @@ enum bpf_stack_slot_type { #define BPF_REG_SIZE 8 /* size of eBPF register in bytes */ +struct bpf_stack_state { + struct bpf_reg_state spilled_ptr; + u8 slot_type[BPF_REG_SIZE]; +}; + /* state of the program: * type of all registers and stack info */ struct bpf_verifier_state { struct bpf_reg_state regs[MAX_BPF_REG]; - u8 stack_slot_type[MAX_BPF_STACK]; - struct bpf_reg_state spilled_regs[MAX_BPF_STACK / BPF_REG_SIZE]; struct bpf_verifier_state *parent; + int allocated_stack; + struct bpf_stack_state *stack; }; /* linked list of verifier states used to prune search */ @@ -145,7 +150,7 @@ struct bpf_verifier_env { struct bpf_verifier_stack_elem *head; /* stack of verifier states to be processed */ int stack_size; /* number of states to be processed */ bool strict_alignment; /* perform strict pointer alignment checks */ - struct bpf_verifier_state cur_state; /* current verifier state */ + struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ void *analyzer_priv; /* pointer to external analyzer's private data */ @@ -159,6 +164,11 @@ struct bpf_verifier_env { struct bpf_verifer_log log; }; +static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) +{ + return env->cur_state->regs; +} + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, void *priv); -- cgit v1.2.3 From a9903f04e0a4ea522d959c2f287cdf0ab029e324 Mon Sep 17 00:00:00 2001 From: Matthias Kaehlcke Date: Mon, 30 Oct 2017 11:08:16 -0700 Subject: sched/sysctl: Fix attributes of some extern declarations The definition of sysctl_sched_migration_cost, sysctl_sched_nr_migrate and sysctl_sched_time_avg includes the attribute const_debug. This attribute is not part of the extern declaration of these variables in include/linux/sched/sysctl.h, while it is in kernel/sched/sched.h, and as a result Clang generates warnings like this: kernel/sched/sched.h:1618:33: warning: section attribute is specified on redeclared variable [-Wsection] extern const_debug unsigned int sysctl_sched_time_avg; ^ ./include/linux/sched/sysctl.h:42:21: note: previous declaration is here extern unsigned int sysctl_sched_time_avg; The header only declares the variables when CONFIG_SCHED_DEBUG is defined, therefore it is not necessary to duplicate the definition of const_debug. Instead we can use the attribute __read_mostly, which is the expansion of const_debug when CONFIG_SCHED_DEBUG=y is set. Signed-off-by: Matthias Kaehlcke Reviewed-by: Nick Desaulniers Cc: Douglas Anderson Cc: Guenter Roeck Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Shile Zhang Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20171030180816.170850-1-mka@chromium.org Signed-off-by: Ingo Molnar --- include/linux/sched/sysctl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched/sysctl.h b/include/linux/sched/sysctl.h index 0f5ecd4d298e..d34c823f3d36 100644 --- a/include/linux/sched/sysctl.h +++ b/include/linux/sched/sysctl.h @@ -37,9 +37,9 @@ extern unsigned int sysctl_numa_balancing_scan_period_max; extern unsigned int sysctl_numa_balancing_scan_size; #ifdef CONFIG_SCHED_DEBUG -extern unsigned int sysctl_sched_migration_cost; -extern unsigned int sysctl_sched_nr_migrate; -extern unsigned int sysctl_sched_time_avg; +extern __read_mostly unsigned int sysctl_sched_migration_cost; +extern __read_mostly unsigned int sysctl_sched_nr_migrate; +extern __read_mostly unsigned int sysctl_sched_time_avg; int sched_proc_update_handler(struct ctl_table *table, int write, void __user *buffer, size_t *length, -- cgit v1.2.3 From 8698b9364710e7bac84b3af07dd410e39c8c2e08 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 1 Nov 2017 10:11:55 +0800 Subject: regmap: Add hardware spinlock support On some platforms, when reading or writing some special registers through regmap, we should acquire one hardware spinlock to synchronize between the multiple subsystems. Thus this patch adds the hardware spinlock support for regmap. Signed-off-by: Baolin Wang Signed-off-by: Mark Brown --- include/linux/regmap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 978abfbac617..edc32aac84d7 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -273,6 +273,9 @@ typedef void (*regmap_unlock)(void *); * * @ranges: Array of configuration entries for virtual address ranges. * @num_ranges: Number of range configuration entries. + * @hwlock_id: Specify the hardware spinlock id. + * @hwlock_mode: The hardware spinlock mode, should be HWLOCK_IRQSTATE, + * HWLOCK_IRQ or 0. */ struct regmap_config { const char *name; @@ -317,6 +320,9 @@ struct regmap_config { const struct regmap_range_cfg *ranges; unsigned int num_ranges; + + unsigned int hwlock_id; + unsigned int hwlock_mode; }; /** -- cgit v1.2.3 From e9292f2c03851ef81bef38579a0ee9c42140e586 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Tue, 31 Oct 2017 15:48:01 +0100 Subject: net: dsa: lan9303: Add STP ALR entry on port 0 STP BPDUs arriving on user ports must sent to CPU port only, for processing by the SW bridge. Add an ALR entry with STP state override to fix that. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/dsa/lan9303.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index 05d8d136baab..b2110e69630f 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -34,3 +34,5 @@ struct lan9303 { **/ struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; }; + +#define eth_stp_addr eth_reserved_addr_base -- cgit v1.2.3 From 7930d0a00ff5dbcc80f793d1a7a6b8de4e591f1a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 14 Oct 2017 17:22:27 +0800 Subject: sbitmap: introduce __sbitmap_for_each_set() For blk-mq, we need to be able to iterate software queues starting from any queue in a round robin fashion, so introduce this helper. Reviewed-by: Omar Sandoval Cc: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 64 ++++++++++++++++++++++++++++++++++++------------- 1 file changed, 47 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index a1904aadbc45..0dcc60e820de 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -211,10 +211,14 @@ bool sbitmap_any_bit_set(const struct sbitmap *sb); */ bool sbitmap_any_bit_clear(const struct sbitmap *sb); +#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) +#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) + typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); /** - * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. + * __sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. + * @start: Where to start the iteration. * @sb: Bitmap to iterate over. * @fn: Callback. Should return true to continue or false to break early. * @data: Pointer to pass to callback. @@ -222,35 +226,61 @@ typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); * This is inline even though it's non-trivial so that the function calls to the * callback will hopefully get optimized away. */ -static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, - void *data) +static inline void __sbitmap_for_each_set(struct sbitmap *sb, + unsigned int start, + sb_for_each_fn fn, void *data) { - unsigned int i; + unsigned int index; + unsigned int nr; + unsigned int scanned = 0; - for (i = 0; i < sb->map_nr; i++) { - struct sbitmap_word *word = &sb->map[i]; - unsigned int off, nr; + if (start >= sb->depth) + start = 0; + index = SB_NR_TO_INDEX(sb, start); + nr = SB_NR_TO_BIT(sb, start); - if (!word->word) - continue; + while (scanned < sb->depth) { + struct sbitmap_word *word = &sb->map[index]; + unsigned int depth = min_t(unsigned int, word->depth - nr, + sb->depth - scanned); - nr = 0; - off = i << sb->shift; + scanned += depth; + if (!word->word) + goto next; + + /* + * On the first iteration of the outer loop, we need to add the + * bit offset back to the size of the word for find_next_bit(). + * On all other iterations, nr is zero, so this is a noop. + */ + depth += nr; while (1) { - nr = find_next_bit(&word->word, word->depth, nr); - if (nr >= word->depth) + nr = find_next_bit(&word->word, depth, nr); + if (nr >= depth) break; - - if (!fn(sb, off + nr, data)) + if (!fn(sb, (index << sb->shift) + nr, data)) return; nr++; } +next: + nr = 0; + if (++index >= sb->map_nr) + index = 0; } } -#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) -#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) +/** + * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. + * @sb: Bitmap to iterate over. + * @fn: Callback. Should return true to continue or false to break early. + * @data: Pointer to pass to callback. + */ +static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, + void *data) +{ + __sbitmap_for_each_set(sb, 0, fn, data); +} static inline unsigned long *__sbitmap_word(struct sbitmap *sb, unsigned int bitnr) -- cgit v1.2.3 From de1482974080ec9ef414bf048b2646b246b63f6e Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 14 Oct 2017 17:22:29 +0800 Subject: blk-mq: introduce .get_budget and .put_budget in blk_mq_ops For SCSI devices, there is often a per-request-queue depth, which needs to be respected before queuing one request. Currently blk-mq always dequeues the request first, then calls .queue_rq() to dispatch the request to lld. One obvious issue with this approach is that I/O merging may not be successful, because when the per-request-queue depth can't be respected, .queue_rq() has to return BLK_STS_RESOURCE, and then this request has to stay in hctx->dispatch list. This means it never gets a chance to be merged with other IO. This patch introduces .get_budget and .put_budget callback in blk_mq_ops, then we can try to get reserved budget first before dequeuing request. If the budget for queueing I/O can't be satisfied, we don't need to dequeue request at all. Hence the request can be left in the IO scheduler queue, for more merging opportunities. Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 50c6485cb04f..901457df3d64 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -90,6 +90,8 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef blk_status_t (get_budget_fn)(struct blk_mq_hw_ctx *); +typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); @@ -111,6 +113,15 @@ struct blk_mq_ops { */ queue_rq_fn *queue_rq; + /* + * Reserve budget before queue request, once .queue_rq is + * run, it is driver's responsibility to release the + * reserved budget. Also we have to handle failure case + * of .get_budget for avoiding I/O deadlock. + */ + get_budget_fn *get_budget; + put_budget_fn *put_budget; + /* * Called on request timeout */ -- cgit v1.2.3 From b347689ffbca745ac457ee27400ce1affd571c6f Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sat, 14 Oct 2017 17:22:30 +0800 Subject: blk-mq-sched: improve dispatching from sw queue SCSI devices use host-wide tagset, and the shared driver tag space is often quite big. However, there is also a queue depth for each lun( .cmd_per_lun), which is often small, for example, on both lpfc and qla2xxx, .cmd_per_lun is just 3. So lots of requests may stay in sw queue, and we always flush all belonging to same hw queue and dispatch them all to driver. Unfortunately it is easy to cause queue busy because of the small .cmd_per_lun. Once these requests are flushed out, they have to stay in hctx->dispatch, and no bio merge can happen on these requests, and sequential IO performance is harmed. This patch introduces blk_mq_dequeue_from_ctx for dequeuing a request from a sw queue, so that we can dispatch them in scheduler's way. We can then avoid dequeueing too many requests from sw queue, since we don't flush ->dispatch completely. This patch improves dispatching from sw queue by using the .get_budget and .put_budget callbacks. Reviewed-by: Omar Sandoval Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 901457df3d64..e5e6becd57d3 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -30,6 +30,8 @@ struct blk_mq_hw_ctx { struct sbitmap ctx_map; + struct blk_mq_ctx *dispatch_from; + struct blk_mq_ctx **ctxs; unsigned int nr_ctx; -- cgit v1.2.3 From ac7fe82b6fcf77e757e88005c33b8147c1b7b73f Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 25 Oct 2017 16:43:15 -0700 Subject: nvme-fc: add a dev_loss_tmo field to the remoteport Add a dev_loss_tmo value, paralleling the SCSI FC transport, for device connectivity loss. The transport initializes the value in the nvme_fc_register_remoteport() call. If the value is not set, a default of 60s is set. Add a new routine to the api, nvme_fc_set_remoteport_devloss() routine, which allows the lldd to dynamically update the value on an existing remoteport. Signed-off-by: James Smart Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 2be4db353937..496ff759f84c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -40,6 +40,8 @@ * @node_name: FC WWNN for the port * @port_name: FC WWPN for the port * @port_role: What NVME roles are supported (see FC_PORT_ROLE_xxx) + * @dev_loss_tmo: maximum delay for reconnects to an association on + * this device. Used only on a remoteport. * * Initialization values for dynamic port fields: * @port_id: FC N_Port_ID currently assigned the port. Upper 8 bits must @@ -50,6 +52,7 @@ struct nvme_fc_port_info { u64 port_name; u32 port_role; u32 port_id; + u32 dev_loss_tmo; }; @@ -200,6 +203,9 @@ enum nvme_fc_obj_state { * The length of the buffer corresponds to the local_priv_sz * value specified in the nvme_fc_port_template supplied by * the LLDD. + * @dev_loss_tmo: maximum delay for reconnects to an association on + * this device. To modify, lldd must call + * nvme_fc_set_remoteport_devloss(). * * Fields with dynamic values. Values may change base on link state. LLDD * may reference fields directly to change them. Initialized by the @@ -257,10 +263,9 @@ struct nvme_fc_remote_port { u32 port_role; u64 node_name; u64 port_name; - struct nvme_fc_local_port *localport; - void *private; + u32 dev_loss_tmo; /* dynamic fields */ u32 port_id; @@ -446,6 +451,8 @@ int nvme_fc_unregister_remoteport(struct nvme_fc_remote_port *remoteport); void nvme_fc_rescan_remoteport(struct nvme_fc_remote_port *remoteport); +int nvme_fc_set_remoteport_devloss(struct nvme_fc_remote_port *remoteport, + u32 dev_loss_tmo); /* -- cgit v1.2.3 From 00ed87da35e88a7a4d7f41673beab52ef828f12b Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 1 Nov 2017 07:32:50 -0700 Subject: timer: Add parenthesis around timer_setup() macro arguments In the case where expressions are passed as macro arguments, the LOCKDEP version of the timer macros need enclosing parenthesis. Reported-by: Stephen Rothwell Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner Link: https://lkml.kernel.org/r/20171101143250.GA65266@beast --- include/linux/timer.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 09950482309b..a1af92bac0d5 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -173,11 +173,12 @@ static inline void timer_setup_on_stack(struct timer_list *timer, * do want to keep the inline for argument type checking, though. */ # define timer_setup(timer, callback, flags) \ - __setup_timer(timer, (TIMER_FUNC_TYPE)callback, \ - (TIMER_DATA_TYPE)timer, flags) + __setup_timer((timer), (TIMER_FUNC_TYPE)(callback), \ + (TIMER_DATA_TYPE)(timer), (flags)) # define timer_setup_on_stack(timer, callback, flags) \ - __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback,\ - (TIMER_DATA_TYPE)timer, flags) + __setup_timer_on_stack((timer), \ + (TIMER_FUNC_TYPE)(callback), \ + (TIMER_DATA_TYPE)(timer), (flags)) #endif #define from_timer(var, callback_timer, timer_fieldname) \ -- cgit v1.2.3 From aa795c41d9cd41dc9c915dd1956ddd0e4ae44485 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Fri, 1 Sep 2017 16:16:40 -0700 Subject: clk: Add devm_of_clk_add_hw_provider()/del_provider() APIs Sometimes we only have one of_clk_del_provider() call in driver error and remove paths, because we're missing a devm_of_clk_add_hw_provider() API. Introduce the API so we can convert drivers to use this and potentially reduce the amount of code needed to remove providers in drivers. Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 5100ec1b5d55..ba79d6186133 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -815,7 +815,12 @@ int of_clk_add_hw_provider(struct device_node *np, struct clk_hw *(*get)(struct of_phandle_args *clkspec, void *data), void *data); +int devm_of_clk_add_hw_provider(struct device *dev, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data); void of_clk_del_provider(struct device_node *np); +void devm_of_clk_del_provider(struct device *dev); struct clk *of_clk_src_simple_get(struct of_phandle_args *clkspec, void *data); struct clk_hw *of_clk_hw_simple_get(struct of_phandle_args *clkspec, @@ -847,7 +852,15 @@ static inline int of_clk_add_hw_provider(struct device_node *np, { return 0; } +static inline int devm_of_clk_add_hw_provider(struct device *dev, + struct clk_hw *(*get)(struct of_phandle_args *clkspec, + void *data), + void *data) +{ + return 0; +} static inline void of_clk_del_provider(struct device_node *np) {} +static inline void devm_of_clk_del_provider(struct device *dev) {} static inline struct clk *of_clk_src_simple_get( struct of_phandle_args *clkspec, void *data) { -- cgit v1.2.3 From 7066fdd0d7427f1ba455d186e75213c442c9663b Mon Sep 17 00:00:00 2001 From: Rajendra Nayak Date: Tue, 10 Oct 2017 14:27:13 +0530 Subject: clk: qcom: clk-smd-rpm: add msm8996 rpmclks Add all RPM controlled clocks on msm8996 platform [srini: Fixed various issues with offsets and made names specific to msm8996] Signed-off-by: Srinivas Kandagatla Signed-off-by: Rajendra Nayak Acked-by: Rob Herring Acked-by: Bjorn Andersson Signed-off-by: Stephen Boyd --- include/linux/soc/qcom/smd-rpm.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd-rpm.h b/include/linux/soc/qcom/smd-rpm.h index 2a53dcaeeeed..ebdabd669d93 100644 --- a/include/linux/soc/qcom/smd-rpm.h +++ b/include/linux/soc/qcom/smd-rpm.h @@ -26,6 +26,10 @@ struct qcom_smd_rpm; #define QCOM_SMD_RPM_SMPB 0x62706d73 #define QCOM_SMD_RPM_SPDM 0x63707362 #define QCOM_SMD_RPM_VSA 0x00617376 +#define QCOM_SMD_RPM_MMAXI_CLK 0x69786d6d +#define QCOM_SMD_RPM_IPA_CLK 0x617069 +#define QCOM_SMD_RPM_CE_CLK 0x6563 +#define QCOM_SMD_RPM_AGGR_CLK 0x72676761 int qcom_rpm_smd_write(struct qcom_smd_rpm *rpm, int state, -- cgit v1.2.3 From 1495dc9f0a711a54f8fec849ce7f3a8f585a11e5 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Wed, 1 Nov 2017 11:48:00 -0700 Subject: security: bpf: replace include of linux/bpf.h with forward declarations Touching linux/bpf.h makes us rebuild a surprisingly large portion of the kernel. Remove the unnecessary dependency from security.h, it only needs forward declarations. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/security.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/security.h b/include/linux/security.h index 18800b0911e5..73f1ef625d40 100644 --- a/include/linux/security.h +++ b/include/linux/security.h @@ -31,7 +31,6 @@ #include #include #include -#include struct linux_binprm; struct cred; @@ -1732,6 +1731,10 @@ static inline void securityfs_remove(struct dentry *dentry) #endif #ifdef CONFIG_BPF_SYSCALL +union bpf_attr; +struct bpf_map; +struct bpf_prog; +struct bpf_prog_aux; #ifdef CONFIG_SECURITY extern int security_bpf(int cmd, union bpf_attr *attr, unsigned int size); extern int security_bpf_map(struct bpf_map *map, fmode_t fmode); -- cgit v1.2.3 From 908a543ac7cdf3aa8a283ec42cab3c16e2fc45a2 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sun, 24 Sep 2017 18:19:18 +0200 Subject: clk: clk-gpio: Make GPIO clock provider use descriptors only After som grep:ing it turns out nothing in the kernel is really calling clk_[hw_]_register_gpio_[gate|mux](). All existing instances are just created directly from the device tree probe functions at the bottom of the clk-gpio.c clock provider file. This means we can change the signature of the function without any consequences! Everyone should be using GPIO descriptors now, so let's just go in and enforce that. This saves a bit of code since GPIO descriptors know inherently if they are active low so no need for the code keeping track of that. We leave it to the caller to come up with the GPIO descriptor. It is nowadays possible to do that even without a corresponding device, so no excuse not to pass them around. The one in-kernel user lifecycles it using devm_gpiod_get() in gpio_clk_driver_probe(). Cc: Sergej Sawazki Cc: Jyri Sarha Signed-off-by: Linus Walleij Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 5100ec1b5d55..063d8cb9926f 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -682,10 +682,10 @@ struct clk_gpio { extern const struct clk_ops clk_gpio_gate_ops; struct clk *clk_register_gpio_gate(struct device *dev, const char *name, - const char *parent_name, unsigned gpio, bool active_low, + const char *parent_name, struct gpio_desc *gpiod, unsigned long flags); struct clk_hw *clk_hw_register_gpio_gate(struct device *dev, const char *name, - const char *parent_name, unsigned gpio, bool active_low, + const char *parent_name, struct gpio_desc *gpiod, unsigned long flags); void clk_hw_unregister_gpio_gate(struct clk_hw *hw); @@ -701,11 +701,11 @@ void clk_hw_unregister_gpio_gate(struct clk_hw *hw); extern const struct clk_ops clk_gpio_mux_ops; struct clk *clk_register_gpio_mux(struct device *dev, const char *name, - const char * const *parent_names, u8 num_parents, unsigned gpio, - bool active_low, unsigned long flags); + const char * const *parent_names, u8 num_parents, struct gpio_desc *gpiod, + unsigned long flags); struct clk_hw *clk_hw_register_gpio_mux(struct device *dev, const char *name, - const char * const *parent_names, u8 num_parents, unsigned gpio, - bool active_low, unsigned long flags); + const char * const *parent_names, u8 num_parents, struct gpio_desc *gpiod, + unsigned long flags); void clk_hw_unregister_gpio_mux(struct clk_hw *hw); /** -- cgit v1.2.3 From d4d7b4ad2f05c03fb25252aea66f9f3cd7cfbe06 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 26 Oct 2017 10:44:07 +0100 Subject: irqchip/gic-v3-its: Setup VLPI properties at map time So far, we require the hypervisor to update the VLPI properties once the the VLPI mapping has been established. While this makes it easy for the ITS driver, it creates a window where an incoming interrupt can be delivered with an unknown set of properties. Not very nice. Instead, let's add a "properties" field to the mapping structure, and use that to configure the VLPI before it actually gets mapped. Reviewed-by: Christoffer Dall Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 43cde15f221b..447da8ca2156 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -71,12 +71,14 @@ struct its_vpe { * @vm: Pointer to the GICv4 notion of a VM * @vpe: Pointer to the GICv4 notion of a virtual CPU (VPE) * @vintid: Virtual LPI number + * @properties: Priority and enable bits (as written in the prop table) * @db_enabled: Is the VPE doorbell to be generated? */ struct its_vlpi_map { struct its_vm *vm; struct its_vpe *vpe; u32 vintid; + u8 properties; bool db_enabled; }; -- cgit v1.2.3 From 4b82130077d93539c9fbb0f5eee21965cea9cfe9 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Mon, 30 Oct 2017 10:15:00 +0800 Subject: irqdomain: Update the comments of fwnode field of irq_domain structure Commit: f110711a6053 ("irqdomain: Convert irqdomain-%3Eof_node to fwnode") converted of_node field to fwnode, but didn't update its comments. Update it. Fixes: f110711a6053 ("irqdomain: Convert irqdomain-%3Eof_node to fwnode") Signed-off-by: Dou Liyang Signed-off-by: Marc Zyngier --- include/linux/irqdomain.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index df162f7a4aad..ce48a23d621f 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -138,8 +138,8 @@ struct irq_domain_chip_generic; * @mapcount: The number of mapped interrupts * * Optional elements - * @of_node: Pointer to device tree nodes associated with the irq_domain. Used - * when decoding device tree interrupt specifiers. + * @fwnode: Pointer to firmware node associated with the irq_domain. Pretty easy + * to swap it for the of_node via the irq_domain_get_of_node accessor * @gc: Pointer to a list of generic chips. There is a helper function for * setting up one or more generic chips for interrupt controllers * drivers using the generic chip library which uses this pointer. -- cgit v1.2.3 From da61fcf9d62a05f3508f5646d353a9c2604bac76 Mon Sep 17 00:00:00 2001 From: Paul Burton Date: Tue, 31 Oct 2017 09:41:45 -0700 Subject: irqchip: mips-gic: Use irq_cpu_online to (un)mask all-VP(E) IRQs The gic_all_vpes_local_irq_controller chip currently attempts to operate on all CPUs/VPs in the system when masking or unmasking an interrupt. This has a few drawbacks: - In multi-cluster systems we may not always have access to all CPUs in the system. When all CPUs in a cluster are powered down that cluster's GIC may also power down, in which case we cannot configure its state. - Relatedly, if we power down a cluster after having configured interrupts for CPUs within it then the cluster's GIC may lose state & we need to reconfigure it. The current approach doesn't take this into account. - It's wasteful if we run Linux on fewer VPs than are present in the system. For example if we run a uniprocessor kernel on CPU0 of a system with 16 CPUs then there's no point in us configuring CPUs 1-15. - The implementation is also lacking in that it expects the range 0..gic_vpes-1 to represent valid Linux CPU numbers which may not always be the case - for example if we run on a system with more VPs than the kernel is configured to support. Fix all of these issues by only configuring the affected interrupts for CPUs which are online at the time, and recording the configuration in a new struct gic_all_vpes_chip_data for later use by CPUs being brought online. We register a CPU hotplug state (reusing CPUHP_AP_IRQ_GIC_STARTING which the ARM GIC driver uses, and which seems suitably generic for reuse with the MIPS GIC) and execute irq_cpu_online() in order to configure the interrupts on the newly onlined CPU. Signed-off-by: Paul Burton Cc: Jason Cooper Cc: Marc Zyngier Cc: Thomas Gleixner Cc: linux-mips@linux-mips.org Signed-off-by: Marc Zyngier --- include/linux/cpuhotplug.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 6d508767e144..1966a45bc453 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -98,6 +98,7 @@ enum cpuhp_state { CPUHP_AP_IRQ_HIP04_STARTING, CPUHP_AP_IRQ_ARMADA_XP_STARTING, CPUHP_AP_IRQ_BCM2836_STARTING, + CPUHP_AP_IRQ_MIPS_GIC_STARTING, CPUHP_AP_ARM_MVEBU_COHERENCY, CPUHP_AP_PERF_X86_AMD_UNCORE_STARTING, CPUHP_AP_PERF_X86_STARTING, -- cgit v1.2.3 From aa9ad44a42b4cf4387f8ecddaf8e51707fdcda5a Mon Sep 17 00:00:00 2001 From: Dave Jiang Date: Wed, 23 Aug 2017 12:48:26 -0700 Subject: libnvdimm: move poison list functions to a new 'badrange' file nfit_test needs to use the poison list manipulation code as well. Make it more generic and in the process rename poison to badrange, and move all the related helpers to a new file. Signed-off-by: Dave Jiang [vishal: Add badrange.o to nfit_test's Kbuild] [vishal: add a missed include in bus.c for the new badrange functions] [vishal: rename all instances of 'be' to 'bre'] Signed-off-by: Vishal Verma Signed-off-by: Dan Williams --- include/linux/libnvdimm.h | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/libnvdimm.h b/include/linux/libnvdimm.h index 3eaad2fbf284..f8109ddb5ef1 100644 --- a/include/linux/libnvdimm.h +++ b/include/linux/libnvdimm.h @@ -18,6 +18,18 @@ #include #include #include +#include + +struct badrange_entry { + u64 start; + u64 length; + struct list_head list; +}; + +struct badrange { + struct list_head list; + spinlock_t lock; +}; enum { /* when a dimm supports both PMEM and BLK access a label is required */ @@ -129,9 +141,12 @@ static inline struct nd_blk_region_desc *to_blk_region_desc( } -int nvdimm_bus_add_poison(struct nvdimm_bus *nvdimm_bus, u64 addr, u64 length); -void nvdimm_forget_poison(struct nvdimm_bus *nvdimm_bus, - phys_addr_t start, unsigned int len); +void badrange_init(struct badrange *badrange); +int badrange_add(struct badrange *badrange, u64 addr, u64 length); +void badrange_forget(struct badrange *badrange, phys_addr_t start, + unsigned int len); +int nvdimm_bus_add_badrange(struct nvdimm_bus *nvdimm_bus, u64 addr, + u64 length); struct nvdimm_bus *nvdimm_bus_register(struct device *parent, struct nvdimm_bus_descriptor *nfit_desc); void nvdimm_bus_unregister(struct nvdimm_bus *nvdimm_bus); -- cgit v1.2.3 From 054287295b1132c8742ea55f8e3af9cbd630c932 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Thu, 2 Nov 2017 10:36:48 +0100 Subject: net: Define eth_stp_addr in linux/etherdevice.h The lan9303 driver defines eth_stp_addr as a synonym to eth_reserved_addr_base to get the STP ethernet address 01:80:c2:00:00:00. eth_reserved_addr_base is also used to define the start of Bridge Reserved ethernet address range, which happen to be the STP address. br_dev_setup refer to eth_reserved_addr_base as a definition of STP address. Clean up by: - Move the eth_stp_addr definition to linux/etherdevice.h - Use eth_stp_addr instead of eth_reserved_addr_base in br_dev_setup. Signed-off-by: Egil Hjelmeland Reviewed-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/dsa/lan9303.h | 2 -- include/linux/etherdevice.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index b2110e69630f..05d8d136baab 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -34,5 +34,3 @@ struct lan9303 { **/ struct lan9303_alr_cache_entry alr_cache[LAN9303_NUM_ALR_RECORDS]; }; - -#define eth_stp_addr eth_reserved_addr_base diff --git a/include/linux/etherdevice.h b/include/linux/etherdevice.h index 2d9f80848d4b..263dbcad22fc 100644 --- a/include/linux/etherdevice.h +++ b/include/linux/etherdevice.h @@ -66,6 +66,7 @@ int eth_gro_complete(struct sk_buff *skb, int nhoff); /* Reserved Ethernet Addresses per IEEE 802.1Q */ static const u8 eth_reserved_addr_base[ETH_ALEN] __aligned(2) = { 0x01, 0x80, 0xc2, 0x00, 0x00, 0x00 }; +#define eth_stp_addr eth_reserved_addr_base /** * is_link_local_ether_addr - Determine if given Ethernet address is link-local -- cgit v1.2.3 From 17c0899682a55666e58f2200d5599f3a7b22867f Mon Sep 17 00:00:00 2001 From: Jack Pham Date: Thu, 2 Nov 2017 17:21:41 -0700 Subject: usb: remove msm_hsusb_hw.h The last two remaining drivers (ehci-msm.c and phy-msm-usb.c) that needed this header were recently removed, so delete this now-unused file. Signed-off-by: Jack Pham Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/msm_hsusb_hw.h | 77 ---------------------------------------- 1 file changed, 77 deletions(-) delete mode 100644 include/linux/usb/msm_hsusb_hw.h (limited to 'include/linux') diff --git a/include/linux/usb/msm_hsusb_hw.h b/include/linux/usb/msm_hsusb_hw.h deleted file mode 100644 index 974c3796a23f..000000000000 --- a/include/linux/usb/msm_hsusb_hw.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (C) 2007 Google, Inc. - * Author: Brian Swetland - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - */ - -#ifndef __LINUX_USB_GADGET_MSM72K_UDC_H__ -#define __LINUX_USB_GADGET_MSM72K_UDC_H__ - -/* USB phy selector - in TCSR address range */ -#define USB2_PHY_SEL 0xfd4ab000 - -#define USB_AHBBURST (MSM_USB_BASE + 0x0090) -#define USB_AHBMODE (MSM_USB_BASE + 0x0098) -#define USB_GENCONFIG_2 (MSM_USB_BASE + 0x00a0) -#define ULPI_TX_PKT_EN_CLR_FIX BIT(19) - -#define USB_CAPLENGTH (MSM_USB_BASE + 0x0100) /* 8 bit */ - -#define USB_USBCMD (MSM_USB_BASE + 0x0140) -#define USB_PORTSC (MSM_USB_BASE + 0x0184) -#define USB_OTGSC (MSM_USB_BASE + 0x01A4) -#define USB_USBMODE (MSM_USB_BASE + 0x01A8) -#define USB_PHY_CTRL (MSM_USB_BASE + 0x0240) -#define USB_PHY_CTRL2 (MSM_USB_BASE + 0x0278) - -#define GENCONFIG_2_SESS_VLD_CTRL_EN BIT(7) -#define USBCMD_SESS_VLD_CTRL BIT(25) - -#define USBCMD_RESET 2 -#define USB_USBINTR (MSM_USB_BASE + 0x0148) - -#define PORTSC_PHCD (1 << 23) /* phy suspend mode */ -#define PORTSC_PTS_MASK (3 << 30) -#define PORTSC_PTS_ULPI (2 << 30) -#define PORTSC_PTS_SERIAL (3 << 30) - -#define USB_ULPI_VIEWPORT (MSM_USB_BASE + 0x0170) -#define ULPI_RUN (1 << 30) -#define ULPI_WRITE (1 << 29) -#define ULPI_READ (0 << 29) -#define ULPI_ADDR(n) (((n) & 255) << 16) -#define ULPI_DATA(n) ((n) & 255) -#define ULPI_DATA_READ(n) (((n) >> 8) & 255) - -/* synopsys 28nm phy registers */ -#define ULPI_PWR_CLK_MNG_REG 0x88 -#define OTG_COMP_DISABLE BIT(0) - -#define ULPI_MISC_A 0x96 -#define ULPI_MISC_A_VBUSVLDEXTSEL BIT(1) -#define ULPI_MISC_A_VBUSVLDEXT BIT(0) - -#define ASYNC_INTR_CTRL (1 << 29) /* Enable async interrupt */ -#define ULPI_STP_CTRL (1 << 30) /* Block communication with PHY */ -#define PHY_RETEN (1 << 1) /* PHY retention enable/disable */ -#define PHY_POR_ASSERT (1 << 0) /* USB2 28nm PHY POR ASSERT */ - -/* OTG definitions */ -#define OTGSC_INTSTS_MASK (0x7f << 16) -#define OTGSC_ID (1 << 8) -#define OTGSC_BSV (1 << 11) -#define OTGSC_IDIS (1 << 16) -#define OTGSC_BSVIS (1 << 19) -#define OTGSC_IDIE (1 << 24) -#define OTGSC_BSVIE (1 << 27) - -#endif /* __LINUX_USB_GADGET_MSM72K_UDC_H__ */ -- cgit v1.2.3 From ec7ed7708e009e046d1e16ed53ba4d6048748d07 Mon Sep 17 00:00:00 2001 From: Angelo Dureghello Date: Sat, 28 Oct 2017 00:23:01 +0200 Subject: spi: spi-fsl-dspi: enabling Coldfire mcf5441x dspi Signed-off-by: Angelo Dureghello Signed-off-by: Mark Brown --- include/linux/spi/spi-fsl-dspi.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/spi/spi-fsl-dspi.h (limited to 'include/linux') diff --git a/include/linux/spi/spi-fsl-dspi.h b/include/linux/spi/spi-fsl-dspi.h new file mode 100644 index 000000000000..74c9bae20bf2 --- /dev/null +++ b/include/linux/spi/spi-fsl-dspi.h @@ -0,0 +1,31 @@ +/* + * Freescale DSPI controller driver + * + * Copyright (c) 2017 Angelo Dureghello + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef SPI_FSL_DSPI_HEADER_H +#define SPI_FSL_DSPI_HEADER_H + +/** + * struct fsl_dspi_platform_data - platform data for the Freescale DSPI driver + * @bus_num: board specific identifier for this DSPI driver. + * @cs_num: number of chip selects supported by this DSPI driver. + */ +struct fsl_dspi_platform_data { + u32 cs_num; + u32 bus_num; + u32 sck_cs_delay; + u32 cs_sck_delay; +}; + +#endif /* SPI_FSL_DSPI_HEADER_H */ -- cgit v1.2.3 From 46209401f8f6116bd0b2c2d14a63958e83ffca0b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 3 Nov 2017 11:46:25 +0100 Subject: net: core: introduce mini_Qdisc and eliminate usage of tp->q for clsact fastpath In sch_handle_egress and sch_handle_ingress tp->q is used only in order to update stats. So stats and filter list are the only things that are needed in clsact qdisc fastpath processing. Introduce new mini_Qdisc struct to hold those items. Also, introduce a helper to swap the mini_Qdisc structures in case filter list head changes. This removes need for tp->q usage without added overhead. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 5e02f79b2110..7de7656550c2 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1559,6 +1559,8 @@ enum netdev_priv_flags { * * @rx_handler: handler for received packets * @rx_handler_data: XXX: need comments on this one + * @miniq_ingress: ingress/clsact qdisc specific data for + * ingress processing * @ingress_queue: XXX: need comments on this one * @broadcast: hw bcast address * @@ -1576,7 +1578,8 @@ enum netdev_priv_flags { * @tx_global_lock: XXX: need comments on this one * * @xps_maps: XXX: need comments on this one - * + * @miniq_egress: clsact qdisc specific data for + * egress processing * @watchdog_timeo: Represents the timeout that is used by * the watchdog (see dev_watchdog()) * @watchdog_timer: List of timers @@ -1795,7 +1798,7 @@ struct net_device { void __rcu *rx_handler_data; #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto __rcu *ingress_cl_list; + struct mini_Qdisc __rcu *miniq_ingress; #endif struct netdev_queue __rcu *ingress_queue; #ifdef CONFIG_NETFILTER_INGRESS @@ -1826,7 +1829,7 @@ struct net_device { struct xps_dev_maps __rcu *xps_maps; #endif #ifdef CONFIG_NET_CLS_ACT - struct tcf_proto __rcu *egress_cl_list; + struct mini_Qdisc __rcu *miniq_egress; #endif /* These may be needed for future network-power-down code. */ -- cgit v1.2.3 From a622c641665c32a879348efb2abc389527479ee4 Mon Sep 17 00:00:00 2001 From: Ladislav Michl Date: Wed, 25 Oct 2017 20:42:57 +0200 Subject: memory: omap-gpmc: Remove deprecated gpmc_update_nand_reg() Deprecated gpmc_update_nand_reg() is no longer used, remove. Signed-off-by: Ladislav Michl Signed-off-by: Roger Quadros --- include/linux/omap-gpmc.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/omap-gpmc.h b/include/linux/omap-gpmc.h index fd0de00c0d77..edfa280c3d56 100644 --- a/include/linux/omap-gpmc.h +++ b/include/linux/omap-gpmc.h @@ -36,18 +36,6 @@ static inline struct gpmc_nand_ops *gpmc_omap_get_nand_ops(struct gpmc_nand_regs } #endif /* CONFIG_OMAP_GPMC */ -/*--------------------------------*/ - -/* deprecated APIs */ -#if IS_ENABLED(CONFIG_OMAP_GPMC) -void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs); -#else -static inline void gpmc_update_nand_reg(struct gpmc_nand_regs *reg, int cs) -{ -} -#endif /* CONFIG_OMAP_GPMC */ -/*--------------------------------*/ - extern int gpmc_calc_timings(struct gpmc_timings *gpmc_t, struct gpmc_settings *gpmc_s, struct gpmc_device_timings *dev_t); -- cgit v1.2.3 From 592e254502041f953e84d091eae2c68cba04c10b Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 3 Nov 2017 12:21:21 +0100 Subject: mm: Handle 0 flags in _calc_vm_trans() macro _calc_vm_trans() does not handle the situation when some of the passed flags are 0 (which can happen if these VM flags do not make sense for the architecture). Improve the _calc_vm_trans() macro to return 0 in such situation. Since all passed flags are constant, this does not add any runtime overhead. Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/mman.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mman.h b/include/linux/mman.h index c8367041fafd..edb6cf6a81ed 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -63,8 +63,9 @@ static inline bool arch_validate_prot(unsigned long prot) * ("bit1" and "bit2" must be single bits) */ #define _calc_vm_trans(x, bit1, bit2) \ + ((!(bit1) || !(bit2)) ? 0 : \ ((bit1) <= (bit2) ? ((x) & (bit1)) * ((bit2) / (bit1)) \ - : ((x) & (bit1)) / ((bit1) / (bit2))) + : ((x) & (bit1)) / ((bit1) / (bit2)))) /* * Combine the mmap "prot" argument into "vm_flags" used internally. -- cgit v1.2.3 From 1c9725974074a047f6080eecc62c50a8e840d050 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 1 Nov 2017 16:36:30 +0100 Subject: mm: introduce MAP_SHARED_VALIDATE, a mechanism to safely define new mmap flags The mmap(2) syscall suffers from the ABI anti-pattern of not validating unknown flags. However, proposals like MAP_SYNC need a mechanism to define new behavior that is known to fail on older kernels without the support. Define a new MAP_SHARED_VALIDATE flag pattern that is guaranteed to fail on all legacy mmap implementations. It is worth noting that the original proposal was for a standalone MAP_VALIDATE flag. However, when that could not be supported by all archs Linus observed: I see why you *think* you want a bitmap. You think you want a bitmap because you want to make MAP_VALIDATE be part of MAP_SYNC etc, so that people can do ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED | MAP_SYNC, fd, 0); and "know" that MAP_SYNC actually takes. And I'm saying that whole wish is bogus. You're fundamentally depending on special semantics, just make it explicit. It's already not portable, so don't try to make it so. Rename that MAP_VALIDATE as MAP_SHARED_VALIDATE, make it have a value of 0x3, and make people do ret = mmap(NULL, size, PROT_READ | PROT_WRITE, MAP_SHARED_VALIDATE | MAP_SYNC, fd, 0); and then the kernel side is easier too (none of that random garbage playing games with looking at the "MAP_VALIDATE bit", but just another case statement in that map type thing. Boom. Done. Similar to ->fallocate() we also want the ability to validate the support for new flags on a per ->mmap() 'struct file_operations' instance basis. Towards that end arrange for flags to be generically validated against a mmap_supported_flags exported by 'struct file_operations'. By default all existing flags are implicitly supported, but new flags require MAP_SHARED_VALIDATE and per-instance-opt-in. Cc: Jan Kara Cc: Arnd Bergmann Cc: Andy Lutomirski Cc: Andrew Morton Suggested-by: Christoph Hellwig Suggested-by: Linus Torvalds Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/fs.h | 1 + include/linux/mman.h | 39 +++++++++++++++++++++++++++++++++++++++ 2 files changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 13dab191a23e..57added3201d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1701,6 +1701,7 @@ struct file_operations { long (*unlocked_ioctl) (struct file *, unsigned int, unsigned long); long (*compat_ioctl) (struct file *, unsigned int, unsigned long); int (*mmap) (struct file *, struct vm_area_struct *); + unsigned long mmap_supported_flags; int (*open) (struct inode *, struct file *); int (*flush) (struct file *, fl_owner_t id); int (*release) (struct inode *, struct file *); diff --git a/include/linux/mman.h b/include/linux/mman.h index edb6cf6a81ed..74452e3f2536 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -7,6 +7,45 @@ #include #include +/* + * Arrange for legacy / undefined architecture specific flags to be + * ignored by default in LEGACY_MAP_MASK. + */ +#ifndef MAP_32BIT +#define MAP_32BIT 0 +#endif +#ifndef MAP_HUGE_2MB +#define MAP_HUGE_2MB 0 +#endif +#ifndef MAP_HUGE_1GB +#define MAP_HUGE_1GB 0 +#endif +#ifndef MAP_UNINITIALIZED +#define MAP_UNINITIALIZED 0 +#endif + +/* + * The historical set of flags that all mmap implementations implicitly + * support when a ->mmap_validate() op is not provided in file_operations. + */ +#define LEGACY_MAP_MASK (MAP_SHARED \ + | MAP_PRIVATE \ + | MAP_FIXED \ + | MAP_ANONYMOUS \ + | MAP_DENYWRITE \ + | MAP_EXECUTABLE \ + | MAP_UNINITIALIZED \ + | MAP_GROWSDOWN \ + | MAP_LOCKED \ + | MAP_NORESERVE \ + | MAP_POPULATE \ + | MAP_NONBLOCK \ + | MAP_STACK \ + | MAP_HUGETLB \ + | MAP_32BIT \ + | MAP_HUGE_2MB \ + | MAP_HUGE_1GB) + extern int sysctl_overcommit_memory; extern int sysctl_overcommit_ratio; extern unsigned long sysctl_overcommit_kbytes; -- cgit v1.2.3 From d81b8a722f691a3907eb4a6df410ab517ba5bfcc Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:31 +0100 Subject: mm: Remove VM_FAULT_HWPOISON_LARGE_MASK It is unused. Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 065d99deb847..ca72b67153d5 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1182,8 +1182,6 @@ static inline void clear_page_pfmemalloc(struct page *page) #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ #define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */ -#define VM_FAULT_HWPOISON_LARGE_MASK 0xf000 /* encodes hpage index for large hwpoison */ - #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ VM_FAULT_FALLBACK) -- cgit v1.2.3 From 9a0dd42251439de635088b97533109a935864a84 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:39 +0100 Subject: dax: Allow dax_iomap_fault() to return pfn For synchronous page fault dax_iomap_fault() will need to return PFN which will then need to be inserted into page tables after fsync() completes. Add necessary parameter to dax_iomap_fault(). Reviewed-by: Christoph Hellwig Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/dax.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index 122197124b9d..e7fa4b8f45bc 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -95,7 +95,7 @@ bool dax_write_cache_enabled(struct dax_device *dax_dev); ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, - const struct iomap_ops *ops); + pfn_t *pfnp, const struct iomap_ops *ops); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -- cgit v1.2.3 From b6fb293f2497a9841d94f6b57bd2bb2cd222da43 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:41 +0100 Subject: mm: Define MAP_SYNC and VM_SYNC flags Define new MAP_SYNC flag and corresponding VMA VM_SYNC flag. As the MAP_SYNC flag is not part of LEGACY_MAP_MASK, currently it will be refused by all MAP_SHARED_VALIDATE map attempts and silently ignored for everything else. Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/mm.h | 1 + include/linux/mman.h | 8 ++++++-- 2 files changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ca72b67153d5..5411cb7442de 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -189,6 +189,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ +#define VM_SYNC 0x00800000 /* Synchronous page faults */ #define VM_ARCH_1 0x01000000 /* Architecture-specific flag */ #define VM_WIPEONFORK 0x02000000 /* Wipe VMA contents in child. */ #define VM_DONTDUMP 0x04000000 /* Do not include in the core dump */ diff --git a/include/linux/mman.h b/include/linux/mman.h index 74452e3f2536..3427bf3daef5 100644 --- a/include/linux/mman.h +++ b/include/linux/mman.h @@ -9,7 +9,7 @@ /* * Arrange for legacy / undefined architecture specific flags to be - * ignored by default in LEGACY_MAP_MASK. + * ignored by mmap handling code. */ #ifndef MAP_32BIT #define MAP_32BIT 0 @@ -23,6 +23,9 @@ #ifndef MAP_UNINITIALIZED #define MAP_UNINITIALIZED 0 #endif +#ifndef MAP_SYNC +#define MAP_SYNC 0 +#endif /* * The historical set of flags that all mmap implementations implicitly @@ -126,7 +129,8 @@ calc_vm_flag_bits(unsigned long flags) { return _calc_vm_trans(flags, MAP_GROWSDOWN, VM_GROWSDOWN ) | _calc_vm_trans(flags, MAP_DENYWRITE, VM_DENYWRITE ) | - _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ); + _calc_vm_trans(flags, MAP_LOCKED, VM_LOCKED ) | + _calc_vm_trans(flags, MAP_SYNC, VM_SYNC ); } unsigned long vm_commit_limit(void); -- cgit v1.2.3 From caa51d26f85c248f1c4f43a870ad3ef84bf9eb8f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:42 +0100 Subject: dax, iomap: Add support for synchronous faults Add a flag to iomap interface informing the caller that inode needs fdstasync(2) for returned extent to become persistent and use it in DAX fault code so that we don't map such extents into page tables immediately. Instead we propagate the information that fdatasync(2) is necessary from dax_iomap_fault() with a new VM_FAULT_NEEDDSYNC flag. Filesystem fault handler is then responsible for calling fdatasync(2) and inserting pfn into page tables. Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/iomap.h | 5 +++++ include/linux/mm.h | 6 +++++- 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iomap.h b/include/linux/iomap.h index f64dc6ce5161..73e3b7085dbe 100644 --- a/include/linux/iomap.h +++ b/include/linux/iomap.h @@ -22,6 +22,11 @@ struct vm_fault; * Flags for all iomap mappings: */ #define IOMAP_F_NEW 0x01 /* blocks have been newly allocated */ +/* + * IOMAP_F_DIRTY indicates the inode has uncommitted metadata needed to access + * written data and requires fdatasync to commit them to persistent storage. + */ +#define IOMAP_F_DIRTY 0x02 /* * Flags that only need to be reported for IOMAP_REPORT requests: diff --git a/include/linux/mm.h b/include/linux/mm.h index 5411cb7442de..f57e55782d7d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1182,6 +1182,9 @@ static inline void clear_page_pfmemalloc(struct page *page) #define VM_FAULT_RETRY 0x0400 /* ->fault blocked, must retry */ #define VM_FAULT_FALLBACK 0x0800 /* huge page fault failed, fall back to small */ #define VM_FAULT_DONE_COW 0x1000 /* ->fault has fully handled COW */ +#define VM_FAULT_NEEDDSYNC 0x2000 /* ->fault did not modify page tables + * and needs fsync() to complete (for + * synchronous page faults in DAX) */ #define VM_FAULT_ERROR (VM_FAULT_OOM | VM_FAULT_SIGBUS | VM_FAULT_SIGSEGV | \ VM_FAULT_HWPOISON | VM_FAULT_HWPOISON_LARGE | \ @@ -1199,7 +1202,8 @@ static inline void clear_page_pfmemalloc(struct page *page) { VM_FAULT_LOCKED, "LOCKED" }, \ { VM_FAULT_RETRY, "RETRY" }, \ { VM_FAULT_FALLBACK, "FALLBACK" }, \ - { VM_FAULT_DONE_COW, "DONE_COW" } + { VM_FAULT_DONE_COW, "DONE_COW" }, \ + { VM_FAULT_NEEDDSYNC, "NEEDDSYNC" } /* Encode hstate index for a hwpoisoned large page */ #define VM_FAULT_SET_HINDEX(x) ((x) << 12) -- cgit v1.2.3 From 71eab6dfd91eabf06fe782a590c07e8a0795f5ea Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:43 +0100 Subject: dax: Implement dax_finish_sync_fault() Implement a function that filesystems can call to finish handling of synchronous page faults. It takes care of syncing appropriare file range and insertion of page table entry. Reviewed-by: Ross Zwisler Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/dax.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dax.h b/include/linux/dax.h index e7fa4b8f45bc..d403f78b706c 100644 --- a/include/linux/dax.h +++ b/include/linux/dax.h @@ -96,6 +96,8 @@ ssize_t dax_iomap_rw(struct kiocb *iocb, struct iov_iter *iter, const struct iomap_ops *ops); int dax_iomap_fault(struct vm_fault *vmf, enum page_entry_size pe_size, pfn_t *pfnp, const struct iomap_ops *ops); +int dax_finish_sync_fault(struct vm_fault *vmf, enum page_entry_size pe_size, + pfn_t pfn); int dax_delete_mapping_entry(struct address_space *mapping, pgoff_t index); int dax_invalidate_mapping_entry_sync(struct address_space *mapping, pgoff_t index); -- cgit v1.2.3 From b8a6176c214cf9aa2679131ed7e4515cddaadc33 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 1 Nov 2017 16:36:45 +0100 Subject: ext4: Support for synchronous DAX faults We return IOMAP_F_DIRTY flag from ext4_iomap_begin() when asked to prepare blocks for writing and the inode has some uncommitted metadata changes. In the fault handler ext4_dax_fault() we then detect this case (through VM_FAULT_NEEDDSYNC return value) and call helper dax_finish_sync_fault() to flush metadata changes and insert page table entry. Note that this will also dirty corresponding radix tree entry which is what we want - fsync(2) will still provide data integrity guarantees for applications not using userspace flushing. And applications using userspace flushing can avoid calling fsync(2) and thus avoid the performance overhead. Reviewed-by: Ross Zwisler Signed-off-by: Jan Kara Signed-off-by: Dan Williams --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 606b6bce3a5b..296d1e0ea87b 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1367,6 +1367,7 @@ int jbd2_log_start_commit(journal_t *journal, tid_t tid); int __jbd2_log_start_commit(journal_t *journal, tid_t tid); int jbd2_journal_start_commit(journal_t *journal, tid_t *tid); int jbd2_log_wait_commit(journal_t *journal, tid_t tid); +int jbd2_transaction_committed(journal_t *journal, tid_t tid); int jbd2_complete_transaction(journal_t *journal, tid_t tid); int jbd2_log_do_checkpoint(journal_t *journal); int jbd2_trans_will_send_data_barrier(journal_t *journal, tid_t tid); -- cgit v1.2.3 From ada69a1639eca54ff74d839a6513c43db8d57d70 Mon Sep 17 00:00:00 2001 From: Gilad Ben-Yossef Date: Wed, 18 Oct 2017 08:00:38 +0100 Subject: crypto: introduce crypto wait for async op Invoking a possibly async. crypto op and waiting for completion while correctly handling backlog processing is a common task in the crypto API implementation and outside users of it. This patch adds a generic implementation for doing so in preparation for using it across the board instead of hand rolled versions. Signed-off-by: Gilad Ben-Yossef CC: Eric Biggers CC: Jonathan Cameron Signed-off-by: Herbert Xu --- include/linux/crypto.h | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crypto.h b/include/linux/crypto.h index 84da9978e951..78508ca4b108 100644 --- a/include/linux/crypto.h +++ b/include/linux/crypto.h @@ -24,6 +24,7 @@ #include #include #include +#include /* * Autoloaded crypto modules should only use a prefixed name to avoid allowing @@ -467,6 +468,45 @@ struct crypto_alg { struct module *cra_module; } CRYPTO_MINALIGN_ATTR; +/* + * A helper struct for waiting for completion of async crypto ops + */ +struct crypto_wait { + struct completion completion; + int err; +}; + +/* + * Macro for declaring a crypto op async wait object on stack + */ +#define DECLARE_CRYPTO_WAIT(_wait) \ + struct crypto_wait _wait = { \ + COMPLETION_INITIALIZER_ONSTACK((_wait).completion), 0 } + +/* + * Async ops completion helper functioons + */ +void crypto_req_done(struct crypto_async_request *req, int err); + +static inline int crypto_wait_req(int err, struct crypto_wait *wait) +{ + switch (err) { + case -EINPROGRESS: + case -EBUSY: + wait_for_completion(&wait->completion); + reinit_completion(&wait->completion); + err = wait->err; + break; + }; + + return err; +} + +static inline void crypto_init_wait(struct crypto_wait *wait) +{ + init_completion(&wait->completion); +} + /* * Algorithm registration interface. */ -- cgit v1.2.3 From 27e64b4be4b863d884f3ec1686a2f744ae93a1b9 Mon Sep 17 00:00:00 2001 From: Dave Martin Date: Tue, 31 Oct 2017 15:50:53 +0000 Subject: regset: Add support for dynamically sized regsets Currently the regset API doesn't allow for the possibility that regsets (or at least, the amount of meaningful data in a regset) may change in size. In particular, this results in useless padding being added to coredumps if a regset's current size is smaller than its theoretical maximum size. This patch adds a get_size() function to struct user_regset. Individual regset implementations can implement this function to return the current size of the regset data. A regset_size() function is added to provide callers with an abstract interface for determining the size of a regset without needing to know whether the regset is dynamically sized or not. The only affected user of this interface is the ELF coredump code: This patch ports ELF coredump to dump regsets with their actual size in the coredump. This has no effect except for new regsets that are dynamically sized and provide a get_size() implementation. Signed-off-by: Dave Martin Reviewed-by: Catalin Marinas Cc: Oleg Nesterov Cc: Alexander Viro Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Cc: Dmitry Safonov Cc: H. J. Lu Signed-off-by: Will Deacon --- include/linux/regset.h | 67 ++++++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 60 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regset.h b/include/linux/regset.h index 8e0c9febf495..494cedaafdf2 100644 --- a/include/linux/regset.h +++ b/include/linux/regset.h @@ -106,6 +106,28 @@ typedef int user_regset_writeback_fn(struct task_struct *target, const struct user_regset *regset, int immediate); +/** + * user_regset_get_size_fn - type of @get_size function in &struct user_regset + * @target: thread being examined + * @regset: regset being examined + * + * This call is optional; usually the pointer is %NULL. + * + * When provided, this function must return the current size of regset + * data, as observed by the @get function in &struct user_regset. The + * value returned must be a multiple of @size. The returned size is + * required to be valid only until the next time (if any) @regset is + * modified for @target. + * + * This function is intended for dynamically sized regsets. A regset + * that is statically sized does not need to implement it. + * + * This function should not be called directly: instead, callers should + * call regset_size() to determine the current size of a regset. + */ +typedef unsigned int user_regset_get_size_fn(struct task_struct *target, + const struct user_regset *regset); + /** * struct user_regset - accessible thread CPU state * @n: Number of slots (registers). @@ -117,19 +139,33 @@ typedef int user_regset_writeback_fn(struct task_struct *target, * @set: Function to store values. * @active: Function to report if regset is active, or %NULL. * @writeback: Function to write data back to user memory, or %NULL. + * @get_size: Function to return the regset's size, or %NULL. * * This data structure describes a machine resource we call a register set. * This is part of the state of an individual thread, not necessarily * actual CPU registers per se. A register set consists of a number of * similar slots, given by @n. Each slot is @size bytes, and aligned to - * @align bytes (which is at least @size). + * @align bytes (which is at least @size). For dynamically-sized + * regsets, @n must contain the maximum possible number of slots for the + * regset, and @get_size must point to a function that returns the + * current regset size. * - * These functions must be called only on the current thread or on a - * thread that is in %TASK_STOPPED or %TASK_TRACED state, that we are - * guaranteed will not be woken up and return to user mode, and that we - * have called wait_task_inactive() on. (The target thread always might - * wake up for SIGKILL while these functions are working, in which case - * that thread's user_regset state might be scrambled.) + * Callers that need to know only the current size of the regset and do + * not care about its internal structure should call regset_size() + * instead of inspecting @n or calling @get_size. + * + * For backward compatibility, the @get and @set methods must pad to, or + * accept, @n * @size bytes, even if the current regset size is smaller. + * The precise semantics of these operations depend on the regset being + * accessed. + * + * The functions to which &struct user_regset members point must be + * called only on the current thread or on a thread that is in + * %TASK_STOPPED or %TASK_TRACED state, that we are guaranteed will not + * be woken up and return to user mode, and that we have called + * wait_task_inactive() on. (The target thread always might wake up for + * SIGKILL while these functions are working, in which case that + * thread's user_regset state might be scrambled.) * * The @pos argument must be aligned according to @align; the @count * argument must be a multiple of @size. These functions are not @@ -156,6 +192,7 @@ struct user_regset { user_regset_set_fn *set; user_regset_active_fn *active; user_regset_writeback_fn *writeback; + user_regset_get_size_fn *get_size; unsigned int n; unsigned int size; unsigned int align; @@ -371,5 +408,21 @@ static inline int copy_regset_from_user(struct task_struct *target, return regset->set(target, regset, offset, size, NULL, data); } +/** + * regset_size - determine the current size of a regset + * @target: thread to be examined + * @regset: regset to be examined + * + * Note that the returned size is valid only until the next time + * (if any) @regset is modified for @target. + */ +static inline unsigned int regset_size(struct task_struct *target, + const struct user_regset *regset) +{ + if (!regset->get_size) + return regset->n * regset->size; + else + return regset->get_size(target, regset); +} #endif /* */ -- cgit v1.2.3 From 8977f563845bdd61fc61c4dfb399270b7d5667c6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:48 +0300 Subject: block: move REQ_NOWAIT This flag should be before the operation-specific REQ_NOUNMAP bit. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 3385c89f402e..c4c6c8bced2e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -224,11 +224,11 @@ enum req_flag_bits { __REQ_PREFLUSH, /* request for cache flush */ __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_BACKGROUND, /* background IO */ + __REQ_NOWAIT, /* Don't wait if request will block */ /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ - __REQ_NOWAIT, /* Don't wait if request will block */ __REQ_NR_BITS, /* stops here */ }; @@ -245,9 +245,9 @@ enum req_flag_bits { #define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) +#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) -#define REQ_NOWAIT (1ULL << __REQ_NOWAIT) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From 96222bcc732d0504363dc772637c50e53b4bd41e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:49 +0300 Subject: block: add REQ_DRV bit Set aside a bit in the request/bio flags for driver use. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c4c6c8bced2e..1b04085255fb 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -229,6 +229,9 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + /* for driver use */ + __REQ_DRV, + __REQ_NR_BITS, /* stops here */ }; @@ -249,6 +252,8 @@ enum req_flag_bits { #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_DRV (1ULL << __REQ_DRV) + #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) -- cgit v1.2.3 From f421e1d9ade4e1b88183e54425cf50e390d16a7f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:50 +0300 Subject: block: provide a direct_make_request helper MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This helper allows reinserting a bio into a new queue without much overhead, but requires all queue limits to be the same for the upper and lower queues, and it does not provide any recursion preventions. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Javier González Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 72637028f3c9..eda3d25c0f68 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -920,6 +920,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); +extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); -- cgit v1.2.3 From ef71de8b15d891b27b8c983a9a8972b11cb4576a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:51 +0300 Subject: block: add a blk_steal_bios helper This helpers allows to bounce steal the uncompleted bios from a request so that they can be reissued on another path. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eda3d25c0f68..fddda6a1f9b5 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1094,6 +1094,8 @@ extern struct request *blk_peek_request(struct request_queue *q); extern void blk_start_request(struct request *rq); extern struct request *blk_fetch_request(struct request_queue *q); +void blk_steal_bios(struct bio_list *list, struct request *rq); + /* * Request completion related functions. * -- cgit v1.2.3 From 517bf3c306bad4fe0da631f90ae2ec40924dee2b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:52 +0300 Subject: block: don't look at the struct device dev_t in disk_devt The hidden gendisks introduced in the next patch need to keep the dev field in their struct device empty so that udev won't try to create block device nodes for them. To support that rewrite disk_devt to look at the major and first_minor fields in the gendisk itself instead of looking into the struct device. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ea652bfcd675..5c0ed5db33c2 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -234,7 +234,7 @@ static inline bool disk_part_scan_enabled(struct gendisk *disk) static inline dev_t disk_devt(struct gendisk *disk) { - return disk_to_dev(disk)->devt; + return MKDEV(disk->major, disk->first_minor); } static inline dev_t part_devt(struct hd_struct *part) -- cgit v1.2.3 From 8ddcd653257c18a669fcb75ee42c37054908e0d6 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:53 +0300 Subject: block: introduce GENHD_FL_HIDDEN With this flag a driver can create a gendisk that can be used for I/O submission inside the kernel, but which is not registered as user facing block device. This will be useful for the NVMe multipath implementation. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 5c0ed5db33c2..93aae3476f58 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -140,6 +140,7 @@ struct hd_struct { #define GENHD_FL_NATIVE_CAPACITY 128 #define GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE 256 #define GENHD_FL_NO_PART_SCAN 512 +#define GENHD_FL_HIDDEN 1024 enum { DISK_EVENT_MEDIA_CHANGE = 1 << 0, /* media changed */ -- cgit v1.2.3 From ea435e1b9392a33deceaea2a16ebaa3397bead93 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 2 Nov 2017 21:29:54 +0300 Subject: block: add a poll_fn callback to struct request_queue That we we can also poll non blk-mq queues. Mostly needed for the NVMe multipath code, but could also be useful elsewhere. Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index fddda6a1f9b5..225617dd0a3f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -266,6 +266,7 @@ struct blk_queue_ctx; typedef void (request_fn_proc) (struct request_queue *q); typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -408,6 +409,7 @@ struct request_queue { request_fn_proc *request_fn; make_request_fn *make_request_fn; + poll_q_fn *poll_fn; prep_rq_fn *prep_rq_fn; unprep_rq_fn *unprep_rq_fn; softirq_done_fn *softirq_done_fn; @@ -975,7 +977,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -bool blk_mq_poll(struct request_queue *q, blk_qc_t cookie); +bool blk_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 973b546451fdf11e518cc96d1b137af893a38db5 Mon Sep 17 00:00:00 2001 From: Lu Baolu Date: Fri, 3 Nov 2017 10:51:33 -0600 Subject: iommu/vt-d: Clear Page Request Overflow fault bit Currently Page Request Overflow bit in IOMMU Fault Status register is not cleared. Not clearing this bit would mean that any future page-request is going to be automatically dropped by IOMMU. Suggested-by: Ashok Raj Signed-off-by: Lu Baolu Signed-off-by: Alex Williamson --- include/linux/intel-iommu.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 485a5b48f038..f3274d9f46a2 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -212,6 +212,7 @@ #define DMA_FSTS_IQE (1 << 4) #define DMA_FSTS_ICE (1 << 5) #define DMA_FSTS_ITE (1 << 6) +#define DMA_FSTS_PRO (1 << 7) #define dma_fsts_fault_record_index(s) (((s) >> 8) & 0xff) /* FRCD_REG, 32 bits access */ -- cgit v1.2.3 From 722c856d46c6ca74a246b54a72f14751fec01aae Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 1 Nov 2017 14:25:23 -0500 Subject: platform/x86: wmi: Add new method wmidev_evaluate_method MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Drivers properly using the wmibus can pass their wmi_device pointer rather than the GUID back to the WMI bus to evaluate the proper methods. Any "new" drivers added that use the WMI bus should use this rather than the old wmi_evaluate_method that would take the GUID. Signed-off-by: Mario Limonciello Reviewed-by: Edward O'Callaghan Reviewed-by: Pali Rohár Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index cd0d7734dc49..2cd10c3b89e9 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -26,6 +26,12 @@ struct wmi_device { bool setable; }; +/* evaluate the ACPI method associated with this device */ +extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, + u8 instance, u32 method_id, + const struct acpi_buffer *in, + struct acpi_buffer *out); + /* Caller must kfree the result. */ extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); -- cgit v1.2.3 From f97e058cfe8032504e310bd5c20e35d640ef2858 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 1 Nov 2017 14:25:28 -0500 Subject: platform/x86: wmi: Don't allow drivers to get each other's GUIDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The only driver using this was dell-wmi, and it really was a hack. The driver was getting a data attribute from another driver and this type of action should not be encouraged. Rather drivers that need to interact with one another should pass data back and forth via exported functions. Signed-off-by: Mario Limonciello Reviewed-by: Edward O'Callaghan Reviewed-by: Pali Rohár Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index 2cd10c3b89e9..ddee427e0721 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -36,10 +36,6 @@ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); -/* Gets another device on the same bus. Caller must put_device the result. */ -extern struct wmi_device *wmidev_get_other_guid(struct wmi_device *wdev, - const char *guid_string); - struct wmi_device_id { const char *guid_string; }; -- cgit v1.2.3 From 44b6b7661132b1b0e5fd3147ded66f1e4a817ca9 Mon Sep 17 00:00:00 2001 From: Mario Limonciello Date: Wed, 1 Nov 2017 14:25:35 -0500 Subject: platform/x86: wmi: create userspace interface for drivers For WMI operations that are only Set or Query readable and writable sysfs attributes created by WMI vendor drivers or the bus driver makes sense. For other WMI operations that are run on Method, there needs to be a way to guarantee to userspace that the results from the method call belong to the data request to the method call. Sysfs attributes don't work well in this scenario because two userspace processes may be competing at reading/writing an attribute and step on each other's data. When a WMI vendor driver declares a callback method in the wmi_driver the WMI bus driver will create a character device that maps to that function. This callback method will be responsible for filtering invalid requests and performing the actual call. That character device will correspond to this path: /dev/wmi/$driver Performing read() on this character device will provide the size of the buffer that the character device needs to perform calls. This buffer size can be set by vendor drivers through a new symbol or when MOF parsing is available by the MOF. Performing ioctl() on this character device will be interpretd by the WMI bus driver. It will perform sanity tests for size of data, test them for a valid instance, copy the data from userspace and pass iton to the vendor driver to further process and run. This creates an implicit policy that each driver will only be allowed a single character device. If a module matches multiple GUID's, the wmi_devices will need to be all handled by the same wmi_driver. The WMI vendor drivers will be responsible for managing inappropriate access to this character device and proper locking on data used by it. When a WMI vendor driver is unloaded the WMI bus driver will clean up the character device and any memory allocated for the call. Signed-off-by: Mario Limonciello Reviewed-by: Edward O'Callaghan Signed-off-by: Darren Hart (VMware) --- include/linux/wmi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/wmi.h b/include/linux/wmi.h index ddee427e0721..4757cb5077e5 100644 --- a/include/linux/wmi.h +++ b/include/linux/wmi.h @@ -18,6 +18,7 @@ #include #include +#include struct wmi_device { struct device dev; @@ -36,6 +37,8 @@ extern acpi_status wmidev_evaluate_method(struct wmi_device *wdev, extern union acpi_object *wmidev_block_query(struct wmi_device *wdev, u8 instance); +extern int set_required_buffer_size(struct wmi_device *wdev, u64 length); + struct wmi_device_id { const char *guid_string; }; @@ -47,6 +50,8 @@ struct wmi_driver { int (*probe)(struct wmi_device *wdev); int (*remove)(struct wmi_device *wdev); void (*notify)(struct wmi_device *device, union acpi_object *data); + long (*filter_callback)(struct wmi_device *wdev, unsigned int cmd, + struct wmi_ioctl_buffer *arg); }; extern int __must_check __wmi_driver_register(struct wmi_driver *driver, -- cgit v1.2.3 From 5fd54ace4721fc5ce2bb5aef6318fcf17f421460 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 3 Nov 2017 11:28:30 +0100 Subject: USB: add SPDX identifiers to all remaining files in drivers/usb/ It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the drivers/usb/ and include/linux/usb* files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Signed-off-by: Greg Kroah-Hartman Acked-by: Felipe Balbi Acked-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/association.h | 1 + include/linux/usb/audio-v2.h | 1 + include/linux/usb/audio.h | 1 + include/linux/usb/c67x00.h | 1 + include/linux/usb/cdc-wdm.h | 1 + include/linux/usb/cdc.h | 1 + include/linux/usb/cdc_ncm.h | 1 + include/linux/usb/composite.h | 1 + include/linux/usb/ehci_def.h | 1 + include/linux/usb/ehci_pdriver.h | 1 + include/linux/usb/g_hid.h | 1 + include/linux/usb/gadget.h | 1 + include/linux/usb/gpio_vbus.h | 1 + include/linux/usb/hcd.h | 1 + include/linux/usb/input.h | 1 + include/linux/usb/isp1301.h | 1 + include/linux/usb/m66592.h | 1 + include/linux/usb/musb-ux500.h | 1 + include/linux/usb/net2280.h | 1 + include/linux/usb/of.h | 1 + include/linux/usb/ohci_pdriver.h | 1 + include/linux/usb/otg-fsm.h | 1 + include/linux/usb/phy_companion.h | 1 + include/linux/usb/r8a66597.h | 1 + include/linux/usb/renesas_usbhs.h | 1 + include/linux/usb/rndis_host.h | 1 + include/linux/usb/samsung_usb_phy.h | 1 + include/linux/usb/serial.h | 1 + include/linux/usb/storage.h | 1 + include/linux/usb/tegra_usb_phy.h | 1 + include/linux/usb/tilegx.h | 1 + include/linux/usb/ulpi.h | 1 + include/linux/usb/usb338x.h | 1 + include/linux/usb/usbnet.h | 1 + include/linux/usb/wusb-wa.h | 1 + include/linux/usb/wusb.h | 1 + include/linux/usb/xhci-dbgp.h | 1 + include/linux/usbdevice_fs.h | 1 + 38 files changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/association.h b/include/linux/usb/association.h index 0a4a18b3c1bb..d7f3cb9b9db5 100644 --- a/include/linux/usb/association.h +++ b/include/linux/usb/association.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Wireless USB - Cable Based Association * diff --git a/include/linux/usb/audio-v2.h b/include/linux/usb/audio-v2.h index fd73bc0e9027..3119d0ace7aa 100644 --- a/include/linux/usb/audio-v2.h +++ b/include/linux/usb/audio-v2.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (c) 2010 Daniel Mack * diff --git a/include/linux/usb/audio.h b/include/linux/usb/audio.h index 3d84619110a4..170acd500ea1 100644 --- a/include/linux/usb/audio.h +++ b/include/linux/usb/audio.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * -- USB Audio definitions. * diff --git a/include/linux/usb/c67x00.h b/include/linux/usb/c67x00.h index 83c6b45470ca..2fc39e3b7281 100644 --- a/include/linux/usb/c67x00.h +++ b/include/linux/usb/c67x00.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * usb_c67x00.h: platform definitions for the Cypress C67X00 USB chip * diff --git a/include/linux/usb/cdc-wdm.h b/include/linux/usb/cdc-wdm.h index 0b3f4295c025..9b895f93d8de 100644 --- a/include/linux/usb/cdc-wdm.h +++ b/include/linux/usb/cdc-wdm.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * USB CDC Device Management subdriver * diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index b5706f94ee9e..35d784cf32a4 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * USB CDC common helpers * diff --git a/include/linux/usb/cdc_ncm.h b/include/linux/usb/cdc_ncm.h index 1a59699cf82a..1646c06989df 100644 --- a/include/linux/usb/cdc_ncm.h +++ b/include/linux/usb/cdc_ncm.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: (GPL-2.0 OR BSD-2-Clause) /* * Copyright (C) ST-Ericsson 2010-2012 * Contact: Alexey Orishko diff --git a/include/linux/usb/composite.h b/include/linux/usb/composite.h index f665d2ceac20..cef0e44601f8 100644 --- a/include/linux/usb/composite.h +++ b/include/linux/usb/composite.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * composite.h -- framework for usb gadgets which are composite devices * diff --git a/include/linux/usb/ehci_def.h b/include/linux/usb/ehci_def.h index e479033bd782..a15ce99dfc2d 100644 --- a/include/linux/usb/ehci_def.h +++ b/include/linux/usb/ehci_def.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2001-2002 by David Brownell * diff --git a/include/linux/usb/ehci_pdriver.h b/include/linux/usb/ehci_pdriver.h index db0431b39a63..dd742afdc03f 100644 --- a/include/linux/usb/ehci_pdriver.h +++ b/include/linux/usb/ehci_pdriver.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2012 Hauke Mehrtens * diff --git a/include/linux/usb/g_hid.h b/include/linux/usb/g_hid.h index 50f5745df28c..7581e488c237 100644 --- a/include/linux/usb/g_hid.h +++ b/include/linux/usb/g_hid.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * g_hid.h -- Header file for USB HID gadget driver * diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 523e5a2b5015..0142f3af0da6 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * * diff --git a/include/linux/usb/gpio_vbus.h b/include/linux/usb/gpio_vbus.h index 837bba604a0b..804fb06cf6d6 100644 --- a/include/linux/usb/gpio_vbus.h +++ b/include/linux/usb/gpio_vbus.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * A simple GPIO VBUS sensing driver for B peripheral only devices * with internal transceivers. diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index a1f03ebfde47..176900528822 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (c) 2001-2002 by David Brownell * diff --git a/include/linux/usb/input.h b/include/linux/usb/input.h index 0e010b220e85..974befa72ac0 100644 --- a/include/linux/usb/input.h +++ b/include/linux/usb/input.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2005 Dmitry Torokhov * diff --git a/include/linux/usb/isp1301.h b/include/linux/usb/isp1301.h index d3a851c28b6a..dedb3b2473e8 100644 --- a/include/linux/usb/isp1301.h +++ b/include/linux/usb/isp1301.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * NXP ISP1301 USB transceiver driver * diff --git a/include/linux/usb/m66592.h b/include/linux/usb/m66592.h index a4ba31ab2fed..2dfe68183495 100644 --- a/include/linux/usb/m66592.h +++ b/include/linux/usb/m66592.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * M66592 driver platform data * diff --git a/include/linux/usb/musb-ux500.h b/include/linux/usb/musb-ux500.h index 1e2c7130f6e1..c4b7ad9850ca 100644 --- a/include/linux/usb/musb-ux500.h +++ b/include/linux/usb/musb-ux500.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2013 ST-Ericsson AB * diff --git a/include/linux/usb/net2280.h b/include/linux/usb/net2280.h index 725120224472..08b85caecfaf 100644 --- a/include/linux/usb/net2280.h +++ b/include/linux/usb/net2280.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * NetChip 2280 high/full speed USB device controller. * Unlike many such controllers, this one talks PCI. diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 4031f47629ec..6cbe7a5c2b57 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * OF helpers for usb devices. * diff --git a/include/linux/usb/ohci_pdriver.h b/include/linux/usb/ohci_pdriver.h index 012f2b7eb2b6..7eb16cf587ee 100644 --- a/include/linux/usb/ohci_pdriver.h +++ b/include/linux/usb/ohci_pdriver.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2012 Hauke Mehrtens * diff --git a/include/linux/usb/otg-fsm.h b/include/linux/usb/otg-fsm.h index a0a8f878503c..e78eb577d0fa 100644 --- a/include/linux/usb/otg-fsm.h +++ b/include/linux/usb/otg-fsm.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* Copyright (C) 2007,2008 Freescale Semiconductor, Inc. * * This program is free software; you can redistribute it and/or modify it diff --git a/include/linux/usb/phy_companion.h b/include/linux/usb/phy_companion.h index edd2ec23d282..407f530061cd 100644 --- a/include/linux/usb/phy_companion.h +++ b/include/linux/usb/phy_companion.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * phy-companion.h -- phy companion to indicate the comparator part of PHY * diff --git a/include/linux/usb/r8a66597.h b/include/linux/usb/r8a66597.h index 55805f9dcf21..c0753d026bbf 100644 --- a/include/linux/usb/r8a66597.h +++ b/include/linux/usb/r8a66597.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * R8A66597 driver platform data * diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index d2f41e4dd7a8..67102f3d59d4 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-1.0+ /* * Renesas USB * diff --git a/include/linux/usb/rndis_host.h b/include/linux/usb/rndis_host.h index d44ef85db177..809bccd08455 100644 --- a/include/linux/usb/rndis_host.h +++ b/include/linux/usb/rndis_host.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Host Side support for RNDIS Networking Links * Copyright (C) 2005 by David Brownell diff --git a/include/linux/usb/samsung_usb_phy.h b/include/linux/usb/samsung_usb_phy.h index 916782699f1c..dc0071741695 100644 --- a/include/linux/usb/samsung_usb_phy.h +++ b/include/linux/usb/samsung_usb_phy.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * Copyright (C) 2012 Samsung Electronics Co.Ltd * http://www.samsung.com/ diff --git a/include/linux/usb/serial.h b/include/linux/usb/serial.h index e2f0ab07eea5..106551a5616e 100644 --- a/include/linux/usb/serial.h +++ b/include/linux/usb/serial.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * USB Serial Converter stuff * diff --git a/include/linux/usb/storage.h b/include/linux/usb/storage.h index 305ee8db7faf..e0240f864548 100644 --- a/include/linux/usb/storage.h +++ b/include/linux/usb/storage.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 #ifndef __LINUX_USB_STORAGE_H #define __LINUX_USB_STORAGE_H diff --git a/include/linux/usb/tegra_usb_phy.h b/include/linux/usb/tegra_usb_phy.h index 1de16c324ec8..d641ea1660b7 100644 --- a/include/linux/usb/tegra_usb_phy.h +++ b/include/linux/usb/tegra_usb_phy.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright (C) 2010 Google, Inc. * diff --git a/include/linux/usb/tilegx.h b/include/linux/usb/tilegx.h index 2d65e3435680..817908573fe8 100644 --- a/include/linux/usb/tilegx.h +++ b/include/linux/usb/tilegx.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Copyright 2012 Tilera Corporation. All Rights Reserved. * diff --git a/include/linux/usb/ulpi.h b/include/linux/usb/ulpi.h index 5f07407a367a..c515765adab7 100644 --- a/include/linux/usb/ulpi.h +++ b/include/linux/usb/ulpi.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * ulpi.h -- ULPI defines and function prorotypes * diff --git a/include/linux/usb/usb338x.h b/include/linux/usb/usb338x.h index 11525d8d89a7..7189e3387bf9 100644 --- a/include/linux/usb/usb338x.h +++ b/include/linux/usb/usb338x.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * USB 338x super/high/full speed USB device controller. * Unlike many such controllers, this one talks PCI. diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index 97116379db5f..a69877734c4e 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /* * USB Networking Link Interface * diff --git a/include/linux/usb/wusb-wa.h b/include/linux/usb/wusb-wa.h index c1257130769b..64a840b5106e 100644 --- a/include/linux/usb/wusb-wa.h +++ b/include/linux/usb/wusb-wa.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Wireless USB Wire Adapter constants and structures. * diff --git a/include/linux/usb/wusb.h b/include/linux/usb/wusb.h index eeb28329fa3c..9e4a3213f2c2 100644 --- a/include/linux/usb/wusb.h +++ b/include/linux/usb/wusb.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Wireless USB Standard Definitions * Event Size Tables diff --git a/include/linux/usb/xhci-dbgp.h b/include/linux/usb/xhci-dbgp.h index 80c1cca1f529..0a37f1283bf0 100644 --- a/include/linux/usb/xhci-dbgp.h +++ b/include/linux/usb/xhci-dbgp.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * Standalone xHCI debug capability driver * diff --git a/include/linux/usbdevice_fs.h b/include/linux/usbdevice_fs.h index 04a26285416c..79aab0065ec8 100644 --- a/include/linux/usbdevice_fs.h +++ b/include/linux/usbdevice_fs.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0+ /*****************************************************************************/ /* -- cgit v1.2.3 From a8497b31fee650846fcc5a1e41f4ef22318a9925 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 24 Oct 2017 03:00:18 -0700 Subject: tty: cyclades: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Moves timer structures from global to attached to struct cyclades_port. Cc: Jiri Slaby Signed-off-by: Kees Cook Signed-off-by: Greg Kroah-Hartman --- include/linux/cyclades.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cyclades.h b/include/linux/cyclades.h index 19ae518f5471..3638eb2d52f6 100644 --- a/include/linux/cyclades.h +++ b/include/linux/cyclades.h @@ -156,6 +156,9 @@ struct cyclades_port { struct cyclades_icount icount; struct completion shutdown_wait; int throttle; +#ifdef CONFIG_CYZ_INTR + struct timer_list rx_full_timer; +#endif }; #define CLOSING_WAIT_DELAY 30*HZ -- cgit v1.2.3 From 88022d7201e96b43f1754b0358fc6bcd8dbdcde1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 5 Nov 2017 02:21:12 +0800 Subject: blk-mq: don't handle failure in .get_budget It is enough to just check if we can get the budget via .get_budget(). And we don't need to deal with device state change in .get_budget(). For SCSI, one issue to be fixed is that we have to call scsi_mq_uninit_cmd() to free allocated ressources if SCSI device fails to handle the request. And it isn't enough to simply call blk_mq_end_request() to do that if this request is marked as RQF_DONTPREP. Fixes: 0df21c86bdbf(scsi: implement .get_budget and .put_budget for blk-mq) Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f2e3079eecdd..674641527da7 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -92,7 +92,7 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); -typedef blk_status_t (get_budget_fn)(struct blk_mq_hw_ctx *); +typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); -- cgit v1.2.3 From c02762eb20cb57ec5b7c037b056c37d5838c803f Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 16:03:17 -0500 Subject: net/mlx5: QCAM register firmware command support The QCAM register provides capability bit for all the QoS registers using ACCESS_REG command. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 14 ++++++++++++++ include/linux/mlx5/driver.h | 2 ++ include/linux/mlx5/mlx5_ifc.h | 40 +++++++++++++++++++++++++++++++++++++++- 3 files changed, 55 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index e32dbc4934db..6d79b3f79458 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -1000,6 +1000,14 @@ enum mlx5_mcam_feature_groups { MLX5_MCAM_FEATURE_ENHANCED_FEATURES = 0x0, }; +enum mlx5_qcam_reg_groups { + MLX5_QCAM_REGS_FIRST_128 = 0x0, +}; + +enum mlx5_qcam_feature_groups { + MLX5_QCAM_FEATURE_ENHANCED_FEATURES = 0x0, +}; + /* GET Dev Caps macros */ #define MLX5_CAP_GEN(mdev, cap) \ MLX5_GET(cmd_hca_cap, mdev->caps.hca_cur[MLX5_CAP_GENERAL], cap) @@ -1108,6 +1116,12 @@ enum mlx5_mcam_feature_groups { #define MLX5_CAP_MCAM_FEATURE(mdev, fld) \ MLX5_GET(mcam_reg, (mdev)->caps.mcam, mng_feature_cap_mask.enhanced_features.fld) +#define MLX5_CAP_QCAM_REG(mdev, fld) \ + MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_access_reg_cap_mask.reg_cap.fld) + +#define MLX5_CAP_QCAM_FEATURE(mdev, fld) \ + MLX5_GET(qcam_reg, (mdev)->caps.qcam, qos_feature_cap_mask.feature_cap.fld) + #define MLX5_CAP_FPGA(mdev, cap) \ MLX5_GET(fpga_cap, (mdev)->caps.fpga, cap) diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 08c77b7e59cb..ed5be52282ea 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -109,6 +109,7 @@ enum { enum { MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, + MLX5_REG_QCAM = 0x4019, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, MLX5_REG_FPGA_CAP = 0x4022, @@ -798,6 +799,7 @@ struct mlx5_core_dev { u32 pcam[MLX5_ST_SZ_DW(pcam_reg)]; u32 mcam[MLX5_ST_SZ_DW(mcam_reg)]; u32 fpga[MLX5_ST_SZ_DW(fpga_cap)]; + u32 qcam[MLX5_ST_SZ_DW(qcam_reg)]; } caps; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 69772347f866..f127c5b310c5 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -838,7 +838,8 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 cc_modify_allowed[0x1]; u8 start_pad[0x1]; u8 cache_line_128byte[0x1]; - u8 reserved_at_165[0xb]; + u8 reserved_at_165[0xa]; + u8 qcam_reg[0x1]; u8 gid_table_size[0x10]; u8 out_of_seq_cnt[0x1]; @@ -7890,6 +7891,43 @@ struct mlx5_ifc_mcam_reg_bits { u8 reserved_at_1c0[0x80]; }; +struct mlx5_ifc_qcam_access_reg_cap_mask { + u8 qcam_access_reg_cap_mask_127_to_20[0x6C]; + u8 qpdpm[0x1]; + u8 qcam_access_reg_cap_mask_18_to_4[0x0F]; + u8 qdpm[0x1]; + u8 qpts[0x1]; + u8 qcap[0x1]; + u8 qcam_access_reg_cap_mask_0[0x1]; +}; + +struct mlx5_ifc_qcam_qos_feature_cap_mask { + u8 qcam_qos_feature_cap_mask_127_to_1[0x7F]; + u8 qpts_trust_both[0x1]; +}; + +struct mlx5_ifc_qcam_reg_bits { + u8 reserved_at_0[0x8]; + u8 feature_group[0x8]; + u8 reserved_at_10[0x8]; + u8 access_reg_group[0x8]; + u8 reserved_at_20[0x20]; + + union { + struct mlx5_ifc_qcam_access_reg_cap_mask reg_cap; + u8 reserved_at_0[0x80]; + } qos_access_reg_cap_mask; + + u8 reserved_at_c0[0x80]; + + union { + struct mlx5_ifc_qcam_qos_feature_cap_mask feature_cap; + u8 reserved_at_0[0x80]; + } qos_feature_cap_mask; + + u8 reserved_at_1c0[0x80]; +}; + struct mlx5_ifc_pcap_reg_bits { u8 reserved_at_0[0x8]; u8 local_port[0x8]; -- cgit v1.2.3 From 71c70eb21c33c60433b95e72a59d40bb128db649 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Wed, 2 Aug 2017 21:36:23 -0500 Subject: net/mlx5: Add MLX5_SET16 and MLX5_GET16 Add MLX5_SET16 and MLX5_GET16 for 16bit structure field in firmware command. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Eli Cohen Signed-off-by: Saeed Mahameed --- include/linux/mlx5/device.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 6d79b3f79458..409ffb14298a 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -49,11 +49,15 @@ #define __mlx5_nullp(typ) ((struct mlx5_ifc_##typ##_bits *)0) #define __mlx5_bit_sz(typ, fld) sizeof(__mlx5_nullp(typ)->fld) #define __mlx5_bit_off(typ, fld) (offsetof(struct mlx5_ifc_##typ##_bits, fld)) +#define __mlx5_16_off(typ, fld) (__mlx5_bit_off(typ, fld) / 16) #define __mlx5_dw_off(typ, fld) (__mlx5_bit_off(typ, fld) / 32) #define __mlx5_64_off(typ, fld) (__mlx5_bit_off(typ, fld) / 64) +#define __mlx5_16_bit_off(typ, fld) (16 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0xf)) #define __mlx5_dw_bit_off(typ, fld) (32 - __mlx5_bit_sz(typ, fld) - (__mlx5_bit_off(typ, fld) & 0x1f)) #define __mlx5_mask(typ, fld) ((u32)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) #define __mlx5_dw_mask(typ, fld) (__mlx5_mask(typ, fld) << __mlx5_dw_bit_off(typ, fld)) +#define __mlx5_mask16(typ, fld) ((u16)((1ull << __mlx5_bit_sz(typ, fld)) - 1)) +#define __mlx5_16_mask(typ, fld) (__mlx5_mask16(typ, fld) << __mlx5_16_bit_off(typ, fld)) #define __mlx5_st_sz_bits(typ) sizeof(struct mlx5_ifc_##typ##_bits) #define MLX5_FLD_SZ_BYTES(typ, fld) (__mlx5_bit_sz(typ, fld) / 8) @@ -116,6 +120,19 @@ __mlx5_mask(typ, fld)) ___t; \ }) +#define MLX5_GET16(typ, p, fld) ((be16_to_cpu(*((__be16 *)(p) +\ +__mlx5_16_off(typ, fld))) >> __mlx5_16_bit_off(typ, fld)) & \ +__mlx5_mask16(typ, fld)) + +#define MLX5_SET16(typ, p, fld, v) do { \ + u16 _v = v; \ + BUILD_BUG_ON(__mlx5_st_sz_bits(typ) % 16); \ + *((__be16 *)(p) + __mlx5_16_off(typ, fld)) = \ + cpu_to_be16((be16_to_cpu(*((__be16 *)(p) + __mlx5_16_off(typ, fld))) & \ + (~__mlx5_16_mask(typ, fld))) | (((_v) & __mlx5_mask16(typ, fld)) \ + << __mlx5_16_bit_off(typ, fld))); \ +} while (0) + /* Big endian getters */ #define MLX5_GET64_BE(typ, p, fld) (*((__be64 *)(p) +\ __mlx5_64_off(typ, fld))) -- cgit v1.2.3 From 415a64aa8dc6b4fc478609c549ca652d95a12f13 Mon Sep 17 00:00:00 2001 From: Huy Nguyen Date: Tue, 18 Jul 2017 16:08:46 -0500 Subject: net/mlx5: QPTS and QPDPM register firmware command support The QPTS register allows changing the priority trust state between pcp and dscp. Add support to get/set trust state from device. When the port is in pcp/dscp trust state, packet is routed by hardware to matching priority based on its pcp/dscp value respectively. The QPDPM register allow channing the dscp to priority mapping. Add support to get/set dscp to priority mapping from device. Note that to change a dscp mapping, the "e" bit of this dscp structure must be set in the QPDPM firmware command. Signed-off-by: Huy Nguyen Reviewed-by: Parav Pandit Reviewed-by: Or Gerlitz Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 7 +++++++ include/linux/mlx5/mlx5_ifc.h | 20 ++++++++++++++++++++ include/linux/mlx5/port.h | 5 +++++ 3 files changed, 32 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index ed5be52282ea..a886b51511ab 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -107,8 +107,10 @@ enum { }; enum { + MLX5_REG_QPTS = 0x4002, MLX5_REG_QETCR = 0x4005, MLX5_REG_QTCT = 0x400a, + MLX5_REG_QPDPM = 0x4013, MLX5_REG_QCAM = 0x4019, MLX5_REG_DCBX_PARAM = 0x4020, MLX5_REG_DCBX_APP = 0x4021, @@ -142,6 +144,11 @@ enum { MLX5_REG_MCAM = 0x907f, }; +enum mlx5_qpts_trust_state { + MLX5_QPTS_TRUST_PCP = 1, + MLX5_QPTS_TRUST_DSCP = 2, +}; + enum mlx5_dcbx_oper_mode { MLX5E_DCBX_PARAM_VER_OPER_HOST = 0x0, MLX5E_DCBX_PARAM_VER_OPER_AUTO = 0x3, diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index f127c5b310c5..3e5363f760dd 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -8578,6 +8578,26 @@ struct mlx5_ifc_qetc_reg_bits { struct mlx5_ifc_ets_global_config_reg_bits global_configuration; }; +struct mlx5_ifc_qpdpm_dscp_reg_bits { + u8 e[0x1]; + u8 reserved_at_01[0x0b]; + u8 prio[0x04]; +}; + +struct mlx5_ifc_qpdpm_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x10]; + struct mlx5_ifc_qpdpm_dscp_reg_bits dscp[64]; +}; + +struct mlx5_ifc_qpts_reg_bits { + u8 reserved_at_0[0x8]; + u8 local_port[0x8]; + u8 reserved_at_10[0x2d]; + u8 trust_state[0x3]; +}; + struct mlx5_ifc_qtct_reg_bits { u8 reserved_at_0[0x8]; u8 port_number[0x8]; diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index c59af8ab753a..035f0d4dc9fe 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -179,4 +179,9 @@ int mlx5_query_module_eeprom(struct mlx5_core_dev *dev, int mlx5_query_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *out); int mlx5_set_port_dcbx_param(struct mlx5_core_dev *mdev, u32 *in); + +int mlx5_set_trust_state(struct mlx5_core_dev *mdev, u8 trust_state); +int mlx5_query_trust_state(struct mlx5_core_dev *mdev, u8 *trust_state); +int mlx5_set_dscp2prio(struct mlx5_core_dev *mdev, u8 dscp, u8 prio); +int mlx5_query_dscp2prio(struct mlx5_core_dev *mdev, u8 *dscp2prio); #endif /* __MLX5_PORT_H__ */ -- cgit v1.2.3 From eb3b05fb0ff2cf0097e70b6c5e3afbe854a8deb7 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Sat, 4 Nov 2017 21:49:26 +0100 Subject: i2c: nuc900: remove platform_data, too Commit 7da62cb1853025 ("i2c: nuc900: remove driver") removed the driver, we should remove the platform_data as well. Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-nuc900.h | 9 --------- 1 file changed, 9 deletions(-) delete mode 100644 include/linux/platform_data/i2c-nuc900.h (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-nuc900.h b/include/linux/platform_data/i2c-nuc900.h deleted file mode 100644 index 9ffb12d06e91..000000000000 --- a/include/linux/platform_data/i2c-nuc900.h +++ /dev/null @@ -1,9 +0,0 @@ -#ifndef __ASM_ARCH_NUC900_I2C_H -#define __ASM_ARCH_NUC900_I2C_H - -struct nuc900_platform_i2c { - int bus_num; - unsigned long bus_freq; -}; - -#endif /* __ASM_ARCH_NUC900_I2C_H */ -- cgit v1.2.3 From f4e63525ee35f9c02e9f51f90571718363e9a9a9 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:16 -0700 Subject: net: bpf: rename ndo_xdp to ndo_bpf ndo_xdp is a control path callback for setting up XDP in the driver. We can reuse it for other forms of communication between the eBPF stack and the drivers. Rename the callback and associated structures and definitions. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/netdevice.h | 23 ++++++++++++----------- 1 file changed, 12 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 7de7656550c2..9af9feaaeb64 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -779,10 +779,10 @@ enum tc_setup_type { TC_SETUP_CBS, }; -/* These structures hold the attributes of xdp state that are being passed - * to the netdevice through the xdp op. +/* These structures hold the attributes of bpf state that are being passed + * to the netdevice through the bpf op. */ -enum xdp_netdev_command { +enum bpf_netdev_command { /* Set or clear a bpf program used in the earliest stages of packet * rx. The prog will have been loaded as BPF_PROG_TYPE_XDP. The callee * is responsible for calling bpf_prog_put on any old progs that are @@ -801,8 +801,8 @@ enum xdp_netdev_command { struct netlink_ext_ack; -struct netdev_xdp { - enum xdp_netdev_command command; +struct netdev_bpf { + enum bpf_netdev_command command; union { /* XDP_SETUP_PROG */ struct { @@ -1124,9 +1124,10 @@ struct dev_ifalias { * appropriate rx headroom value allows avoiding skb head copy on * forward. Setting a negative value resets the rx headroom to the * default value. - * int (*ndo_xdp)(struct net_device *dev, struct netdev_xdp *xdp); + * int (*ndo_bpf)(struct net_device *dev, struct netdev_bpf *bpf); * This function is used to set or query state related to XDP on the - * netdevice. See definition of enum xdp_netdev_command for details. + * netdevice and manage BPF offload. See definition of + * enum bpf_netdev_command for details. * int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); * This function is used to submit a XDP packet for transmit on a * netdevice. @@ -1315,8 +1316,8 @@ struct net_device_ops { struct sk_buff *skb); void (*ndo_set_rx_headroom)(struct net_device *dev, int needed_headroom); - int (*ndo_xdp)(struct net_device *dev, - struct netdev_xdp *xdp); + int (*ndo_bpf)(struct net_device *dev, + struct netdev_bpf *bpf); int (*ndo_xdp_xmit)(struct net_device *dev, struct xdp_buff *xdp); void (*ndo_xdp_flush)(struct net_device *dev); @@ -3311,10 +3312,10 @@ struct sk_buff *validate_xmit_skb_list(struct sk_buff *skb, struct net_device *d struct sk_buff *dev_hard_start_xmit(struct sk_buff *skb, struct net_device *dev, struct netdev_queue *txq, int *ret); -typedef int (*xdp_op_t)(struct net_device *dev, struct netdev_xdp *xdp); +typedef int (*bpf_op_t)(struct net_device *dev, struct netdev_bpf *bpf); int dev_change_xdp_fd(struct net_device *dev, struct netlink_ext_ack *extack, int fd, u32 flags); -u8 __dev_xdp_attached(struct net_device *dev, xdp_op_t xdp_op, u32 *prog_id); +u8 __dev_xdp_attached(struct net_device *dev, bpf_op_t xdp_op, u32 *prog_id); int __dev_forward_skb(struct net_device *dev, struct sk_buff *skb); int dev_forward_skb(struct net_device *dev, struct sk_buff *skb); -- cgit v1.2.3 From ab3f0063c48c26c927851b6767824e35a716d878 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:17 -0700 Subject: bpf: offload: add infrastructure for loading programs for a specific netdev The fact that we don't know which device the program is going to be used on is quite limiting in current eBPF infrastructure. We have to reverse or limit the changes which kernel makes to the loaded bytecode if we want it to be offloaded to a networking device. We also have to invent new APIs for debugging and troubleshooting support. Make it possible to load programs for a specific netdev. This helps us to bring the debug information closer to the core eBPF infrastructure (e.g. we will be able to reuse the verifer log in device JIT). It allows device JITs to perform translation on the original bytecode. __bpf_prog_get() when called to get a reference for an attachment point will now refuse to give it if program has a device assigned. Following patches will add a version of that function which passes the expected netdev in. @type argument in __bpf_prog_get() is renamed to attach_type to make it clearer that it's only set on attachment. All calls to ndo_bpf are protected by rtnl, only verifier callbacks are not. We need a wait queue to make sure netdev doesn't get destroyed while verifier is still running and calling its driver. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/bpf.h | 36 ++++++++++++++++++++++++++++++++++++ include/linux/bpf_verifier.h | 10 ++++++++++ include/linux/netdevice.h | 14 ++++++++++++++ 3 files changed, 60 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 520aeebe0d93..e45d43f9ec92 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -15,6 +15,7 @@ #include #include #include +#include struct perf_event; struct bpf_prog; @@ -182,6 +183,16 @@ struct bpf_verifier_ops { struct bpf_prog *prog, u32 *target_size); }; +struct bpf_dev_offload { + struct bpf_prog *prog; + struct net_device *netdev; + void *dev_priv; + struct list_head offloads; + bool dev_state; + bool verifier_running; + wait_queue_head_t verifier_done; +}; + struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; @@ -199,6 +210,7 @@ struct bpf_prog_aux { #ifdef CONFIG_SECURITY void *security; #endif + struct bpf_dev_offload *offload; union { struct work_struct work; struct rcu_head rcu; @@ -317,6 +329,7 @@ extern const struct file_operations bpf_prog_fops; #undef BPF_PROG_TYPE #undef BPF_MAP_TYPE +extern const struct bpf_prog_ops bpf_offload_prog_ops; extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; @@ -491,6 +504,29 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, } #endif /* CONFIG_BPF_SYSCALL */ +int bpf_prog_offload_compile(struct bpf_prog *prog); +void bpf_prog_offload_destroy(struct bpf_prog *prog); + +#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); + +static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +{ + return aux->offload; +} +#else +static inline int bpf_prog_offload_init(struct bpf_prog *prog, + union bpf_attr *attr) +{ + return -EOPNOTSUPP; +} + +static inline bool bpf_prog_is_dev_bound(struct bpf_prog_aux *aux) +{ + return false; +} +#endif /* CONFIG_NET && CONFIG_BPF_SYSCALL */ + #if defined(CONFIG_STREAM_PARSER) && defined(CONFIG_BPF_SYSCALL) struct sock *__sock_map_lookup_elem(struct bpf_map *map, u32 key); int sock_map_prog(struct bpf_map *map, struct bpf_prog *prog, u32 type); diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 3b0976aaac75..e45011dbc02d 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -153,6 +153,7 @@ struct bpf_verifier_env { struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ + const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */ void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ @@ -169,6 +170,15 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) return env->cur_state->regs; } +#if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); +#else +int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +{ + return -EOPNOTSUPP; +} +#endif + int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, void *priv); diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 9af9feaaeb64..fda527ccb263 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -797,8 +797,13 @@ enum bpf_netdev_command { * is equivalent to XDP_ATTACHED_DRV. */ XDP_QUERY_PROG, + /* BPF program for offload callbacks, invoked at program load time. */ + BPF_OFFLOAD_VERIFIER_PREP, + BPF_OFFLOAD_TRANSLATE, + BPF_OFFLOAD_DESTROY, }; +struct bpf_ext_analyzer_ops; struct netlink_ext_ack; struct netdev_bpf { @@ -815,6 +820,15 @@ struct netdev_bpf { u8 prog_attached; u32 prog_id; }; + /* BPF_OFFLOAD_VERIFIER_PREP */ + struct { + struct bpf_prog *prog; + const struct bpf_ext_analyzer_ops *ops; /* callee set */ + } verifier; + /* BPF_OFFLOAD_TRANSLATE, BPF_OFFLOAD_DESTROY */ + struct { + struct bpf_prog *prog; + } offload; }; }; -- cgit v1.2.3 From bd601b6ada11fdfb9e277f24ad2eb54bc599156b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:18 -0700 Subject: bpf: report offload info to user space Extend struct bpf_prog_info to contain information about program being bound to a device. Since the netdev may get destroyed while program still exists we need a flag to indicate the program is loaded for a device, even if the device is gone. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e45d43f9ec92..98bacd0fa5cc 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -506,6 +506,7 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); +u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -- cgit v1.2.3 From 248f346ffe9508dee0039db4ac839cb31ba3bdec Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:20 -0700 Subject: xdp: allow attaching programs loaded for specific device Pass the netdev pointer to bpf_prog_get_type(). This way BPF code can decide whether the device matches what the code was loaded/translated for. Signed-off-by: Jakub Kicinski Reviewed-by: Simon Horman Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 98bacd0fa5cc..c397934f91dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -335,6 +335,8 @@ extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); +struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, + struct net_device *netdev); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); @@ -428,6 +430,14 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, { return ERR_PTR(-EOPNOTSUPP); } + +static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, + enum bpf_prog_type type, + struct net_device *netdev) +{ + return ERR_PTR(-EOPNOTSUPP); +} + static inline struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i) { -- cgit v1.2.3 From b37a530613104aa3f592376c67a462823298759c Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Fri, 3 Nov 2017 13:56:30 -0700 Subject: bpf: remove old offload/analyzer Thanks to the ability to load a program for a specific device, running verifier twice is no longer needed. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Signed-off-by: David S. Miller --- include/linux/bpf_verifier.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index e45011dbc02d..07b96aaca256 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -152,9 +152,7 @@ struct bpf_verifier_env { bool strict_alignment; /* perform strict pointer alignment checks */ struct bpf_verifier_state *cur_state; /* current verifier state */ struct bpf_verifier_state_list **explored_states; /* search pruning optimization */ - const struct bpf_ext_analyzer_ops *analyzer_ops; /* external analyzer ops */ const struct bpf_ext_analyzer_ops *dev_ops; /* device analyzer ops */ - void *analyzer_priv; /* pointer to external analyzer's private data */ struct bpf_map *used_maps[MAX_USED_MAPS]; /* array of map's used by eBPF program */ u32 used_map_cnt; /* number of used maps */ u32 id_gen; /* used to generate unique reg IDs */ @@ -179,7 +177,4 @@ int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) } #endif -int bpf_analyzer(struct bpf_prog *prog, const struct bpf_ext_analyzer_ops *ops, - void *priv); - #endif /* _LINUX_BPF_VERIFIER_H */ -- cgit v1.2.3 From 1f2556916d974cfb62b6af51660186b5f58bd869 Mon Sep 17 00:00:00 2001 From: Priyaranjan Jha Date: Fri, 3 Nov 2017 16:38:48 -0700 Subject: tcp: higher throughput under reordering with adaptive RACK reordering wnd Currently TCP RACK loss detection does not work well if packets are being reordered beyond its static reordering window (min_rtt/4).Under such reordering it may falsely trigger loss recoveries and reduce TCP throughput significantly. This patch improves that by increasing and reducing the reordering window based on DSACK, which is now supported in major TCP implementations. It makes RACK's reo_wnd adaptive based on DSACK and no. of recoveries. - If DSACK is received, increment reo_wnd by min_rtt/4 (upper bounded by srtt), since there is possibility that spurious retransmission was due to reordering delay longer than reo_wnd. - Persist the current reo_wnd value for TCP_RACK_RECOVERY_THRESH (16) no. of successful recoveries (accounts for full DSACK-based loss recovery undo). After that, reset it to default (min_rtt/4). - At max, reo_wnd is incremented only once per rtt. So that the new DSACK on which we are reacting, is due to the spurious retx (approx) after the reo_wnd has been updated last time. - reo_wnd is tracked in terms of steps (of min_rtt/4), rather than absolute value to account for change in rtt. In our internal testing, we observed significant increase in throughput, in scenarios where reordering exceeds min_rtt/4 (previous static value). Signed-off-by: Priyaranjan Jha Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: David S. Miller --- include/linux/tcp.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c431385b272..22f40c96a15b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -210,8 +210,13 @@ struct tcp_sock { u64 mstamp; /* (Re)sent time of the skb */ u32 rtt_us; /* Associated RTT */ u32 end_seq; /* Ending TCP sequence of the skb */ - u8 advanced; /* mstamp advanced since last lost marking */ - u8 reord; /* reordering detected */ + u32 last_delivered; /* tp->delivered at last reo_wnd adj */ + u8 reo_wnd_steps; /* Allowed reordering window */ +#define TCP_RACK_RECOVERY_THRESH 16 + u8 reo_wnd_persist:5, /* No. of recovery since last adj */ + dsack_seen:1, /* Whether DSACK seen after last adj */ + advanced:1, /* mstamp advanced since last lost marking */ + reord:1; /* reordering detected */ } rack; u16 advmss; /* Advertised MSS */ u32 chrono_start; /* Start time in jiffies of a TCP chrono */ -- cgit v1.2.3 From ecf8fecb7828648cba0e42de7464a7e600c93459 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:31 -0500 Subject: device_cgroup: prepare code for bpf-based device controller This is non-functional change to prepare the device cgroup code for adding eBPF-based controller for cgroups v2. The patch performs the following changes: 1) __devcgroup_inode_permission() and devcgroup_inode_mknod() are moving to the device-cgroup.h and converting into static inline. 2) __devcgroup_check_permission() is exported. 3) devcgroup_check_permission() wrapper is introduced to be used by both existing and new bpf-based implementations. Signed-off-by: Roman Gushchin Acked-by: Tejun Heo Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/device_cgroup.h | 61 ++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 57 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index cdbc344a92e4..2d93d7ecd479 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,17 +1,70 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#define DEVCG_ACC_MKNOD 1 +#define DEVCG_ACC_READ 2 +#define DEVCG_ACC_WRITE 4 +#define DEVCG_ACC_MASK (DEVCG_ACC_MKNOD | DEVCG_ACC_READ | DEVCG_ACC_WRITE) + +#define DEVCG_DEV_BLOCK 1 +#define DEVCG_DEV_CHAR 2 +#define DEVCG_DEV_ALL 4 /* this represents all devices */ + +#ifdef CONFIG_CGROUP_DEVICE +extern int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access); +#else +static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ return 0; } +#endif + #ifdef CONFIG_CGROUP_DEVICE -extern int __devcgroup_inode_permission(struct inode *inode, int mask); -extern int devcgroup_inode_mknod(int mode, dev_t dev); +static inline int devcgroup_check_permission(short type, u32 major, u32 minor, + short access) +{ + return __devcgroup_check_permission(type, major, minor, access); +} + static inline int devcgroup_inode_permission(struct inode *inode, int mask) { + short type, access = 0; + if (likely(!inode->i_rdev)) return 0; - if (!S_ISBLK(inode->i_mode) && !S_ISCHR(inode->i_mode)) + + if (S_ISBLK(inode->i_mode)) + type = DEVCG_DEV_BLOCK; + else if (S_ISCHR(inode->i_mode)) + type = DEVCG_DEV_CHAR; + else return 0; - return __devcgroup_inode_permission(inode, mask); + + if (mask & MAY_WRITE) + access |= DEVCG_ACC_WRITE; + if (mask & MAY_READ) + access |= DEVCG_ACC_READ; + + return devcgroup_check_permission(type, imajor(inode), iminor(inode), + access); } + +static inline int devcgroup_inode_mknod(int mode, dev_t dev) +{ + short type; + + if (!S_ISBLK(mode) && !S_ISCHR(mode)) + return 0; + + if (S_ISBLK(mode)) + type = DEVCG_DEV_BLOCK; + else + type = DEVCG_DEV_CHAR; + + return devcgroup_check_permission(type, MAJOR(dev), MINOR(dev), + DEVCG_ACC_MKNOD); +} + #else static inline int devcgroup_inode_permission(struct inode *inode, int mask) { return 0; } -- cgit v1.2.3 From ebc614f687369f9df99828572b1d85a7c2de3d92 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Sun, 5 Nov 2017 08:15:32 -0500 Subject: bpf, cgroup: implement eBPF-based device controller for cgroup v2 Cgroup v2 lacks the device controller, provided by cgroup v1. This patch adds a new eBPF program type, which in combination of previously added ability to attach multiple eBPF programs to a cgroup, will provide a similar functionality, but with some additional flexibility. This patch introduces a BPF_PROG_TYPE_CGROUP_DEVICE program type. A program takes major and minor device numbers, device type (block/character) and access type (mknod/read/write) as parameters and returns an integer which defines if the operation should be allowed or terminated with -EPERM. Signed-off-by: Roman Gushchin Acked-by: Alexei Starovoitov Acked-by: Tejun Heo Cc: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf-cgroup.h | 15 +++++++++++++++ include/linux/bpf_types.h | 3 +++ include/linux/device_cgroup.h | 8 +++++++- 3 files changed, 25 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf-cgroup.h b/include/linux/bpf-cgroup.h index 87a7db9feb38..a7f16e0f8d68 100644 --- a/include/linux/bpf-cgroup.h +++ b/include/linux/bpf-cgroup.h @@ -67,6 +67,9 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, struct bpf_sock_ops_kern *sock_ops, enum bpf_attach_type type); +int __cgroup_bpf_check_dev_permission(short dev_type, u32 major, u32 minor, + short access, enum bpf_attach_type type); + /* Wrappers for __cgroup_bpf_run_filter_skb() guarded by cgroup_bpf_enabled. */ #define BPF_CGROUP_RUN_PROG_INET_INGRESS(sk, skb) \ ({ \ @@ -112,6 +115,17 @@ int __cgroup_bpf_run_filter_sock_ops(struct sock *sk, } \ __ret; \ }) + +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access) \ +({ \ + int __ret = 0; \ + if (cgroup_bpf_enabled) \ + __ret = __cgroup_bpf_check_dev_permission(type, major, minor, \ + access, \ + BPF_CGROUP_DEVICE); \ + \ + __ret; \ +}) #else struct cgroup_bpf {}; @@ -122,6 +136,7 @@ static inline int cgroup_bpf_inherit(struct cgroup *cgrp) { return 0; } #define BPF_CGROUP_RUN_PROG_INET_EGRESS(sk,skb) ({ 0; }) #define BPF_CGROUP_RUN_PROG_INET_SOCK(sk) ({ 0; }) #define BPF_CGROUP_RUN_PROG_SOCK_OPS(sock_ops) ({ 0; }) +#define BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type,major,minor,access) ({ 0; }) #endif /* CONFIG_CGROUP_BPF */ diff --git a/include/linux/bpf_types.h b/include/linux/bpf_types.h index 53c5b9ad7220..978c1d9c9383 100644 --- a/include/linux/bpf_types.h +++ b/include/linux/bpf_types.h @@ -19,6 +19,9 @@ BPF_PROG_TYPE(BPF_PROG_TYPE_KPROBE, kprobe) BPF_PROG_TYPE(BPF_PROG_TYPE_TRACEPOINT, tracepoint) BPF_PROG_TYPE(BPF_PROG_TYPE_PERF_EVENT, perf_event) #endif +#ifdef CONFIG_CGROUP_BPF +BPF_PROG_TYPE(BPF_PROG_TYPE_CGROUP_DEVICE, cg_dev) +#endif BPF_MAP_TYPE(BPF_MAP_TYPE_ARRAY, array_map_ops) BPF_MAP_TYPE(BPF_MAP_TYPE_PERCPU_ARRAY, percpu_array_map_ops) diff --git a/include/linux/device_cgroup.h b/include/linux/device_cgroup.h index 2d93d7ecd479..8557efe096dc 100644 --- a/include/linux/device_cgroup.h +++ b/include/linux/device_cgroup.h @@ -1,5 +1,6 @@ /* SPDX-License-Identifier: GPL-2.0 */ #include +#include #define DEVCG_ACC_MKNOD 1 #define DEVCG_ACC_READ 2 @@ -19,10 +20,15 @@ static inline int __devcgroup_check_permission(short type, u32 major, u32 minor, { return 0; } #endif -#ifdef CONFIG_CGROUP_DEVICE +#if defined(CONFIG_CGROUP_DEVICE) || defined(CONFIG_CGROUP_BPF) static inline int devcgroup_check_permission(short type, u32 major, u32 minor, short access) { + int rc = BPF_CGROUP_RUN_PROG_DEVICE_CGROUP(type, major, minor, access); + + if (rc) + return -EPERM; + return __devcgroup_check_permission(type, major, minor, access); } -- cgit v1.2.3 From 63dcb81e5b9e1faadf4b55450141bc4446e5a3d3 Mon Sep 17 00:00:00 2001 From: Mike Rapoport Date: Tue, 3 Oct 2017 08:53:07 +0300 Subject: include/linux/fs.h: fix comment about struct address_space Before commit 9c5d760b8d22 ("mm: split gfp_mask and mapping flags into separate fields") the private_* fields of struct adrress_space were grouped together and using "ditto" in comments describing the last fields was correct. With introduction of gpf_mask between private_lock and private_list "ditto" references the wrong description. Fix it by using the elaborate description. Signed-off-by: Mike Rapoport Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 31f8b2ea358c..ccbac0ed672c 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -403,7 +403,7 @@ struct address_space { unsigned long flags; /* error bits */ spinlock_t private_lock; /* for use by the address_space */ gfp_t gfp_mask; /* implicit gfp mask for allocations */ - struct list_head private_list; /* ditto */ + struct list_head private_list; /* for use by the address_space */ void *private_data; /* ditto */ errseq_t wb_err; } __attribute__((aligned(sizeof(long)))) __randomize_layout; -- cgit v1.2.3 From 6afc662e68b5f988282ff20afd58a89b1c279dca Mon Sep 17 00:00:00 2001 From: Chao Yu Date: Wed, 6 Sep 2017 21:59:50 +0800 Subject: f2fs: support flexible inline xattr size Now, in product, more and more features based on file encryption were introduced, their demand of xattr space is increasing, however, inline xattr has fixed-size of 200 bytes, once inline xattr space is full, new increased xattr data would occupy additional xattr block which may bring us more space usage and performance regression during persisting. In order to resolve above issue, it's better to expand inline xattr size flexibly according to user's requirement. So this patch introduces new filesystem feature 'flexible inline xattr', and new mount option 'inline_xattr_size=%u', once mkfs enables the feature, we can use the option to make f2fs supporting flexible inline xattr size. To support this feature, we add extra attribute i_inline_xattr_size in inode layout, indicating that how many space inline xattr borrows from block address mapping space in inode layout, by this, we can easily locate and store flexible-sized inline xattr data in inode. Inode disk layout: +----------------------+ | .i_mode | | ... | | .i_ext | +----------------------+ | .i_extra_isize | | .i_inline_xattr_size |-----------+ | ... | | +----------------------+ | | .i_addr | | | - block address or | | | - inline data | | +----------------------+<---+ v | inline xattr | +---inline xattr range +----------------------+<---+ | .i_nid | +----------------------+ | node_footer | | (nid, ino, offset) | +----------------------+ Note that, we have to cnosider backward compatibility which reserved inline_data space, 200 bytes, all the time, reported by Sheng Yong. Previous inline data or directory always reserved 200 bytes in inode layout, even if inline_xattr is disabled. In order to keep inline_dentry's structure for backward compatibility, we get the space back only from inline_data. Signed-off-by: Chao Yu Reported-by: Sheng Yong Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 2a0c453d7235..50a8ee501bf1 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -184,7 +184,8 @@ struct f2fs_extent { } __packed; #define F2FS_NAME_LEN 255 -#define F2FS_INLINE_XATTR_ADDRS 50 /* 200 bytes for inline xattrs */ +/* 200 bytes for inline xattrs by default */ +#define DEFAULT_INLINE_XATTR_ADDRS 50 #define DEF_ADDRS_PER_INODE 923 /* Address Pointers in an Inode */ #define CUR_ADDRS_PER_INODE(inode) (DEF_ADDRS_PER_INODE - \ get_extra_isize(inode)) @@ -238,7 +239,7 @@ struct f2fs_inode { union { struct { __le16 i_extra_isize; /* extra inode attribute size */ - __le16 i_padding; /* padding */ + __le16 i_inline_xattr_size; /* inline xattr size, unit: 4 bytes */ __le32 i_projid; /* project id */ __le32 i_inode_checksum;/* inode meta checksum */ __le32 i_extra_end[0]; /* for attribute size calculation */ -- cgit v1.2.3 From 234a9689614272d93271b308adbd303b59d266e3 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 5 Oct 2017 21:03:06 -0700 Subject: f2fs: add quota_ino feature infra This patch adds quota_ino feature infra to be used for quota files. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 50a8ee501bf1..ce34007972c3 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,6 +36,9 @@ #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) +#define F2FS_QUOTA_INO 3 +#define F2FS_MAX_QUOTAS 3 + #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ #define F2FS_IO_SIZE_KB(sbi) (1 << ((sbi)->write_io_size_bits + 2)) /* KB */ #define F2FS_IO_SIZE_BYTES(sbi) (1 << ((sbi)->write_io_size_bits + 12)) /* B */ @@ -108,7 +111,8 @@ struct f2fs_super_block { __u8 encryption_level; /* versioning level for encryption */ __u8 encrypt_pw_salt[16]; /* Salt used for string2key algorithm */ struct f2fs_device devs[MAX_DEVICES]; /* device list */ - __u8 reserved[327]; /* valid reserved region */ + __le32 qf_ino[F2FS_MAX_QUOTAS]; /* quota inode numbers */ + __u8 reserved[315]; /* valid reserved region */ } __packed; /* -- cgit v1.2.3 From ea6767337f86312ebe473585899a159bf50ef1b7 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Fri, 6 Oct 2017 09:14:28 -0700 Subject: f2fs: support quota sys files This patch supports hidden quota files in the system, which will be used for Android. It requires up-to-date f2fs-tools later than v1.9.0. Reviewed-by: Chao Yu Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index ce34007972c3..43e98d30d2df 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -36,7 +36,6 @@ #define F2FS_NODE_INO(sbi) ((sbi)->node_ino_num) #define F2FS_META_INO(sbi) ((sbi)->meta_ino_num) -#define F2FS_QUOTA_INO 3 #define F2FS_MAX_QUOTAS 3 #define F2FS_IO_SIZE(sbi) (1 << (sbi)->write_io_size_bits) /* Blocks */ -- cgit v1.2.3 From 08810a4119aaebf6318f209ec5dd9828e969cba4 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 25 Oct 2017 14:12:29 +0200 Subject: PM / core: Add NEVER_SKIP and SMART_PREPARE driver flags The motivation for this change is to provide a way to work around a problem with the direct-complete mechanism used for avoiding system suspend/resume handling for devices in runtime suspend. The problem is that some middle layer code (the PCI bus type and the ACPI PM domain in particular) returns positive values from its system suspend ->prepare callbacks regardless of whether the driver's ->prepare returns a positive value or 0, which effectively prevents drivers from being able to control the direct-complete feature. Some drivers need that control, however, and the PCI bus type has grown its own flag to deal with this issue, but since it is not limited to PCI, it is better to address it by adding driver flags at the core level. To that end, add a driver_flags field to struct dev_pm_info for flags that can be set by device drivers at the probe time to inform the PM core and/or bus types, PM domains and so on on the capabilities and/or preferences of device drivers. Also add two static inline helpers for setting that field and testing it against a given set of flags and make the driver core clear it automatically on driver remove and probe failures. Define and document two PM driver flags related to the direct- complete feature: NEVER_SKIP and SMART_PREPARE that can be used, respectively, to indicate to the PM core that the direct-complete mechanism should never be used for the device and to inform the middle layer code (bus types, PM domains etc) that it can only request the PM core to use the direct-complete mechanism for the device (by returning a positive value from its ->prepare callback) if it also has been requested by the driver. While at it, make the core check pm_runtime_suspended() when setting power.direct_complete so that it doesn't need to be checked by ->prepare callbacks. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Reviewed-by: Ulf Hansson --- include/linux/device.h | 10 ++++++++++ include/linux/pm.h | 20 ++++++++++++++++++++ 2 files changed, 30 insertions(+) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index c32e6f974d4a..fb9451599aca 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -1070,6 +1070,16 @@ static inline void dev_pm_syscore_device(struct device *dev, bool val) #endif } +static inline void dev_pm_set_driver_flags(struct device *dev, u32 flags) +{ + dev->power.driver_flags = flags; +} + +static inline bool dev_pm_test_driver_flags(struct device *dev, u32 flags) +{ + return !!(dev->power.driver_flags & flags); +} + static inline void device_lock(struct device *dev) { mutex_lock(&dev->mutex); diff --git a/include/linux/pm.h b/include/linux/pm.h index a0ceeccf2846..f10bad831bfa 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -550,6 +550,25 @@ struct pm_subsys_data { #endif }; +/* + * Driver flags to control system suspend/resume behavior. + * + * These flags can be set by device drivers at the probe time. They need not be + * cleared by the drivers as the driver core will take care of that. + * + * NEVER_SKIP: Do not skip system suspend/resume callbacks for the device. + * SMART_PREPARE: Check the return value of the driver's ->prepare callback. + * + * Setting SMART_PREPARE instructs bus types and PM domains which may want + * system suspend/resume callbacks to be skipped for the device to return 0 from + * their ->prepare callbacks if the driver's ->prepare callback returns 0 (in + * other words, the system suspend/resume callbacks can only be skipped for the + * device if its driver doesn't object against that). This flag has no effect + * if NEVER_SKIP is set. + */ +#define DPM_FLAG_NEVER_SKIP BIT(0) +#define DPM_FLAG_SMART_PREPARE BIT(1) + struct dev_pm_info { pm_message_t power_state; unsigned int can_wakeup:1; @@ -561,6 +580,7 @@ struct dev_pm_info { bool is_late_suspended:1; bool early_init:1; /* Owned by the PM core */ bool direct_complete:1; /* Owned by the PM core */ + u32 driver_flags; spinlock_t lock; #ifdef CONFIG_PM_SLEEP struct list_head entry; -- cgit v1.2.3 From c2eac4d3a115e2f511844e7bcf73f4e877fbf5da Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 25 Oct 2017 14:16:46 +0200 Subject: PCI / PM: Use the NEVER_SKIP driver flag Replace the PCI-specific flag PCI_DEV_FLAGS_NEEDS_RESUME with the PM core's DPM_FLAG_NEVER_SKIP one everywhere and drop it. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Reviewed-by: Ulf Hansson --- include/linux/pci.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f4f8ee5a7362..4b65fa4fb94e 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -205,13 +205,8 @@ enum pci_dev_flags { PCI_DEV_FLAGS_BRIDGE_XLATE_ROOT = (__force pci_dev_flags_t) (1 << 9), /* Do not use FLR even if device advertises PCI_AF_CAP */ PCI_DEV_FLAGS_NO_FLR_RESET = (__force pci_dev_flags_t) (1 << 10), - /* - * Resume before calling the driver's system suspend hooks, disabling - * the direct_complete optimization. - */ - PCI_DEV_FLAGS_NEEDS_RESUME = (__force pci_dev_flags_t) (1 << 11), /* Don't use Relaxed Ordering for TLPs directed at this device */ - PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 12), + PCI_DEV_FLAGS_NO_RELAXED_ORDERING = (__force pci_dev_flags_t) (1 << 11), }; enum pci_irq_reroute_variant { -- cgit v1.2.3 From 0eab11c9ae3b3cc5dd76f20b81d0247647a6e96f Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 26 Oct 2017 12:12:08 +0200 Subject: PM / core: Add SMART_SUSPEND driver flag Define and document a SMART_SUSPEND flag to instruct bus types and PM domains that the system suspend callbacks provided by the driver can cope with runtime-suspended devices, so from the driver's perspective it should be safe to leave devices in runtime suspend during system suspend. Setting that flag may also cause middle-layer code (bus types, PM domains etc.) to skip invocations of the ->suspend_late and ->suspend_noirq callbacks provided by the driver if the device is in runtime suspend at the beginning of the "late" phase of the system-wide suspend transition, in which case the driver's system-wide resume callbacks may be invoked back-to-back with its ->runtime_suspend callback, so the driver has to be able to cope with that too. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Reviewed-by: Ulf Hansson --- include/linux/pm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index f10bad831bfa..43b5418e05bb 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -558,6 +558,7 @@ struct pm_subsys_data { * * NEVER_SKIP: Do not skip system suspend/resume callbacks for the device. * SMART_PREPARE: Check the return value of the driver's ->prepare callback. + * SMART_SUSPEND: No need to resume the device from runtime suspend. * * Setting SMART_PREPARE instructs bus types and PM domains which may want * system suspend/resume callbacks to be skipped for the device to return 0 from @@ -565,9 +566,16 @@ struct pm_subsys_data { * other words, the system suspend/resume callbacks can only be skipped for the * device if its driver doesn't object against that). This flag has no effect * if NEVER_SKIP is set. + * + * Setting SMART_SUSPEND instructs bus types and PM domains which may want to + * runtime resume the device upfront during system suspend that doing so is not + * necessary from the driver's perspective. It also may cause them to skip + * invocations of the ->suspend_late and ->suspend_noirq callbacks provided by + * the driver if they decide to leave the device in runtime suspend. */ #define DPM_FLAG_NEVER_SKIP BIT(0) #define DPM_FLAG_SMART_PREPARE BIT(1) +#define DPM_FLAG_SMART_SUSPEND BIT(2) struct dev_pm_info { pm_message_t power_state; -- cgit v1.2.3 From c4b65157aeefad29b2351a00a010e8c40ce7fd0e Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 26 Oct 2017 12:12:22 +0200 Subject: PCI / PM: Take SMART_SUSPEND driver flag into account Make the PCI bus type take DPM_FLAG_SMART_SUSPEND into account in its system-wide PM callbacks and make sure that all code that should not run in parallel with pci_pm_runtime_resume() is executed in the "late" phases of system suspend, freeze and poweroff transitions. [Note that the pm_runtime_suspended() check in pci_dev_keep_suspended() is an optimization, because if is not passed, all of the subsequent checks may be skipped and some of them are much more overhead in general.] Also use the observation that if the device is in runtime suspend at the beginning of the "late" phase of a system-wide suspend-like transition, its state cannot change going forward (runtime PM is disabled for it at that time) until the transition is over and the subsequent system-wide PM callbacks should be skipped for it (as they generally assume the device to not be suspended), so add checks for that in pci_pm_suspend_late/noirq(), pci_pm_freeze_late/noirq() and pci_pm_poweroff_late/noirq(). Moreover, if pci_pm_resume_noirq() or pci_pm_restore_noirq() is called during the subsequent system-wide resume transition and if the device was left in runtime suspend previously, its runtime PM status needs to be changed to "active" as it is going to be put into the full-power state, so add checks for that too to these functions. In turn, if pci_pm_thaw_noirq() runs after the device has been left in runtime suspend, the subsequent "thaw" callbacks need to be skipped for it (as they may not work correctly with a suspended device), so set the power.direct_complete flag for the device then to make the PM core skip those callbacks. In addition to the above add a core helper for checking if DPM_FLAG_SMART_SUSPEND is set and the device runtime PM status is "suspended" at the same time, which is done quite often in the new code (and will be done elsewhere going forward too). Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas --- include/linux/pm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 43b5418e05bb..65d39115f06d 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -765,6 +765,8 @@ extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); +extern bool dev_pm_smart_suspend_and_suspended(struct device *dev); + #else /* !CONFIG_PM_SLEEP */ #define device_pm_lock() do {} while (0) -- cgit v1.2.3 From 05087360fd7acf2cc9b7bbb243c12765c44c7693 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Fri, 27 Oct 2017 10:10:16 +0200 Subject: ACPI / PM: Take SMART_SUSPEND driver flag into account Make the ACPI PM domain take DPM_FLAG_SMART_SUSPEND into account in its system suspend callbacks. [Note that the pm_runtime_suspended() check in acpi_dev_needs_resume() is an optimization, because if is not passed, all of the subsequent checks may be skipped and some of them are much more overhead in general.] Also use the observation that if the device is in runtime suspend at the beginning of the "late" phase of a system-wide suspend-like transition, its state cannot change going forward (runtime PM is disabled for it at that time) until the transition is over and the subsequent system-wide PM callbacks should be skipped for it (as they generally assume the device to not be suspended), so add checks for that in acpi_subsys_suspend_late/noirq() and acpi_subsys_freeze_late/noirq(). Moreover, if acpi_subsys_resume_noirq() is called during the subsequent system-wide resume transition and if the device was left in runtime suspend previously, its runtime PM status needs to be changed to "active" as it is going to be put into the full-power state going forward, so add a check for that too in there. In turn, if acpi_subsys_thaw_noirq() runs after the device has been left in runtime suspend, the subsequent "thaw" callbacks need to be skipped for it (as they may not work correctly with a suspended device), so set the power.direct_complete flag for the device then to make the PM core skip those callbacks. On top of the above, make the analogous changes in the acpi_lpss driver that uses the ACPI PM domain callbacks. Signed-off-by: Rafael J. Wysocki Acked-by: Greg Kroah-Hartman --- include/linux/acpi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 0ada2a948b44..dc1ebfeeb5ec 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -885,17 +885,27 @@ int acpi_dev_suspend_late(struct device *dev); int acpi_subsys_prepare(struct device *dev); void acpi_subsys_complete(struct device *dev); int acpi_subsys_suspend_late(struct device *dev); +int acpi_subsys_suspend_noirq(struct device *dev); +int acpi_subsys_resume_noirq(struct device *dev); int acpi_subsys_resume_early(struct device *dev); int acpi_subsys_suspend(struct device *dev); int acpi_subsys_freeze(struct device *dev); +int acpi_subsys_freeze_late(struct device *dev); +int acpi_subsys_freeze_noirq(struct device *dev); +int acpi_subsys_thaw_noirq(struct device *dev); #else static inline int acpi_dev_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_prepare(struct device *dev) { return 0; } static inline void acpi_subsys_complete(struct device *dev) {} static inline int acpi_subsys_suspend_late(struct device *dev) { return 0; } +static inline int acpi_subsys_suspend_noirq(struct device *dev) { return 0; } +static inline int acpi_subsys_resume_noirq(struct device *dev) { return 0; } static inline int acpi_subsys_resume_early(struct device *dev) { return 0; } static inline int acpi_subsys_suspend(struct device *dev) { return 0; } static inline int acpi_subsys_freeze(struct device *dev) { return 0; } +static inline int acpi_subsys_freeze_late(struct device *dev) { return 0; } +static inline int acpi_subsys_freeze_noirq(struct device *dev) { return 0; } +static inline int acpi_subsys_thaw_noirq(struct device *dev) { return 0; } #endif #ifdef CONFIG_ACPI -- cgit v1.2.3 From 10738ba8e02bc8cb1c4e832611621aa9666f4a24 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 17 Oct 2017 21:06:32 -0700 Subject: ide: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: "David S. Miller" Cc: linux-ide@vger.kernel.org Signed-off-by: Kees Cook --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index dc152e4b7f73..cc412175d036 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -1211,7 +1211,7 @@ extern int ide_wait_not_busy(ide_hwif_t *hwif, unsigned long timeout); extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); -extern void ide_timer_expiry(unsigned long); +extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); extern void do_ide_request(struct request_queue *); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); -- cgit v1.2.3 From 971e4aeeaa126b207d1e85eec6dc6e29bead316e Mon Sep 17 00:00:00 2001 From: Pavel Machek Date: Sat, 23 Sep 2017 22:13:14 +0200 Subject: mtd: Fix C++ comment in include/linux/mtd/mtd.h C++ comments look wrong in kernel tree. Fix one. Signed-off-by: Pavel Machek Acked-by: Boris Brezillon Signed-off-by: Richard Weinberger --- include/linux/mtd/mtd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 6cd0f6b7658b..849543f1f233 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -267,7 +267,7 @@ struct mtd_info { */ unsigned int bitflip_threshold; - // Kernel-only stuff starts here. + /* Kernel-only stuff starts here. */ const char *name; int index; -- cgit v1.2.3 From 24a0c654d7d6063301c51361f911369264342b3c Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 20 Oct 2017 15:38:54 -0500 Subject: PCI: Add for_each_pci_bridge() helper The following pattern is often used: list_for_each_entry(dev, &bus->devices, bus_list) { if (pci_is_bridge(dev)) { ... } } Add a for_each_pci_bridge() helper to make that code easier to write and read by reducing indentation level. It also saves one or few lines of code in each occurrence. Convert PCI core parts here at the same time. Signed-off-by: Andy Shevchenko [bhelgaas: fold in http://lkml.kernel.org/r/20171013165352.25550-1-andriy.shevchenko@linux.intel.com] Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index f4f8ee5a7362..3dbe947b4152 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -596,6 +596,10 @@ static inline bool pci_is_bridge(struct pci_dev *dev) dev->hdr_type == PCI_HEADER_TYPE_CARDBUS; } +#define for_each_pci_bridge(dev, bus) \ + list_for_each_entry(dev, &bus->devices, bus_list) \ + if (!pci_is_bridge(dev)) {} else + static inline struct pci_dev *pci_upstream_bridge(struct pci_dev *dev) { dev = pci_physfn(dev); -- cgit v1.2.3 From d0c2e691d1cbe43662df3a08a4933f13acc352c3 Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 6 Nov 2017 07:17:37 -0600 Subject: objtool: Add a comment for the unreachable annotation macros Add a comment for the unreachable annotation macros to explain their purpose and the '__COUNTER__' label hack. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1570e48d9f87e0fc6f0126c32e7e1de6e109cb67.1509974104.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 202710420d6d..f8734fca54ce 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -187,6 +187,11 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, /* Unreachable code */ #ifdef CONFIG_STACK_VALIDATION +/* + * These macros help objtool understand GCC code flow for unreachable code. + * The __COUNTER__ based labels are a hack to make each instance of the macros + * unique, to convince GCC not to merge duplicate inline asm statements. + */ #define annotate_reachable() ({ \ asm("%c0:\n\t" \ ".pushsection .discard.reachable\n\t" \ -- cgit v1.2.3 From 10259821ac47dbefa6f83ae57f1fa9f1f2c54b3d Mon Sep 17 00:00:00 2001 From: Josh Poimboeuf Date: Mon, 6 Nov 2017 07:17:38 -0600 Subject: objtool: Make unreachable annotation inline asms explicitly volatile Add 'volatile' to the unreachable annotation macro inline asm statements. They're already implicitly volatile because they don't have output constraints, but it's clearer and more robust to make them explicitly volatile. Suggested-by: Linus Torvalds Signed-off-by: Josh Poimboeuf Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/28659257b7a6adf4a7f65920dad70b2b0226e996.1509974104.git.jpoimboe@redhat.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index f8734fca54ce..4fac29cdffd1 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -193,16 +193,16 @@ void ftrace_likely_update(struct ftrace_likely_data *f, int val, * unique, to convince GCC not to merge duplicate inline asm statements. */ #define annotate_reachable() ({ \ - asm("%c0:\n\t" \ - ".pushsection .discard.reachable\n\t" \ - ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__COUNTER__)); \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.reachable\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ }) #define annotate_unreachable() ({ \ - asm("%c0:\n\t" \ - ".pushsection .discard.unreachable\n\t" \ - ".long %c0b - .\n\t" \ - ".popsection\n\t" : : "i" (__COUNTER__)); \ + asm volatile("%c0:\n\t" \ + ".pushsection .discard.unreachable\n\t" \ + ".long %c0b - .\n\t" \ + ".popsection\n\t" : : "i" (__COUNTER__)); \ }) #define ASM_UNREACHABLE \ "999:\n\t" \ -- cgit v1.2.3 From f791dd2589d7e217625d7e411621a5bac71cbf69 Mon Sep 17 00:00:00 2001 From: Cheng Jian Date: Fri, 3 Nov 2017 18:59:48 +0800 Subject: locking/rwlocks: Fix comments - fix the list of locking API headers in kernel/locking/spinlock.c - fix an #endif comment Signed-off-by: Cheng Jian Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: huawei.libin@huawei.com Cc: xiexiuqi@huawei.com Link: http://lkml.kernel.org/r/1509706788-152547-1-git-send-email-cj.chengjian@huawei.com Signed-off-by: Ingo Molnar --- include/linux/rwlock_api_smp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rwlock_api_smp.h b/include/linux/rwlock_api_smp.h index 5b9b84b20407..86ebb4bf9c6e 100644 --- a/include/linux/rwlock_api_smp.h +++ b/include/linux/rwlock_api_smp.h @@ -211,7 +211,7 @@ static inline void __raw_write_lock(rwlock_t *lock) LOCK_CONTENDED(lock, do_raw_write_trylock, do_raw_write_lock); } -#endif /* CONFIG_PREEMPT */ +#endif /* !CONFIG_GENERIC_LOCKBREAK || CONFIG_DEBUG_LOCK_ALLOC */ static inline void __raw_write_unlock(rwlock_t *lock) { -- cgit v1.2.3 From d8aa7eea78a1401cce39b3bb61ead0150044a3df Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 Oct 2017 09:30:44 -0500 Subject: x86/mm: Add Secure Encrypted Virtualization (SEV) support Provide support for Secure Encrypted Virtualization (SEV). This initial support defines a flag that is used by the kernel to determine if it is running with SEV active. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: kvm@vger.kernel.org Cc: Borislav Petkov Cc: Andy Lutomirski Link: https://lkml.kernel.org/r/20171020143059.3291-3-brijesh.singh@amd.com --- include/linux/mem_encrypt.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mem_encrypt.h b/include/linux/mem_encrypt.h index 265a9cd21cb4..b310a9c18113 100644 --- a/include/linux/mem_encrypt.h +++ b/include/linux/mem_encrypt.h @@ -23,11 +23,14 @@ #define sme_me_mask 0ULL +static inline bool sme_active(void) { return false; } +static inline bool sev_active(void) { return false; } + #endif /* CONFIG_ARCH_HAS_MEM_ENCRYPT */ -static inline bool sme_active(void) +static inline bool mem_encrypt_active(void) { - return !!sme_me_mask; + return sme_me_mask; } static inline u64 sme_get_me_mask(void) -- cgit v1.2.3 From 1d2e733b13b450e5854f4a8f8efcd77fa7362d62 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 Oct 2017 09:30:51 -0500 Subject: resource: Provide resource struct in resource walk callback In preperation for a new function that will need additional resource information during the resource walk, update the resource walk callback to pass the resource structure. Since the current callback start and end arguments are pulled from the resource structure, the callback functions can obtain them from the resource structure directly. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Kees Cook Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: kvm@vger.kernel.org Cc: Benjamin Herrenschmidt Cc: Borislav Petkov Cc: linuxppc-dev@lists.ozlabs.org Link: https://lkml.kernel.org/r/20171020143059.3291-10-brijesh.singh@amd.com --- include/linux/ioport.h | 4 ++-- include/linux/kexec.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 83c8d6530f0f..c0070d7c4b99 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -272,10 +272,10 @@ walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int walk_system_ram_res(u64 start, u64 end, void *arg, - int (*func)(u64, u64, void *)); + int (*func)(struct resource *, void *)); extern int walk_iomem_res_desc(unsigned long desc, unsigned long flags, u64 start, u64 end, - void *arg, int (*func)(u64, u64, void *)); + void *arg, int (*func)(struct resource *, void *)); /* True if any part of r1 overlaps r2 */ static inline bool resource_overlaps(struct resource *r1, struct resource *r2) diff --git a/include/linux/kexec.h b/include/linux/kexec.h index 1c08c925cefb..f16f6ceb3875 100644 --- a/include/linux/kexec.h +++ b/include/linux/kexec.h @@ -160,7 +160,7 @@ struct kexec_buf { }; int __weak arch_kexec_walk_mem(struct kexec_buf *kbuf, - int (*func)(u64, u64, void *)); + int (*func)(struct resource *, void *)); extern int kexec_add_buffer(struct kexec_buf *kbuf); int kexec_locate_mem_hole(struct kexec_buf *kbuf); #endif /* CONFIG_KEXEC_FILE */ -- cgit v1.2.3 From 0e4c12b45aa88e74fdda117896d2b61c4e510cb9 Mon Sep 17 00:00:00 2001 From: Tom Lendacky Date: Fri, 20 Oct 2017 09:30:52 -0500 Subject: x86/mm, resource: Use PAGE_KERNEL protection for ioremap of memory pages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In order for memory pages to be properly mapped when SEV is active, it's necessary to use the PAGE_KERNEL protection attribute as the base protection. This ensures that memory mapping of, e.g. ACPI tables, receives the proper mapping attributes. Signed-off-by: Tom Lendacky Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Tested-by: Borislav Petkov Cc: Laura Abbott Cc: Kees Cook Cc: kvm@vger.kernel.org Cc: Jérôme Glisse Cc: Borislav Petkov Cc: Andy Lutomirski Cc: Andrew Morton Cc: Dan Williams Cc: "Kirill A. Shutemov" Link: https://lkml.kernel.org/r/20171020143059.3291-11-brijesh.singh@amd.com --- include/linux/ioport.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index c0070d7c4b99..93b4183cf53d 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -271,6 +271,9 @@ extern int walk_system_ram_range(unsigned long start_pfn, unsigned long nr_pages, void *arg, int (*func)(unsigned long, unsigned long, void *)); extern int +walk_mem_res(u64 start, u64 end, void *arg, + int (*func)(struct resource *, void *)); +extern int walk_system_ram_res(u64 start, u64 end, void *arg, int (*func)(struct resource *, void *)); extern int -- cgit v1.2.3 From ac26963a1175c813e3ed21c0d2435b083173136e Mon Sep 17 00:00:00 2001 From: Brijesh Singh Date: Fri, 20 Oct 2017 09:30:57 -0500 Subject: percpu: Introduce DEFINE_PER_CPU_DECRYPTED KVM guest defines three per-CPU variables (steal-time, apf_reason, and kvm_pic_eoi) which are shared between a guest and a hypervisor. When SEV is active, memory is encrypted with a guest-specific key, and if the guest OS wants to share the memory region with the hypervisor then it must clear the C-bit (i.e set decrypted) before sharing it. DEFINE_PER_CPU_DECRYPTED can be used to define the per-CPU variables which will be shared between a guest and a hypervisor. Signed-off-by: Brijesh Singh Signed-off-by: Thomas Gleixner Tested-by: Borislav Petkov Acked-by: Tejun Heo Reviewed-by: Borislav Petkov Cc: linux-arch@vger.kernel.org Cc: Tom Lendacky Cc: kvm@vger.kernel.org Cc: Arnd Bergmann Cc: Borislav Petkov Cc: Christoph Lameter Link: https://lkml.kernel.org/r/20171020143059.3291-16-brijesh.singh@amd.com --- include/linux/percpu-defs.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/percpu-defs.h b/include/linux/percpu-defs.h index 8f16299ca068..2d2096ba1cfe 100644 --- a/include/linux/percpu-defs.h +++ b/include/linux/percpu-defs.h @@ -172,6 +172,21 @@ #define DEFINE_PER_CPU_READ_MOSTLY(type, name) \ DEFINE_PER_CPU_SECTION(type, name, "..read_mostly") +/* + * Declaration/definition used for per-CPU variables that should be accessed + * as decrypted when memory encryption is enabled in the guest. + */ +#if defined(CONFIG_VIRTUALIZATION) && defined(CONFIG_AMD_MEM_ENCRYPT) + +#define DECLARE_PER_CPU_DECRYPTED(type, name) \ + DECLARE_PER_CPU_SECTION(type, name, "..decrypted") + +#define DEFINE_PER_CPU_DECRYPTED(type, name) \ + DEFINE_PER_CPU_SECTION(type, name, "..decrypted") +#else +#define DEFINE_PER_CPU_DECRYPTED(type, name) DEFINE_PER_CPU(type, name) +#endif + /* * Intermodule exports for per-CPU variables. sparse forgets about * address space across EXPORT_SYMBOL(), change EXPORT_SYMBOL() to -- cgit v1.2.3 From 3c377ef1000d57cb1faf8b86ea77cfa47141db33 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 2 Nov 2017 10:57:39 +0200 Subject: usb: core: rename usb_get_status() 'type' argument to 'recip' This makes it a lot clearer that we're expecting a recipient as the argument. A follow-up patch will use the argument 'type' as the status type selector (standard or ptm). Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index cb9fbd54386e..2e2c1d40081b 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1766,7 +1766,7 @@ extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, extern int usb_get_descriptor(struct usb_device *dev, unsigned char desctype, unsigned char descindex, void *buf, int size); extern int usb_get_status(struct usb_device *dev, - int type, int target, void *data); + int recip, int target, void *data); extern int usb_string(struct usb_device *dev, int index, char *buf, size_t size); -- cgit v1.2.3 From d9e1e1484ade396b3a979ba6c68798dbaceed1b9 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 2 Nov 2017 10:57:40 +0200 Subject: usb: core: introduce a new usb_get_std_status() helper This new helper is a simple wrapper around usb_get_status(). This patch is in preparation to adding support for fetching PTM_STATUS types. No functional changes. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 2e2c1d40081b..e86b6a2a35e4 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1767,6 +1767,13 @@ extern int usb_get_descriptor(struct usb_device *dev, unsigned char desctype, unsigned char descindex, void *buf, int size); extern int usb_get_status(struct usb_device *dev, int recip, int target, void *data); + +static inline int usb_get_std_status(struct usb_device *dev, + int recip, int target, void *data) +{ + return usb_get_status(dev, recip, target, data); +} + extern int usb_string(struct usb_device *dev, int index, char *buf, size_t size); -- cgit v1.2.3 From 2e43f0fe379c317d1ca27a69d860e397682ce957 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 2 Nov 2017 10:57:41 +0200 Subject: usb: core: add a 'type' parameter to usb_get_status() This new 'type' parameter will allows interested drivers to request for PTM status or Standard status. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index e86b6a2a35e4..6228d8177bfe 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1766,12 +1766,13 @@ extern int usb_bulk_msg(struct usb_device *usb_dev, unsigned int pipe, extern int usb_get_descriptor(struct usb_device *dev, unsigned char desctype, unsigned char descindex, void *buf, int size); extern int usb_get_status(struct usb_device *dev, - int recip, int target, void *data); + int recip, int type, int target, void *data); static inline int usb_get_std_status(struct usb_device *dev, int recip, int target, void *data) { - return usb_get_status(dev, recip, target, data); + return usb_get_status(dev, recip, USB_STATUS_TYPE_STANDARD, target, + data); } extern int usb_string(struct usb_device *dev, int index, -- cgit v1.2.3 From f8f3e4acbde3dff2e433d02476034606e07ac742 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 2 Nov 2017 10:57:42 +0200 Subject: usb: core: add a new usb_get_ptm_status() helper Drivers who are interested in the PTM status stype, should use this new helper to make sure they issue the correct GetStatus message. Signed-off-by: Felipe Balbi Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 6228d8177bfe..3555936025ec 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1775,6 +1775,12 @@ static inline int usb_get_std_status(struct usb_device *dev, data); } +static inline int usb_get_ptm_status(struct usb_device *dev, void *data) +{ + return usb_get_status(dev, USB_RECIP_DEVICE, USB_STATUS_TYPE_PTM, + 0, data); +} + extern int usb_string(struct usb_device *dev, int index, char *buf, size_t size); -- cgit v1.2.3 From 5b143d2a6edeae59700420c948f7d793da3356a8 Mon Sep 17 00:00:00 2001 From: Sebastien Bourdelin Date: Wed, 1 Nov 2017 13:14:33 -0400 Subject: bus: add driver for the Technologic Systems NBUS This driver implements a GPIOs bit-banged bus, called the NBUS by Technologic Systems. It is used to communicate with the peripherals in the FPGA on the TS-4600 SoM. Signed-off-by: Sebastien Bourdelin Acked-by: Linus Walleij Signed-off-by: Arnd Bergmann --- include/linux/ts-nbus.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) create mode 100644 include/linux/ts-nbus.h (limited to 'include/linux') diff --git a/include/linux/ts-nbus.h b/include/linux/ts-nbus.h new file mode 100644 index 000000000000..5bd4c822f7cf --- /dev/null +++ b/include/linux/ts-nbus.h @@ -0,0 +1,18 @@ +/* + * Copyright (c) 2016 - Savoir-faire Linux + * Author: Sebastien Bourdelin + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ + +#ifndef _TS_NBUS_H +#define _TS_NBUS_H + +struct ts_nbus; + +extern int ts_nbus_read(struct ts_nbus *ts_nbus, u8 adr, u16 *val); +extern int ts_nbus_write(struct ts_nbus *ts_nbus, u8 adr, u16 val); + +#endif /* _TS_NBUS_H */ -- cgit v1.2.3 From 7c8d469877b16d2c1cecf101a0abb7b218db85bc Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 31 Oct 2017 00:15:47 +0100 Subject: debugfs: add support for more elaborate ->d_fsdata Currently, the user provided fops, "real_fops", are stored directly into ->d_fsdata. In order to be able to store more per-file state and thus prepare for more granular file removal protection, wrap the real_fops into a dynamically allocated container struct, debugfs_fsdata. A struct debugfs_fsdata gets allocated at file creation and freed from the newly intoduced ->d_release(). Finally, move the implementation of debugfs_real_fops() out of the public debugfs header such that struct debugfs_fsdata's declaration can be kept private. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 20 +++----------------- 1 file changed, 3 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index b93efc8feecd..cbee5f4a02a3 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -45,23 +45,6 @@ extern struct dentry *arch_debugfs_dir; extern struct srcu_struct debugfs_srcu; -/** - * debugfs_real_fops - getter for the real file operation - * @filp: a pointer to a struct file - * - * Must only be called under the protection established by - * debugfs_use_file_start(). - */ -static inline const struct file_operations *debugfs_real_fops(const struct file *filp) - __must_hold(&debugfs_srcu) -{ - /* - * Neither the pointer to the struct file_operations, nor its - * contents ever change -- srcu_dereference() is not needed here. - */ - return filp->f_path.dentry->d_fsdata; -} - #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ @@ -112,6 +95,9 @@ int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); +const struct file_operations *debugfs_real_fops(const struct file *filp) + __must_hold(&debugfs_srcu); + ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t debugfs_attr_write(struct file *file, const char __user *buf, -- cgit v1.2.3 From e9117a5a4bf65d8e99f060d356a04d27a60b436d Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 31 Oct 2017 00:15:48 +0100 Subject: debugfs: implement per-file removal protection Since commit 49d200deaa68 ("debugfs: prevent access to removed files' private data"), accesses to a file's private data are protected from concurrent removal by covering all file_operations with a SRCU read section and sychronizing with those before returning from debugfs_remove() by means of synchronize_srcu(). As pointed out by Johannes Berg, there are debugfs files with forever blocking file_operations. Their corresponding SRCU read side sections would block any debugfs_remove() forever as well, even unrelated ones. This results in a livelock. Because a remover can't cancel any indefinite blocking within foreign files, this is a problem. Resolve this by introducing support for more granular protection on a per-file basis. This is implemented by introducing an 'active_users' refcount_t to the per-file struct debugfs_fsdata state. At file creation time, it is set to one and a debugfs_remove() will drop that initial reference. The new debugfs_file_get() and debugfs_file_put(), intended to be used in place of former debugfs_use_file_start() and debugfs_use_file_finish(), increment and decrement it respectively. Once the count drops to zero, debugfs_file_put() will signal a completion which is possibly being waited for from debugfs_remove(). Thus, as long as there is a debugfs_file_get() not yet matched by a corresponding debugfs_file_put() around, debugfs_remove() will block. Actual users of debugfs_use_file_start() and -finish() will get converted to the new debugfs_file_get() and debugfs_file_put() by followup patches. Fixes: 49d200deaa68 ("debugfs: prevent access to removed files' private data") Reported-by: Johannes Berg Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index cbee5f4a02a3..3b914d588148 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -98,6 +98,9 @@ void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); const struct file_operations *debugfs_real_fops(const struct file *filp) __must_hold(&debugfs_srcu); +int debugfs_file_get(struct dentry *dentry); +void debugfs_file_put(struct dentry *dentry); + ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos); ssize_t debugfs_attr_write(struct file *file, const char __user *buf, @@ -236,6 +239,14 @@ static inline void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu) { } +static inline int debugfs_file_get(struct dentry *dentry) +{ + return 0; +} + +static inline void debugfs_file_put(struct dentry *dentry) +{ } + static inline ssize_t debugfs_attr_read(struct file *file, char __user *buf, size_t len, loff_t *ppos) { -- cgit v1.2.3 From 055ab8e3e3d52e005d2047b14ce63551b3a8b8b5 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 31 Oct 2017 00:15:49 +0100 Subject: debugfs: debugfs_real_fops(): drop __must_hold sparse annotation Currently, debugfs_real_fops() is annotated with a __must_hold(&debugfs_srcu) sparse annotation. With the conversion of the SRCU based protection of users against concurrent file removals to a per-file refcount based scheme, this becomes wrong. Drop this annotation. Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 3b914d588148..c5eda259b9d6 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -95,8 +95,7 @@ int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); -const struct file_operations *debugfs_real_fops(const struct file *filp) - __must_hold(&debugfs_srcu); +const struct file_operations *debugfs_real_fops(const struct file *filp); int debugfs_file_get(struct dentry *dentry); void debugfs_file_put(struct dentry *dentry); -- cgit v1.2.3 From c9afbec27089cd6b4e621b639f41c7fc726c3bf1 Mon Sep 17 00:00:00 2001 From: Nicolai Stange Date: Tue, 31 Oct 2017 00:15:52 +0100 Subject: debugfs: purge obsolete SRCU based removal protection Purge the SRCU based file removal race protection in favour of the new, refcount based debugfs_file_get()/debugfs_file_put() API. Fixes: 49d200deaa68 ("debugfs: prevent access to removed files' private data") Signed-off-by: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index c5eda259b9d6..9f821a43bb0d 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -23,7 +23,6 @@ struct device; struct file_operations; -struct srcu_struct; struct debugfs_blob_wrapper { void *data; @@ -43,8 +42,6 @@ struct debugfs_regset32 { extern struct dentry *arch_debugfs_dir; -extern struct srcu_struct debugfs_srcu; - #define DEFINE_DEBUGFS_ATTRIBUTE(__fops, __get, __set, __fmt) \ static int __fops ## _open(struct inode *inode, struct file *file) \ { \ @@ -90,11 +87,6 @@ struct dentry *debugfs_create_automount(const char *name, void debugfs_remove(struct dentry *dentry); void debugfs_remove_recursive(struct dentry *dentry); -int debugfs_use_file_start(const struct dentry *dentry, int *srcu_idx) - __acquires(&debugfs_srcu); - -void debugfs_use_file_finish(int srcu_idx) __releases(&debugfs_srcu); - const struct file_operations *debugfs_real_fops(const struct file *filp); int debugfs_file_get(struct dentry *dentry); @@ -227,17 +219,6 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } -static inline int debugfs_use_file_start(const struct dentry *dentry, - int *srcu_idx) - __acquires(&debugfs_srcu) -{ - return 0; -} - -static inline void debugfs_use_file_finish(int srcu_idx) - __releases(&debugfs_srcu) -{ } - static inline int debugfs_file_get(struct dentry *dentry) { return 0; -- cgit v1.2.3 From 3bce94fd5f4c05337dedbe218501fb9f8789fc40 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 7 Nov 2017 16:59:23 +0100 Subject: debugfs: add SPDX identifiers to all debugfs files It's good to have SPDX identifiers in all files to make it easier to audit the kernel tree for correct licenses. Update the debugfs files files with the correct SPDX license identifier based on the license text in the file itself. The SPDX identifier is a legally binding shorthand, which can be used instead of the full boiler plate text. This work is based on a script and data from Thomas Gleixner, Philippe Ombredanne, and Kate Stewart. Cc: Thomas Gleixner Cc: Kate Stewart Cc: Philippe Ombredanne Cc: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 9f821a43bb0d..7beab3fd2f1a 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -1,3 +1,4 @@ +// SPDX-License-Identifier: GPL-2.0 /* * debugfs.h - a tiny little debug file system * -- cgit v1.2.3 From 2b2d8788dd565cbe1ab22da6a1bc63d0934a80eb Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Tue, 7 Nov 2017 17:01:46 +0100 Subject: debugfs: Remove redundant license text Now that the SPDX tag is in all debugfs files, that identifies the license in a specific and legally-defined manner. So the extra GPL text wording can be removed as it is no longer needed at all. This is done on a quest to remove the 700+ different ways that files in the kernel describe the GPL license text. And there's unneeded stuff like the address (sometimes incorrect) for the FSF which is never needed. No copyright headers or other non-license-description text was removed. Cc: Nicolai Stange Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 7beab3fd2f1a..f36ecc2a5712 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -5,10 +5,6 @@ * Copyright (C) 2004 Greg Kroah-Hartman * Copyright (C) 2004 IBM Inc. * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License version - * 2 as published by the Free Software Foundation. - * * debugfs is for people to use instead of /proc or /sys. * See Documentation/filesystems/ for more details. */ -- cgit v1.2.3 From 22700f3c6df55387cec2ee27c533a7b23c76dc51 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Tue, 10 Oct 2017 17:31:43 -0400 Subject: SUNRPC: Improve ordering of transport processing Since it can take a while before a specific thread gets scheduled, it is better to just implement a first come first served queue mechanism. That way, if a thread is already scheduled and is idle, it can pick up the work to do from the queue. Signed-off-by: Trond Myklebust Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index 38f561b2dda3..23c4d6496aac 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -46,6 +46,7 @@ struct svc_pool { struct svc_pool_stats sp_stats; /* statistics on pool operation */ #define SP_TASK_PENDING (0) /* still work to do even if no * xprt is queued. */ +#define SP_CONGESTED (1) unsigned long sp_flags; } ____cacheline_aligned_in_smp; -- cgit v1.2.3 From 95a20ef6f7e54c6a982715a7d0da2fd81790db28 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 7 Nov 2017 13:48:11 +0100 Subject: PM / Domains: Allow genpd users to specify default active wakeup behavior It is quite common for PM Domains to require slave devices to be kept active during system suspend if they are to be used as wakeup sources. To enable this, currently each PM Domain or driver has to provide its own gpd_dev_ops.active_wakeup() callback. Introduce a new flag GENPD_FLAG_ACTIVE_WAKEUP to consolidate this. If specified, all slave devices configured as wakeup sources will be kept active during system suspend. PM Domains that need more fine-grained controls, based on the slave device, can still provide their own callbacks, as before. Signed-off-by: Geert Uytterhoeven Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 9af0356bd69c..28c24c58d479 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -18,9 +18,10 @@ #include /* Defines used for the flags field in the struct generic_pm_domain */ -#define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ -#define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ -#define GENPD_FLAG_ALWAYS_ON (1U << 2) /* PM domain is always powered on */ +#define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ +#define GENPD_FLAG_IRQ_SAFE (1U << 1) /* PM domain operates in atomic */ +#define GENPD_FLAG_ALWAYS_ON (1U << 2) /* PM domain is always powered on */ +#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) /* Keep devices active if wakeup */ enum gpd_status { GPD_STATE_ACTIVE = 0, /* PM domain is active */ -- cgit v1.2.3 From d0af45f1f6528949e05385976eb61c5ebd31854e Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 7 Nov 2017 13:48:15 +0100 Subject: PM / Domains: Remove gpd_dev_ops.active_wakeup() callback There are no more users left of the gpd_dev_ops.active_wakeup() callback. All have been converted to GENPD_FLAG_ACTIVE_WAKEUP. Hence remove the callback. Signed-off-by: Geert Uytterhoeven Acked-by: Ulf Hansson Reviewed-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 28c24c58d479..04dbef9847d3 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -36,7 +36,6 @@ struct dev_power_governor { struct gpd_dev_ops { int (*start)(struct device *dev); int (*stop)(struct device *dev); - bool (*active_wakeup)(struct device *dev); }; struct genpd_power_state { -- cgit v1.2.3 From 3928ee6485a316c8abde7e24c7f82033a1c8d3ae Mon Sep 17 00:00:00 2001 From: "Maciej S. Szmigiero" Date: Thu, 2 Nov 2017 00:49:18 +0100 Subject: net: phy: leds: Add support for "link" trigger Currently, we create a LED trigger for any link speed known to a PHY. These triggers only fire when their exact link speed had been negotiated (they aren't cumulative, that is, they don't fire for "their or any higher" link speed). What we are missing, however, is a trigger which will fire on any link speed known to the PHY. Such trigger can then be used for implementing a poor man's substitute of the "link" LED on boards that lack it. Let's add it. Signed-off-by: Maciej S. Szmigiero Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index d78cd01ea513..dc82a07cb4fd 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -451,6 +451,8 @@ struct phy_device { struct phy_led_trigger *phy_led_triggers; unsigned int phy_num_led_triggers; struct phy_led_trigger *last_triggered; + + struct phy_led_trigger *led_link_trigger; #endif /* -- cgit v1.2.3 From 4a1a57df97636b9323c4221cc75a35694b6d34c7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Tue, 31 Oct 2017 09:47:28 -0700 Subject: Input: st1232 - remove obsolete platform device support Commit 1fa59bda21c7fa36 ("ARM: shmobile: Remove legacy board code for Armadillo-800 EVA"), removed the last user of st1232_pdata and the "st1232-ts" platform device. All remaining users use DT. Signed-off-by: Geert Uytterhoeven Signed-off-by: Dmitry Torokhov --- include/linux/platform_data/st1232_pdata.h | 14 -------------- 1 file changed, 14 deletions(-) delete mode 100644 include/linux/platform_data/st1232_pdata.h (limited to 'include/linux') diff --git a/include/linux/platform_data/st1232_pdata.h b/include/linux/platform_data/st1232_pdata.h deleted file mode 100644 index 1dcd23bee24e..000000000000 --- a/include/linux/platform_data/st1232_pdata.h +++ /dev/null @@ -1,14 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef _LINUX_ST1232_PDATA_H -#define _LINUX_ST1232_PDATA_H - -/* - * Optional platform data - * - * Use this if you want the driver to drive the reset pin. - */ -struct st1232_pdata { - int reset_gpio; -}; - -#endif -- cgit v1.2.3 From 602f3baf22188aad24b9a58be3209ab774b97d74 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:41 +0100 Subject: net_sch: red: Add offload ability to RED qdisc Add the ability to offload RED qdisc by using ndo_setup_tc. There are four commands for RED offloading: * TC_RED_SET: handles set and change. * TC_RED_DESTROY: handle qdisc destroy. * TC_RED_STATS: update the qdiscs counters (given as reference) * TC_RED_XSTAT: returns red xstats. Whether RED is being offloaded is being determined every time dump action is being called because parent change of this qdisc could change its offload state but doesn't require any RED function to be called. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index fda527ccb263..71968a2ca9f3 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -777,6 +777,7 @@ enum tc_setup_type { TC_SETUP_CLSBPF, TC_SETUP_BLOCK, TC_SETUP_CBS, + TC_SETUP_QDISC_RED, }; /* These structures hold the attributes of bpf state that are being passed -- cgit v1.2.3 From 575ed7d39e2fbe602a3894bc766a8cb49af83bd3 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:42 +0100 Subject: net_sch: mqprio: Change TC_SETUP_MQPRIO to TC_SETUP_QDISC_MQPRIO Change TC_SETUP_MQPRIO to TC_SETUP_QDISC_MQPRIO to match the new convention. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 71968a2ca9f3..703885aed856 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -770,7 +770,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, struct sk_buff *skb); enum tc_setup_type { - TC_SETUP_MQPRIO, + TC_SETUP_QDISC_MQPRIO, TC_SETUP_CLSU32, TC_SETUP_CLSFLOWER, TC_SETUP_CLSMATCHALL, -- cgit v1.2.3 From 8521db4c7e155d12fb280686c0552e47f77e9110 Mon Sep 17 00:00:00 2001 From: Nogah Frankel Date: Mon, 6 Nov 2017 07:23:43 +0100 Subject: net_sch: cbs: Change TC_SETUP_CBS to TC_SETUP_QDISC_CBS Change TC_SETUP_CBS to TC_SETUP_QDISC_CBS to match the new convention.. Signed-off-by: Nogah Frankel Signed-off-by: Jiri Pirko Reviewed-by: Simon Horman Acked-by: Vinicius Costa Gomes Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 703885aed856..30f0f2928808 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -776,7 +776,7 @@ enum tc_setup_type { TC_SETUP_CLSMATCHALL, TC_SETUP_CLSBPF, TC_SETUP_BLOCK, - TC_SETUP_CBS, + TC_SETUP_QDISC_CBS, TC_SETUP_QDISC_RED, }; -- cgit v1.2.3 From 620a5c860b774a81ce3f193eefb52bf4d128cca5 Mon Sep 17 00:00:00 2001 From: Egil Hjelmeland Date: Mon, 6 Nov 2017 12:42:01 +0100 Subject: net: dsa: lan9303: Correct register names in comments Two comments refer to registers, but lack the LAN9303_ prefix. Fix that. Signed-off-by: Egil Hjelmeland Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/dsa/lan9303.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dsa/lan9303.h b/include/linux/dsa/lan9303.h index 05d8d136baab..f48a85c377de 100644 --- a/include/linux/dsa/lan9303.h +++ b/include/linux/dsa/lan9303.h @@ -13,8 +13,8 @@ struct lan9303_phy_ops { #define LAN9303_NUM_ALR_RECORDS 512 struct lan9303_alr_cache_entry { u8 mac_addr[ETH_ALEN]; - u8 port_map; /* Bitmap of ports. Zero if unused entry */ - u8 stp_override; /* non zero if set ALR_DAT1_AGE_OVERRID */ + u8 port_map; /* Bitmap of ports. Zero if unused entry */ + u8 stp_override; /* non zero if set LAN9303_ALR_DAT1_AGE_OVERRID */ }; struct lan9303 { @@ -28,7 +28,9 @@ struct lan9303 { struct mutex indirect_mutex; /* protect indexed register access */ const struct lan9303_phy_ops *ops; bool is_bridged; /* true if port 1 and 2 are bridged */ - u32 swe_port_state; /* remember SWE_PORT_STATE while not bridged */ + + /* remember LAN9303_SWE_PORT_STATE while not bridged */ + u32 swe_port_state; /* LAN9303 do not offer reading specific ALR entry. Cache all * static entries in a flat table **/ -- cgit v1.2.3 From 96c623e51f1c40bf524decc48c6fac7ce5dd41f7 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 6 Nov 2017 14:26:10 +0100 Subject: of: add of_property_read_variable_* dummy helpers Commit a67e9472da42 ("of: Add array read functions with min/max size limits") added a new interface for reading variable-length arrays from DT properties. One user was added in dsa recently and this causes a build error because that code can be built with CONFIG_OF disabled: net/dsa/dsa2.c: In function 'dsa_switch_parse_member_of': net/dsa/dsa2.c:678:7: error: implicit declaration of function 'of_property_read_variable_u32_array'; did you mean 'of_property_read_u32_array'? [-Werror=implicit-function-declaration] This adds a dummy functions for of_property_read_variable_u32_array() and a few others that had been missing here. I decided to move of_property_read_string() and of_property_read_string_helper() in the process to make it easier to compare the two sets of function prototypes to make sure they match. Fixes: 975e6e32215e ("net: dsa: rework switch parsing") Signed-off-by: Arnd Bergmann Acked-by: Rob Herring Reviewed-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/of.h | 62 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 50 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index b240ed69dc96..b32d418d011a 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -675,12 +675,6 @@ static inline int of_property_count_elems_of_size(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_u32_index(const struct device_node *np, - const char *propname, u32 index, u32 *out_value) -{ - return -ENOSYS; -} - static inline int of_property_read_u8_array(const struct device_node *np, const char *propname, u8 *out_values, size_t sz) { @@ -707,16 +701,14 @@ static inline int of_property_read_u64_array(const struct device_node *np, return -ENOSYS; } -static inline int of_property_read_string(const struct device_node *np, - const char *propname, - const char **out_string) +static inline int of_property_read_u32_index(const struct device_node *np, + const char *propname, u32 index, u32 *out_value) { return -ENOSYS; } -static inline int of_property_read_string_helper(const struct device_node *np, - const char *propname, - const char **out_strs, size_t sz, int index) +static inline int of_property_read_u64_index(const struct device_node *np, + const char *propname, u32 index, u64 *out_value) { return -ENOSYS; } @@ -744,12 +736,51 @@ static inline int of_n_size_cells(struct device_node *np) return 0; } +static inline int of_property_read_variable_u8_array(const struct device_node *np, + const char *propname, u8 *out_values, + size_t sz_min, size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_variable_u16_array(const struct device_node *np, + const char *propname, u16 *out_values, + size_t sz_min, size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_variable_u32_array(const struct device_node *np, + const char *propname, + u32 *out_values, + size_t sz_min, + size_t sz_max) +{ + return -ENOSYS; +} + static inline int of_property_read_u64(const struct device_node *np, const char *propname, u64 *out_value) { return -ENOSYS; } +static inline int of_property_read_variable_u64_array(const struct device_node *np, + const char *propname, + u64 *out_values, + size_t sz_min, + size_t sz_max) +{ + return -ENOSYS; +} + +static inline int of_property_read_string(const struct device_node *np, + const char *propname, + const char **out_string) +{ + return -ENOSYS; +} + static inline int of_property_match_string(const struct device_node *np, const char *propname, const char *string) @@ -757,6 +788,13 @@ static inline int of_property_match_string(const struct device_node *np, return -ENOSYS; } +static inline int of_property_read_string_helper(const struct device_node *np, + const char *propname, + const char **out_strs, size_t sz, int index) +{ + return -ENOSYS; +} + static inline struct device_node *of_parse_phandle(const struct device_node *np, const char *phandle_name, int index) -- cgit v1.2.3 From f54bb2ec02c839f6bfe3e8d438cd93d30b4809dd Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Mon, 6 Nov 2017 16:01:17 +0100 Subject: locking/lockdep: Add IRQs disabled/enabled assertion APIs: lockdep_assert_irqs_enabled()/disabled() Checking whether IRQs are enabled or disabled is a very common sanity check, however not free of overhead especially on fastpath where such assertion is very common. Lockdep is a good host for such concurrency correctness check and it even already tracks down IRQs disablement state. Just reuse its machinery. This will allow us to get rid of the flags pop and check overhead from fast path when kernel is built for production. Suggested-by: Peter Zijlstra Signed-off-by: Frederic Weisbecker Acked-by: Thomas Gleixner Cc: David S . Miller Cc: Lai Jiangshan Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Tejun Heo Link: http://lkml.kernel.org/r/1509980490-4285-2-git-send-email-frederic@kernel.org Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 02720769c159..a842551fe044 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -725,9 +725,24 @@ do { \ lock_acquire(&(lock)->dep_map, 0, 0, 1, 1, NULL, _THIS_IP_); \ lock_release(&(lock)->dep_map, 0, _THIS_IP_); \ } while (0) + +#define lockdep_assert_irqs_enabled() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + !current->hardirqs_enabled, \ + "IRQs not enabled as expected\n"); \ + } while (0) + +#define lockdep_assert_irqs_disabled() do { \ + WARN_ONCE(debug_locks && !current->lockdep_recursion && \ + current->hardirqs_enabled, \ + "IRQs not disabled as expected\n"); \ + } while (0) + #else # define might_lock(lock) do { } while (0) # define might_lock_read(lock) do { } while (0) +# define lockdep_assert_irqs_enabled() do { } while (0) +# define lockdep_assert_irqs_disabled() do { } while (0) #endif #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 0759e80b84e34a84e7e46e2b1adb528c83d84a47 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Tue, 7 Nov 2017 11:33:49 +0100 Subject: PM / QoS: Fix device resume latency framework The special value of 0 for device resume latency PM QoS means "no restriction", but there are two problems with that. First, device resume latency PM QoS requests with 0 as the value are always put in front of requests with positive values in the priority lists used internally by the PM QoS framework, causing 0 to be chosen as an effective constraint value. However, that 0 is then interpreted as "no restriction" effectively overriding the other requests with specific restrictions which is incorrect. Second, the users of device resume latency PM QoS have no way to specify that *any* resume latency at all should be avoided, which is an artificial limitation in general. To address these issues, modify device resume latency PM QoS to use S32_MAX as the "no constraint" value and 0 as the "no latency at all" one and rework its users (the cpuidle menu governor, the genpd QoS governor and the runtime PM framework) to follow these changes. Also add a special "n/a" value to the corresponding user space I/F to allow user space to indicate that it cannot accept any resume latencies at all for the given device. Fixes: 85dc0b8a4019 (PM / QoS: Make it possible to expose PM QoS latency constraints) Link: https://bugzilla.kernel.org/show_bug.cgi?id=197323 Reported-by: Reinette Chatre Signed-off-by: Rafael J. Wysocki Tested-by: Reinette Chatre Tested-by: Geert Uytterhoeven Tested-by: Tero Kristo Reviewed-by: Ramesh Thomas --- include/linux/pm_qos.h | 26 ++++++++++++++++++-------- 1 file changed, 18 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_qos.h b/include/linux/pm_qos.h index 51f0d7e0b15f..2a3b36da61b1 100644 --- a/include/linux/pm_qos.h +++ b/include/linux/pm_qos.h @@ -27,16 +27,19 @@ enum pm_qos_flags_status { PM_QOS_FLAGS_ALL, }; -#define PM_QOS_DEFAULT_VALUE -1 +#define PM_QOS_DEFAULT_VALUE (-1) +#define PM_QOS_LATENCY_ANY S32_MAX +#define PM_QOS_LATENCY_ANY_NS ((s64)PM_QOS_LATENCY_ANY * NSEC_PER_USEC) #define PM_QOS_CPU_DMA_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_LAT_DEFAULT_VALUE (2000 * USEC_PER_SEC) #define PM_QOS_NETWORK_THROUGHPUT_DEFAULT_VALUE 0 #define PM_QOS_MEMORY_BANDWIDTH_DEFAULT_VALUE 0 -#define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE 0 +#define PM_QOS_RESUME_LATENCY_DEFAULT_VALUE PM_QOS_LATENCY_ANY +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT PM_QOS_LATENCY_ANY +#define PM_QOS_RESUME_LATENCY_NO_CONSTRAINT_NS PM_QOS_LATENCY_ANY_NS #define PM_QOS_LATENCY_TOLERANCE_DEFAULT_VALUE 0 #define PM_QOS_LATENCY_TOLERANCE_NO_CONSTRAINT (-1) -#define PM_QOS_LATENCY_ANY ((s32)(~(__u32)0 >> 1)) #define PM_QOS_FLAG_NO_POWER_OFF (1 << 0) @@ -173,7 +176,8 @@ static inline s32 dev_pm_qos_requested_flags(struct device *dev) static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return IS_ERR_OR_NULL(dev->power.qos) ? - 0 : pm_qos_read_value(&dev->power.qos->resume_latency); + PM_QOS_RESUME_LATENCY_NO_CONSTRAINT : + pm_qos_read_value(&dev->power.qos->resume_latency); } #else static inline enum pm_qos_flags_status __dev_pm_qos_flags(struct device *dev, @@ -183,9 +187,9 @@ static inline enum pm_qos_flags_status dev_pm_qos_flags(struct device *dev, s32 mask) { return PM_QOS_FLAGS_UNDEFINED; } static inline s32 __dev_pm_qos_read_value(struct device *dev) - { return 0; } + { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline s32 dev_pm_qos_read_value(struct device *dev) - { return 0; } + { return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; } static inline int dev_pm_qos_add_request(struct device *dev, struct dev_pm_qos_request *req, enum dev_pm_qos_req_type type, @@ -231,9 +235,15 @@ static inline int dev_pm_qos_expose_latency_tolerance(struct device *dev) { return 0; } static inline void dev_pm_qos_hide_latency_tolerance(struct device *dev) {} -static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_requested_resume_latency(struct device *dev) +{ + return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; +} static inline s32 dev_pm_qos_requested_flags(struct device *dev) { return 0; } -static inline s32 dev_pm_qos_raw_read_value(struct device *dev) { return 0; } +static inline s32 dev_pm_qos_raw_read_value(struct device *dev) +{ + return PM_QOS_RESUME_LATENCY_NO_CONSTRAINT; +} #endif #endif -- cgit v1.2.3 From e0e1e39de490a2d9b8a173363ccf2415ddada871 Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Sat, 28 Oct 2017 15:37:17 +0200 Subject: pinctrl: Add skew-delay pin config and bindings Some pin controllers (such as the Gemini) can control the expected clock skew and output delay on certain pins with a sub-nanosecond granularity. This is typically done by shunting in a number of double inverters in front of or behind the pin. Make it possible to configure this with a generic binding. Cc: devicetree@vger.kernel.org Acked-by: Rob Herring Acked-by: Hans Ulli Kroll Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index 5d8bc7f21c2a..ec6dadcc1fde 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -90,6 +90,10 @@ * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to * this parameter (on a custom format) tells the driver which alternative * slew rate to use. + * @PIN_CONFIG_SKEW_DELAY: if the pin has programmable skew rate (on inputs) + * or latch delay (on outputs) this parameter (in a custom format) + * specifies the clock skew or latch delay. It typically controls how + * many double inverters are put in front of the line. * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if * you need to pass in custom configurations to the pin controller, use * PIN_CONFIG_END+1 as the base offset. @@ -117,6 +121,7 @@ enum pin_config_param { PIN_CONFIG_POWER_SOURCE, PIN_CONFIG_SLEEP_HARDWARE_STATE, PIN_CONFIG_SLEW_RATE, + PIN_CONFIG_SKEW_DELAY, PIN_CONFIG_END = 0x7F, PIN_CONFIG_MAX = 0xFF, }; -- cgit v1.2.3 From c44eafd79be666e7c81d22e215c945b27f2785f7 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:45 +0100 Subject: gpio: Introduce struct gpio_irq_chip This new structure will be used to group all fields related to interrupt handling in a GPIO chip. Doing so will properly namespace these fields and make it easier to distinguish which fields are used for IRQ support. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index ed04fa2a00a8..36a065521fa0 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -19,6 +19,36 @@ struct module; #ifdef CONFIG_GPIOLIB +#ifdef CONFIG_GPIOLIB_IRQCHIP +/** + * struct gpio_irq_chip - GPIO interrupt controller + */ +struct gpio_irq_chip { + /** + * @domain_ops: + * + * Table of interrupt domain operations for this IRQ chip. + */ + const struct irq_domain_ops *domain_ops; + + /** + * @parent_handler: + * + * The interrupt handler for the GPIO chip's parent interrupts, may be + * NULL if the parent interrupts are nested rather than cascaded. + */ + irq_flow_handler_t parent_handler; + + /** + * @parent_handler_data: + * + * Data associated, and passed to, the handler for the parent + * interrupt. + */ + void *parent_handler_data; +}; +#endif + /** * struct gpio_chip - abstract a GPIO controller * @label: a functional name for the GPIO device, such as a part @@ -176,6 +206,14 @@ struct gpio_chip { bool irq_need_valid_mask; unsigned long *irq_valid_mask; struct lock_class_key *lock_key; + + /** + * @irq: + * + * Integrates interrupt chip functionality with the GPIO chip. Can be + * used to handle IRQs for most practical cases. + */ + struct gpio_irq_chip irq; #endif #if defined(CONFIG_OF_GPIO) -- cgit v1.2.3 From da80ff81a8f54611b834d73149f8ac0d59151c87 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:46 +0100 Subject: gpio: Move irqchip into struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 36a065521fa0..a79b3b18fadd 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -24,6 +24,13 @@ struct module; * struct gpio_irq_chip - GPIO interrupt controller */ struct gpio_irq_chip { + /** + * @chip: + * + * GPIO IRQ chip implementation, provided by GPIO driver. + */ + struct irq_chip *chip; + /** * @domain_ops: * @@ -47,6 +54,11 @@ struct gpio_irq_chip { */ void *parent_handler_data; }; + +static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) +{ + return container_of(chip, struct gpio_irq_chip, chip); +} #endif /** @@ -112,7 +124,6 @@ struct gpio_irq_chip { * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irqchip: GPIO IRQ chip impl, provided by GPIO driver * @irqdomain: Interrupt translation domain; responsible for mapping * between GPIO hwirq number and linux irq number * @irq_handler: the irq handler to use (often a predefined irq core function) @@ -197,7 +208,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - struct irq_chip *irqchip; struct irq_domain *irqdomain; irq_flow_handler_t irq_handler; unsigned int irq_default_type; -- cgit v1.2.3 From f0fbe7bce733561b76a5b55c5f4625888acd3792 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:47 +0100 Subject: gpio: Move irqdomain into struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a79b3b18fadd..c5dfa8c0b829 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -31,6 +31,14 @@ struct gpio_irq_chip { */ struct irq_chip *chip; + /** + * @domain: + * + * Interrupt translation domain; responsible for mapping between GPIO + * hwirq number and Linux IRQ number. + */ + struct irq_domain *domain; + /** * @domain_ops: * @@ -124,8 +132,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irqdomain: Interrupt translation domain; responsible for mapping - * between GPIO hwirq number and linux irq number * @irq_handler: the irq handler to use (often a predefined irq core function) * for GPIO IRQs, provided by GPIO driver * @irq_default_type: default IRQ triggering type applied during GPIO driver @@ -208,7 +214,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - struct irq_domain *irqdomain; irq_flow_handler_t irq_handler; unsigned int irq_default_type; unsigned int irq_chained_parent; -- cgit v1.2.3 From c7a0aa59524c5bb20bebaca360f7c5faaec6b806 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:48 +0100 Subject: gpio: Move irq_handler to struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c5dfa8c0b829..864f507e859b 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -46,6 +46,14 @@ struct gpio_irq_chip { */ const struct irq_domain_ops *domain_ops; + /** + * @handler: + * + * The IRQ handler to use (often a predefined IRQ core function) for + * GPIO IRQs, provided by GPIO driver. + */ + irq_flow_handler_t handler; + /** * @parent_handler: * @@ -132,8 +140,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irq_handler: the irq handler to use (often a predefined irq core function) - * for GPIO IRQs, provided by GPIO driver * @irq_default_type: default IRQ triggering type applied during GPIO driver * initialization, provided by GPIO driver * @irq_chained_parent: GPIO IRQ chip parent/bank linux irq number, @@ -214,7 +220,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - irq_flow_handler_t irq_handler; unsigned int irq_default_type; unsigned int irq_chained_parent; bool irq_nested; -- cgit v1.2.3 From 3634eeb0fe9176e453c99834749dce21ea1305c1 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:49 +0100 Subject: gpio: Move irq_default_type to struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 864f507e859b..1367fa94105f 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -54,6 +54,14 @@ struct gpio_irq_chip { */ irq_flow_handler_t handler; + /** + * @default_type: + * + * Default IRQ triggering type applied during GPIO driver + * initialization, provided by GPIO driver. + */ + unsigned int default_type; + /** * @parent_handler: * @@ -140,8 +148,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irq_default_type: default IRQ triggering type applied during GPIO driver - * initialization, provided by GPIO driver * @irq_chained_parent: GPIO IRQ chip parent/bank linux irq number, * provided by GPIO driver for chained interrupt (not for nested * interrupts). @@ -220,7 +226,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - unsigned int irq_default_type; unsigned int irq_chained_parent; bool irq_nested; bool irq_need_valid_mask; -- cgit v1.2.3 From 39e5f0969514fbfd6c235ac52586c72ca77cee00 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:50 +0100 Subject: gpio: Move irq_chained_parent to struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1367fa94105f..86f00d908e90 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -77,6 +77,21 @@ struct gpio_irq_chip { * interrupt. */ void *parent_handler_data; + + /** + * @num_parents: + * + * The number of interrupt parents of a GPIO chip. + */ + unsigned int num_parents; + + /** + * @parents: + * + * A list of interrupt parents of a GPIO chip. This is owned by the + * driver, so the core will only reference this list, not modify it. + */ + unsigned int *parents; }; static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) @@ -148,9 +163,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irq_chained_parent: GPIO IRQ chip parent/bank linux irq number, - * provided by GPIO driver for chained interrupt (not for nested - * interrupts). * @irq_nested: True if set the interrupt handling is nested. * @irq_need_valid_mask: If set core allocates @irq_valid_mask with all * bits set to one @@ -226,7 +238,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - unsigned int irq_chained_parent; bool irq_nested; bool irq_need_valid_mask; unsigned long *irq_valid_mask; -- cgit v1.2.3 From dc6bafee86897419b0908e8d1e52ef46ca0ea487 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:51 +0100 Subject: gpio: Move irq_nested into struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 86f00d908e90..1c3d06fe54b1 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -92,6 +92,13 @@ struct gpio_irq_chip { * driver, so the core will only reference this list, not modify it. */ unsigned int *parents; + + /** + * @nested: + * + * True if set the interrupt handling is nested. + */ + bool nested; }; static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) @@ -163,7 +170,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irq_nested: True if set the interrupt handling is nested. * @irq_need_valid_mask: If set core allocates @irq_valid_mask with all * bits set to one * @irq_valid_mask: If not %NULL holds bitmask of GPIOs which are valid to @@ -238,7 +244,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - bool irq_nested; bool irq_need_valid_mask; unsigned long *irq_valid_mask; struct lock_class_key *lock_key; -- cgit v1.2.3 From dc7b0387ee894c115ef5ddcaaf794125d6d9058c Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:52 +0100 Subject: gpio: Move irq_valid_mask into struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 21 +++++++++++++++------ 1 file changed, 15 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1c3d06fe54b1..067efcd4f46d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -99,6 +99,21 @@ struct gpio_irq_chip { * True if set the interrupt handling is nested. */ bool nested; + + /** + * @need_valid_mask: + * + * If set core allocates @valid_mask with all bits set to one. + */ + bool need_valid_mask; + + /** + * @valid_mask: + * + * If not %NULL holds bitmask of GPIOs which are valid to be included + * in IRQ domain of the chip. + */ + unsigned long *valid_mask; }; static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) @@ -170,10 +185,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @irq_need_valid_mask: If set core allocates @irq_valid_mask with all - * bits set to one - * @irq_valid_mask: If not %NULL holds bitmask of GPIOs which are valid to - * be included in IRQ domain of the chip * @lock_key: per GPIO IRQ chip lockdep class * * A gpio_chip can help platforms abstract various sources of GPIOs so @@ -244,8 +255,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - bool irq_need_valid_mask; - unsigned long *irq_valid_mask; struct lock_class_key *lock_key; /** -- cgit v1.2.3 From ca9df053fb2bb2fcc64f37a1668321c7e19edd04 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:53 +0100 Subject: gpio: Move lock_key into struct gpio_irq_chip In order to consolidate the multiple ways to associate an IRQ chip with a GPIO chip, move more fields into the new struct gpio_irq_chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 067efcd4f46d..c363ee198ff9 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -62,6 +62,13 @@ struct gpio_irq_chip { */ unsigned int default_type; + /** + * @lock_key: + * + * Per GPIO IRQ chip lockdep class. + */ + struct lock_class_key *lock_key; + /** * @parent_handler: * @@ -185,7 +192,6 @@ static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) * safely. * @bgpio_dir: shadowed direction register for generic GPIO to clear/set * direction safely. - * @lock_key: per GPIO IRQ chip lockdep class * * A gpio_chip can help platforms abstract various sources of GPIOs so * they can all be accessed through a common programing interface. @@ -255,7 +261,6 @@ struct gpio_chip { * With CONFIG_GPIOLIB_IRQCHIP we get an irqchip inside the gpiolib * to handle IRQs for most practical cases. */ - struct lock_class_key *lock_key; /** * @irq: -- cgit v1.2.3 From e0d89728981393b7d694bd3419b7794b9882c92d Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:54 +0100 Subject: gpio: Implement tighter IRQ chip integration Currently GPIO drivers are required to add the GPIO chip and its corresponding IRQ chip separately, which can result in a lot of boilerplate. Use the newly introduced struct gpio_irq_chip, embedded in struct gpio_chip, that drivers can fill in if they want the GPIO core to automatically register the IRQ chip associated with a GPIO chip. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index c363ee198ff9..51fc7b023364 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -100,6 +100,13 @@ struct gpio_irq_chip { */ unsigned int *parents; + /** + * @map: + * + * A list of interrupt parents for each line of a GPIO chip. + */ + unsigned int *map; + /** * @nested: * -- cgit v1.2.3 From 1b95b4eb567aab1cafcdcb14c60a7dd9d56236a9 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:55 +0100 Subject: gpio: Export gpiochip_irq_{map,unmap}() Export these functions so that drivers can explicitly use these when setting up their IRQ domain. Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 51fc7b023364..bbe5c647f29d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -367,6 +367,10 @@ int bgpio_init(struct gpio_chip *gc, struct device *dev, #ifdef CONFIG_GPIOLIB_IRQCHIP +int gpiochip_irq_map(struct irq_domain *d, unsigned int irq, + irq_hw_number_t hwirq); +void gpiochip_irq_unmap(struct irq_domain *d, unsigned int irq); + void gpiochip_set_chained_irqchip(struct gpio_chip *gpiochip, struct irq_chip *irqchip, unsigned int parent_irq, -- cgit v1.2.3 From 60ed54cae8dc0f2d41cafbd477bbed6deb716615 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:57 +0100 Subject: gpio: Disambiguate struct gpio_irq_chip.nested The nested field in struct gpio_irq_chip currently has two meanings. On one hand it marks an IRQ chip as being nested (as opposed to chained), while on the other hand it also means that an IRQ chip uses nested thread handlers. However, nested IRQ chips can already be identified by the fact that they don't pass a parent handler (the driver would instead already have installed a nested handler using request_irq()). Therefore, the only use for the nested attribute is to inform gpiolib that an IRQ chip uses nested thread handlers (as opposed to regular, non-threaded handlers). To clarify its purpose, rename the field to "threaded". Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index bbe5c647f29d..0a3fdd4d9d8d 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -108,11 +108,11 @@ struct gpio_irq_chip { unsigned int *map; /** - * @nested: + * @threaded: * - * True if set the interrupt handling is nested. + * True if set the interrupt handling uses nested threads. */ - bool nested; + bool threaded; /** * @need_valid_mask: @@ -385,7 +385,7 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, unsigned int first_irq, irq_flow_handler_t handler, unsigned int type, - bool nested, + bool threaded, struct lock_class_key *lock_key); #ifdef CONFIG_LOCKDEP -- cgit v1.2.3 From 8302cf585288f75fd253f6b9a094d51ae371a3f3 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:58 +0100 Subject: gpio: Introduce struct gpio_irq_chip.first Some GPIO chips cannot support sparse IRQ numbering and therefore need to manually allocate their interrupt descriptors statically. For these cases, a driver can pass the first allocated IRQ via the struct gpio_irq_chip's "first" field and thereby cause the IRQ domain to map all IRQs during initialization. Suggested-by: Grygorii Strashko Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 0a3fdd4d9d8d..a77d4ada060c 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -128,6 +128,14 @@ struct gpio_irq_chip { * in IRQ domain of the chip. */ unsigned long *valid_mask; + + /** + * @first: + * + * Required for static IRQ allocation. If set, irq_domain_add_simple() + * will allocate and map all IRQs during initialization. + */ + unsigned int first; }; static inline struct gpio_irq_chip *to_gpio_irq_chip(struct irq_chip *chip) -- cgit v1.2.3 From 959bc7b22bd25a3a907fbb9b26a1d0cbdf98ef40 Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Tue, 7 Nov 2017 19:15:59 +0100 Subject: gpio: Automatically add lockdep keys In order to avoid lockdep boilerplate in individual drivers, turn the gpiochip_add_data() function into a macro that creates a unique class key for each driver. Note that this has the slight disadvantage of adding a key for each driver registered with the system. However, these keys are 8 bytes in size, which is negligible and a small price to pay for generic infrastructure. Suggested-by: Grygorii Strashko Signed-off-by: Thierry Reding Acked-by: Grygorii Strashko [renane __gpiochip_add_data() to gpiochip_add_data_with_key] Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 36 +++++++++++++++++++++++++++++++++++- 1 file changed, 35 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index a77d4ada060c..8dd282c5167a 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -321,7 +321,41 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, unsigned offset); /* add/remove chips */ -extern int gpiochip_add_data(struct gpio_chip *chip, void *data); +extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, + struct lock_class_key *lock_key); + +/** + * gpiochip_add_data() - register a gpio_chip + * @chip: the chip to register, with chip->base initialized + * @data: driver-private data associated with this chip + * + * Context: potentially before irqs will work + * + * When gpiochip_add_data() is called very early during boot, so that GPIOs + * can be freely used, the chip->parent device must be registered before + * the gpio framework's arch_initcall(). Otherwise sysfs initialization + * for GPIOs will fail rudely. + * + * gpiochip_add_data() must only be called after gpiolib initialization, + * ie after core_initcall(). + * + * If chip->base is negative, this requests dynamic assignment of + * a range of valid GPIOs. + * + * Returns: + * A negative errno if the chip can't be registered, such as because the + * chip->base is invalid or already associated with a different chip. + * Otherwise it returns zero as a success code. + */ +#ifdef CONFIG_LOCKDEP +#define gpiochip_add_data(chip, data) ({ \ + static struct lock_class_key key; \ + gpiochip_add_data_with_key(chip, data, &key); \ + }) +#else +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#endif + static inline int gpiochip_add(struct gpio_chip *chip) { return gpiochip_add_data(chip, NULL); -- cgit v1.2.3 From a7d3d0392a325d630225b7dbccf2558f944114e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 10 Sep 2017 09:49:45 +0200 Subject: integrity: use kernel_read_file_from_path() to read x509 certs The CONFIG_IMA_LOAD_X509 and CONFIG_EVM_LOAD_X509 options permit loading x509 signed certificates onto the trusted keyrings without verifying the x509 certificate file's signature. This patch replaces the call to the integrity_read_file() specific function with the common kernel_read_file_from_path() function. To avoid verifying the file signature, this patch defines READING_X509_CERTFICATE. Signed-off-by: Christoph Hellwig Signed-off-by: Mimi Zohar --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 339e73742e73..456325084f1d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2792,6 +2792,7 @@ extern int do_pipe_flags(int *, int); id(KEXEC_IMAGE, kexec-image) \ id(KEXEC_INITRAMFS, kexec-initramfs) \ id(POLICY, security-policy) \ + id(X509_CERTIFICATE, x509-certificate) \ id(MAX_ID, ) #define __fid_enumify(ENUM, dummy) READING_ ## ENUM, -- cgit v1.2.3 From fda784e50aace694ec2e4e16e2de07b91a938563 Mon Sep 17 00:00:00 2001 From: "Bruno E. O. Meneguele" Date: Tue, 24 Oct 2017 15:37:00 -0200 Subject: module: export module signature enforcement status A static variable sig_enforce is used as status var to indicate the real value of CONFIG_MODULE_SIG_FORCE, once this one is set the var will hold true, but if the CONFIG is not set the status var will hold whatever value is present in the module.sig_enforce kernel cmdline param: true when =1 and false when =0 or not present. Considering this cmdline param take place over the CONFIG value when it's not set, other places in the kernel could misbehave since they would have only the CONFIG_MODULE_SIG_FORCE value to rely on. Exporting this status var allows the kernel to rely in the effective value of module signature enforcement, being it from CONFIG value or cmdline param. Signed-off-by: Bruno E. O. Meneguele Signed-off-by: Mimi Zohar --- include/linux/module.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index fe5aa3736707..c69b49abe877 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -639,6 +639,8 @@ static inline bool is_livepatch_module(struct module *mod) } #endif /* CONFIG_LIVEPATCH */ +bool is_module_sig_enforced(void); + #else /* !CONFIG_MODULES... */ static inline struct module *__module_address(unsigned long addr) @@ -753,6 +755,11 @@ static inline bool module_requested_async_probing(struct module *module) return false; } +static inline bool is_module_sig_enforced(void) +{ + return false; +} + #endif /* CONFIG_MODULES */ #ifdef CONFIG_SYSFS -- cgit v1.2.3 From 2dd9789c76ffde05d5f4c56f45c3cb71b3936694 Mon Sep 17 00:00:00 2001 From: Himanshu Jha Date: Sun, 5 Nov 2017 03:27:32 +0530 Subject: freezer: Fix typo in freezable_schedule_timeout() comment Signed-off-by: Himanshu Jha Acked-by: Luis R. Rodriguez Acked-by: Pavel Machek Signed-off-by: Rafael J. Wysocki --- include/linux/freezer.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/freezer.h b/include/linux/freezer.h index dd03e837ebb7..5b2cf48b2a7c 100644 --- a/include/linux/freezer.h +++ b/include/linux/freezer.h @@ -181,7 +181,7 @@ static inline void freezable_schedule_unsafe(void) } /* - * Like freezable_schedule_timeout(), but should not block the freezer. Do not + * Like schedule_timeout(), but should not block the freezer. Do not * call this with locks held. */ static inline long freezable_schedule_timeout(long timeout) -- cgit v1.2.3 From cf89a31ca55272e1dfb9527b5a61eee4d417747a Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 19 Sep 2017 12:39:11 +0300 Subject: device property: Make fwnode_handle_get() return the fwnode The fwnode_handle_get() function is used to obtain a reference to an fwnode. A common usage pattern for the OF equivalent of the function is: mynode = of_node_get(node); Similarly make fwnode_handle_get() return the fwnode to which the reference was obtained. Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/fwnode.h | 2 +- include/linux/property.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0c35b6caf0f6..411a84c6c400 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -68,7 +68,7 @@ struct fwnode_reference_args { * @graph_parse_endpoint: Parse endpoint for port and endpoint id. */ struct fwnode_operations { - void (*get)(struct fwnode_handle *fwnode); + struct fwnode_handle *(*get)(struct fwnode_handle *fwnode); void (*put)(struct fwnode_handle *fwnode); bool (*device_is_available)(const struct fwnode_handle *fwnode); bool (*property_present)(const struct fwnode_handle *fwnode, diff --git a/include/linux/property.h b/include/linux/property.h index 6bebee13c5e0..42069c20caee 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -100,7 +100,7 @@ struct fwnode_handle *fwnode_get_named_child_node( struct fwnode_handle *device_get_named_child_node(struct device *dev, const char *childname); -void fwnode_handle_get(struct fwnode_handle *fwnode); +struct fwnode_handle *fwnode_handle_get(struct fwnode_handle *fwnode); void fwnode_handle_put(struct fwnode_handle *fwnode); unsigned int device_get_child_node_count(struct device *dev); -- cgit v1.2.3 From 5e4b1b707b7ed795585b8ee28c44ce692aa8dfef Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 19 Sep 2017 12:39:12 +0300 Subject: device property: Add a macro for interating over graph endpoints Add a convenience macro for iterating over graph endpoints. Iterating over graph endpoints using fwnode_graph_get_next_endpoint() is a recurring pattern, and this macro allows calling that function in a slightly more convenient way. For instance, for (child = NULL; (child = fwnode_graph_get_next_endpoint(fwnode, child)); ) becomes fwnode_graph_for_each_endpoint(fwnode, child) Signed-off-by: Sakari Ailus Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 42069c20caee..f6189a3ac63c 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -293,6 +293,10 @@ struct fwnode_handle * fwnode_graph_get_remote_node(const struct fwnode_handle *fwnode, u32 port, u32 endpoint); +#define fwnode_graph_for_each_endpoint(fwnode, child) \ + for (child = NULL; \ + (child = fwnode_graph_get_next_endpoint(fwnode, child)); ) + int fwnode_graph_parse_endpoint(const struct fwnode_handle *fwnode, struct fwnode_endpoint *endpoint); -- cgit v1.2.3 From 3653bc95bcc7daa938c0fdcd64ff199ed8f7f208 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 9 Oct 2017 21:52:09 -0700 Subject: timer: Prepare to change all DEFINE_TIMER() callbacks Before we can globally change the function prototype of all timer callbacks, we have to change those set up by DEFINE_TIMER(). Prepare for this by casting the callbacks until the prototype changes globally. Cc: Thomas Gleixner Signed-off-by: Kees Cook --- include/linux/timer.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index a1af92bac0d5..9f8895decb82 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -63,6 +63,9 @@ struct timer_list { #define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) +#define TIMER_DATA_TYPE unsigned long +#define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) + #define __TIMER_INITIALIZER(_function, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -74,7 +77,7 @@ struct timer_list { #define DEFINE_TIMER(_name, _function) \ struct timer_list _name = \ - __TIMER_INITIALIZER(_function, 0, 0) + __TIMER_INITIALIZER((TIMER_FUNC_TYPE)_function, 0, 0) void init_timer_key(struct timer_list *timer, unsigned int flags, const char *name, struct lock_class_key *key); @@ -147,9 +150,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, #define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) -#define TIMER_DATA_TYPE unsigned long -#define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) - #ifndef CONFIG_LOCKDEP static inline void timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *), -- cgit v1.2.3 From 375ef2b1f0d0b43b0d36ffdd521637ff59b0c13c Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 17 Sep 2017 13:43:58 +0300 Subject: net: Introduce netdev_*_once functions Extend the net device error logging with netdev_*_once macros. netdev_*_once are the equivalents of the dev_*_once macros which are useful for messages that should only be logged once. Also add netdev_WARN_ONCE, which is the "once" extension for the already existing netdev_WARN macro. Signed-off-by: Gal Pressman Signed-off-by: Saeed Mahameed --- include/linux/netdevice.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 30f0f2928808..79518ede3170 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4336,6 +4336,31 @@ void netdev_notice(const struct net_device *dev, const char *format, ...); __printf(2, 3) void netdev_info(const struct net_device *dev, const char *format, ...); +#define netdev_level_once(level, dev, fmt, ...) \ +do { \ + static bool __print_once __read_mostly; \ + \ + if (!__print_once) { \ + __print_once = true; \ + netdev_printk(level, dev, fmt, ##__VA_ARGS__); \ + } \ +} while (0) + +#define netdev_emerg_once(dev, fmt, ...) \ + netdev_level_once(KERN_EMERG, dev, fmt, ##__VA_ARGS__) +#define netdev_alert_once(dev, fmt, ...) \ + netdev_level_once(KERN_ALERT, dev, fmt, ##__VA_ARGS__) +#define netdev_crit_once(dev, fmt, ...) \ + netdev_level_once(KERN_CRIT, dev, fmt, ##__VA_ARGS__) +#define netdev_err_once(dev, fmt, ...) \ + netdev_level_once(KERN_ERR, dev, fmt, ##__VA_ARGS__) +#define netdev_warn_once(dev, fmt, ...) \ + netdev_level_once(KERN_WARNING, dev, fmt, ##__VA_ARGS__) +#define netdev_notice_once(dev, fmt, ...) \ + netdev_level_once(KERN_NOTICE, dev, fmt, ##__VA_ARGS__) +#define netdev_info_once(dev, fmt, ...) \ + netdev_level_once(KERN_INFO, dev, fmt, ##__VA_ARGS__) + #define MODULE_ALIAS_NETDEV(device) \ MODULE_ALIAS("netdev-" device) @@ -4376,6 +4401,10 @@ do { \ WARN(1, "netdevice: %s%s\n" format, netdev_name(dev), \ netdev_reg_state(dev), ##args) +#define netdev_WARN_ONCE(dev, condition, format, arg...) \ + WARN_ONCE(1, "netdevice: %s%s\n" format, netdev_name(dev) \ + netdev_reg_state(dev), ##args) + /* netif printk helpers, similar to netdev_printk */ #define netif_printk(priv, type, level, dev, fmt, args...) \ -- cgit v1.2.3 From 4382c7b92a1db397874ca62c73aa8b023af6dba8 Mon Sep 17 00:00:00 2001 From: Gal Pressman Date: Sun, 10 Sep 2017 13:22:51 +0300 Subject: net/mlx5e: Add 802.1ad VLAN insertion support Report VLAN insertion support for S-tagged packets and add support by choosing the correct VLAN type in the WQE. Signed-off-by: Gal Pressman Reviewed-by: Tariq Toukan Signed-off-by: Saeed Mahameed --- include/linux/mlx5/qp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/qp.h b/include/linux/mlx5/qp.h index 66d19b611fe4..62af7512dabb 100644 --- a/include/linux/mlx5/qp.h +++ b/include/linux/mlx5/qp.h @@ -221,6 +221,7 @@ enum { }; enum { + MLX5_ETH_WQE_SVLAN = 1 << 0, MLX5_ETH_WQE_INSERT_VLAN = 1 << 15, }; -- cgit v1.2.3 From f7a6509fe002e3909cb41c09e807b7f3ca4a361b Mon Sep 17 00:00:00 2001 From: David Hildenbrand Date: Fri, 1 Sep 2017 17:11:43 +0200 Subject: KVM: s390: vsie: use common code functions for pinning We will not see -ENOMEM (gfn_to_hva() will return KVM_ERR_PTR_BAD_PAGE for all errors). So we can also get rid of special handling in the callers of pin_guest_page() and always assume that it is a g2 error. As also kvm_s390_inject_program_int() should never fail, we can simplify pin_scb(), too. Signed-off-by: David Hildenbrand Message-Id: <20170901151143.22714-1-david@redhat.com> Acked-by: Cornelia Huck Signed-off-by: Christian Borntraeger --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 6882538eda32..2e754b7c282c 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -667,6 +667,7 @@ kvm_pfn_t __gfn_to_pfn_memslot(struct kvm_memory_slot *slot, gfn_t gfn, bool *writable); void kvm_release_pfn_clean(kvm_pfn_t pfn); +void kvm_release_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_dirty(kvm_pfn_t pfn); void kvm_set_pfn_accessed(kvm_pfn_t pfn); void kvm_get_pfn(kvm_pfn_t pfn); -- cgit v1.2.3 From f121aadede37bcbb77ea552d195a09c734486fe7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Thu, 9 Nov 2017 10:23:28 +0100 Subject: vfs: add path_put_init() This one initializes the struct path to all zeroes so that multiple path_put_init() on the path is safe. Signed-off-by: Miklos Szeredi --- include/linux/path.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/path.h b/include/linux/path.h index 81e65a5be7ce..475225a03d0d 100644 --- a/include/linux/path.h +++ b/include/linux/path.h @@ -18,4 +18,10 @@ static inline int path_equal(const struct path *path1, const struct path *path2) return path1->mnt == path2->mnt && path1->dentry == path2->dentry; } +static inline void path_put_init(struct path *path) +{ + path_put(path); + *path = (struct path) { }; +} + #endif /* _LINUX_PATH_H */ -- cgit v1.2.3 From 54985120a1c461b74f9510e5d730971f2a2383b1 Mon Sep 17 00:00:00 2001 From: Girish Moodalbail Date: Tue, 7 Nov 2017 11:32:11 -0800 Subject: net: fix incorrect comment with regard to VLAN packet handling The commit bcc6d4790361 ("net: vlan: make non-hw-accel rx path similar to hw-accel") unified accel and non-accel path for VLAN RX. With that fix we do not register any packet_type handler for VLANs anymore, so fix the incorrect comment. Signed-off-by: Girish Moodalbail Signed-off-by: David S. Miller --- include/linux/netdevice.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 79518ede3170..6b274bfe489f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4479,15 +4479,7 @@ do { \ * Why 16. Because with 16 the only overlap we get on a hash of the * low nibble of the protocol value is RARP/SNAP/X.25. * - * NOTE: That is no longer true with the addition of VLAN tags. Not - * sure which should go first, but I bet it won't make much - * difference if we are running VLANs. The good news is that - * this protocol won't be in the list unless compiled in, so - * the average user (w/out VLANs) will not be adversely affected. - * --BLG - * * 0800 IP - * 8100 802.1Q VLAN * 0001 802.3 * 0002 AX.25 * 0004 802.2 -- cgit v1.2.3 From 4f8413a3a799c958f7a10a6310a451e6b8aef5ad Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Thu, 9 Nov 2017 14:17:59 +0000 Subject: genirq: Track whether the trigger type has been set When requesting a shared interrupt, we assume that the firmware support code (DT or ACPI) has called irqd_set_trigger_type already, so that we can retrieve it and check that the requester is being reasonnable. Unfortunately, we still have non-DT, non-ACPI systems around, and these guys won't call irqd_set_trigger_type before requesting the interrupt. The consequence is that we fail the request that would have worked before. We can either chase all these use cases (boring), or address it in core code (easier). Let's have a per-irq_desc flag that indicates whether irqd_set_trigger_type has been called, and let's just check it when checking for a shared interrupt. If it hasn't been set, just take whatever the interrupt requester asks. Fixes: 382bd4de6182 ("genirq: Use irqd_get_trigger_type to compare the trigger type for shared IRQs") Cc: stable@vger.kernel.org Reported-and-tested-by: Petr Cvek Signed-off-by: Marc Zyngier --- include/linux/irq.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index fda8da7c45e7..73f61eeb152e 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -210,6 +210,7 @@ struct irq_data { * IRQD_MANAGED_SHUTDOWN - Interrupt was shutdown due to empty affinity * mask. Applies only to affinity managed irqs. * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target + * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -230,6 +231,7 @@ enum { IRQD_IRQ_STARTED = (1 << 22), IRQD_MANAGED_SHUTDOWN = (1 << 23), IRQD_SINGLE_TARGET = (1 << 24), + IRQD_DEFAULT_TRIGGER_SET = (1 << 25), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -259,18 +261,25 @@ static inline void irqd_mark_affinity_was_set(struct irq_data *d) __irqd_to_state(d) |= IRQD_AFFINITY_SET; } +static inline bool irqd_trigger_type_was_set(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_DEFAULT_TRIGGER_SET; +} + static inline u32 irqd_get_trigger_type(struct irq_data *d) { return __irqd_to_state(d) & IRQD_TRIGGER_MASK; } /* - * Must only be called inside irq_chip.irq_set_type() functions. + * Must only be called inside irq_chip.irq_set_type() functions or + * from the DT/ACPI setup code. */ static inline void irqd_set_trigger_type(struct irq_data *d, u32 type) { __irqd_to_state(d) &= ~IRQD_TRIGGER_MASK; __irqd_to_state(d) |= type & IRQD_TRIGGER_MASK; + __irqd_to_state(d) |= IRQD_DEFAULT_TRIGGER_SET; } static inline bool irqd_is_level_type(struct irq_data *d) -- cgit v1.2.3 From 125b1192bd48544d9429e240c8f8e64d1533b2c1 Mon Sep 17 00:00:00 2001 From: Keerthy Date: Fri, 10 Nov 2017 17:22:44 +0530 Subject: regulator: tps65218: remove unused tps_info structure remove unused tps_info structure. Signed-off-by: Keerthy Signed-off-by: Mark Brown --- include/linux/mfd/tps65218.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h index bccd2d68b1e3..f069c518c0ed 100644 --- a/include/linux/mfd/tps65218.h +++ b/include/linux/mfd/tps65218.h @@ -245,24 +245,6 @@ enum tps65218_irqs { TPS65218_INVALID4_IRQ, }; -/** - * struct tps_info - packages regulator constraints - * @id: Id of the regulator - * @name: Voltage regulator name - * @min_uV: minimum micro volts - * @max_uV: minimum micro volts - * @strobe: sequencing strobe value for the regulator - * - * This data is used to check the regualtor voltage limits while setting. - */ -struct tps_info { - int id; - const char *name; - int min_uV; - int max_uV; - int strobe; -}; - /** * struct tps65218 - tps65218 sub-driver chip access routines * @@ -280,7 +262,6 @@ struct tps65218 { u32 irq_mask; struct regmap_irq_chip_data *irq_data; struct regulator_desc desc[TPS65218_NUM_REGULATOR]; - struct tps_info *info[TPS65218_NUM_REGULATOR]; struct regmap *regmap; u8 *strobes; }; -- cgit v1.2.3 From f7b53637c090bd8ce2dc74ad0f3aa1898aff2524 Mon Sep 17 00:00:00 2001 From: Casey Schaufler Date: Tue, 24 Oct 2017 18:52:31 -0700 Subject: Audit: remove unused audit_log_secctx function The function audit_log_secctx() is unused in the upstream kernel. All it does is wrap another function that doesn't need wrapping. It claims to give you the SELinux context, but that is not true if you are using a different security module. Signed-off-by: Casey Schaufler Reviewed-by: James Morris Signed-off-by: Paul Moore --- include/linux/audit.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 2150bdccfbab..fa1b068d911d 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -149,12 +149,6 @@ extern void audit_log_key(struct audit_buffer *ab, extern void audit_log_link_denied(const char *operation, const struct path *link); extern void audit_log_lost(const char *message); -#ifdef CONFIG_SECURITY -extern void audit_log_secctx(struct audit_buffer *ab, u32 secid); -#else -static inline void audit_log_secctx(struct audit_buffer *ab, u32 secid) -{ } -#endif extern int audit_log_task_context(struct audit_buffer *ab); extern void audit_log_task_info(struct audit_buffer *ab, @@ -203,8 +197,6 @@ static inline void audit_log_key(struct audit_buffer *ab, char *key) static inline void audit_log_link_denied(const char *string, const struct path *link) { } -static inline void audit_log_secctx(struct audit_buffer *ab, u32 secid) -{ } static inline int audit_log_task_context(struct audit_buffer *ab) { return 0; -- cgit v1.2.3 From 84fef62d135b6e47b52f4e9280b5dbc5bb0050ba Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 10:28:32 -0700 Subject: nvme: check admin passthru command effects The NVMe standard provides a command effects log page so the host may be aware of special requirements it may need to do for a particular command. For example, the command may need to run with IO quiesced to prevent timeouts or undefined behavior, or it may change the logical block formats that determine how the host needs to construct future commands. This patch saves the nvme command effects log page if the controller supports it, and performs appropriate actions before and after an admin passthrough command is completed. If the controller does not support the command effects log page, the driver will define the effects for known opcodes. The nvme format and santize are the only commands in this patch with known effects. Signed-off-by: Keith Busch Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9310ce77d8e1..fd1d4508a612 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -267,6 +267,7 @@ enum { NVME_CTRL_OACS_SEC_SUPP = 1 << 0, NVME_CTRL_OACS_DIRECTIVES = 1 << 5, NVME_CTRL_OACS_DBBUF_SUPP = 1 << 8, + NVME_CTRL_LPA_CMD_EFFECTS_LOG = 1 << 1, }; struct nvme_lbaf { @@ -395,6 +396,21 @@ struct nvme_fw_slot_info_log { __u8 rsvd64[448]; }; +enum { + NVME_CMD_EFFECTS_CSUPP = 1 << 0, + NVME_CMD_EFFECTS_LBCC = 1 << 1, + NVME_CMD_EFFECTS_NCC = 1 << 2, + NVME_CMD_EFFECTS_NIC = 1 << 3, + NVME_CMD_EFFECTS_CCC = 1 << 4, + NVME_CMD_EFFECTS_CSE_MASK = 3 << 16, +}; + +struct nvme_effects_log { + __le32 acs[256]; + __le32 iocs[256]; + __u8 resv[2048]; +}; + enum { NVME_SMART_CRIT_SPARE = 1 << 0, NVME_SMART_CRIT_TEMPERATURE = 1 << 1, @@ -681,6 +697,7 @@ enum nvme_admin_opcode { nvme_admin_format_nvm = 0x80, nvme_admin_security_send = 0x81, nvme_admin_security_recv = 0x82, + nvme_admin_sanitize_nvm = 0x84, }; enum { @@ -712,6 +729,7 @@ enum { NVME_LOG_ERROR = 0x01, NVME_LOG_SMART = 0x02, NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_CMD_EFFECTS = 0x05, NVME_LOG_DISC = 0x70, NVME_LOG_RESERVATION = 0x80, NVME_FWACT_REPL = (0 << 3), -- cgit v1.2.3 From 83f5f7ed72f316eda4c4c3833beebfe6578926f4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 8 Nov 2017 11:15:37 -0700 Subject: block: kill bio_kmap/kunmap_irq() There are no users of it anymore. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9c75f58f6a50..1d7e63d7505f 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -573,17 +573,6 @@ static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) } #endif -static inline char *__bio_kmap_irq(struct bio *bio, struct bvec_iter iter, - unsigned long *flags) -{ - return bvec_kmap_irq(&bio_iter_iovec(bio, iter), flags); -} -#define __bio_kunmap_irq(buf, flags) bvec_kunmap_irq(buf, flags) - -#define bio_kmap_irq(bio, flags) \ - __bio_kmap_irq((bio), (bio)->bi_iter, (flags)) -#define bio_kunmap_irq(buf,flags) __bio_kunmap_irq(buf, flags) - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * -- cgit v1.2.3 From d004a5e7d4dd6335ce6e2044af42f5e0fbebb51d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 8 Nov 2017 19:13:48 +0100 Subject: block: remove __bio_kmap_atomic This helper doesn't buy us much over calling kmap_atomic directly. In fact in the only caller it does a bit of useless work as the caller already has the bvec at hand, and said caller would even buggy for a multi-segment bio due to the use of this helper. So just remove it. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 1d7e63d7505f..d4eec19a6d3c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -128,18 +128,6 @@ static inline void *bio_data(struct bio *bio) */ #define bvec_to_phys(bv) (page_to_phys((bv)->bv_page) + (unsigned long) (bv)->bv_offset) -/* - * queues that have highmem support enabled may still need to revert to - * PIO transfers occasionally and thus map high pages temporarily. For - * permanent PIO fall back, user is probably better off disabling highmem - * I/O completely on that queue (see ide-dma for example) - */ -#define __bio_kmap_atomic(bio, iter) \ - (kmap_atomic(bio_iter_iovec((bio), (iter)).bv_page) + \ - bio_iter_iovec((bio), (iter)).bv_offset) - -#define __bio_kunmap_atomic(addr) kunmap_atomic(addr) - /* * merge helpers etc */ -- cgit v1.2.3 From f00c4d80ffdac9e3a64947ebd57489f3232d5a74 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 5 Nov 2017 10:36:31 +0300 Subject: block: pass full fmode_t to blk_verify_command Use the obvious calling convention. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 225617dd0a3f..1437ef4d8037 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1360,7 +1360,7 @@ static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, gfp_mask, 0); } -extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); +extern int blk_verify_command(unsigned char *cmd, fmode_t mode); enum blk_default_limits { BLK_MAX_SEGMENTS = 128, -- cgit v1.2.3 From 38dabe210fbab4e7e8a03670ab3ba42f247ea08f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 15:13:10 -0700 Subject: nvme: centralize AEN defines All the transports were unnecessarilly duplicating the AEN request accounting. This patch defines everything in one place. Signed-off-by: Keith Busch Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index fd1d4508a612..89ffa7eed2fd 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -90,6 +90,14 @@ enum { }; #define NVME_AQ_DEPTH 32 +#define NVME_NR_AEN_COMMANDS 1 +#define NVME_AQ_BLK_MQ_DEPTH (NVME_AQ_DEPTH - NVME_NR_AEN_COMMANDS) + +/* + * Subtract one to leave an empty queue entry for 'Full Queue' condition. See + * NVM-Express 1.2 specification, section 4.1.2. + */ +#define NVME_AQ_MQ_TAG_DEPTH (NVME_AQ_BLK_MQ_DEPTH - 1) enum { NVME_REG_CAP = 0x0000, /* Controller Capabilities */ -- cgit v1.2.3 From e3d7874dcf175cca2dca7795d6453f637ad8ba9b Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 7 Nov 2017 15:13:14 -0700 Subject: nvme: send uevent for some asynchronous events This will give udev a chance to observe and handle asynchronous event notifications and clear the log to unmask future events of the same type. The driver will create a change uevent of the asyncronuos event result before submitting the next AEN request to the device if a completed AEN event is of type error, smart, command set or vendor specific, Signed-off-by: Keith Busch Reviewed-by: Guan Junxiong Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 89ffa7eed2fd..aea87f0d917b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -428,6 +428,10 @@ enum { }; enum { + NVME_AER_ERROR = 0, + NVME_AER_SMART = 1, + NVME_AER_CSS = 6, + NVME_AER_VS = 7, NVME_AER_NOTICE_NS_CHANGED = 0x0002, NVME_AER_NOTICE_FW_ACT_STARTING = 0x0102, }; -- cgit v1.2.3 From eb619fdb2d4cb8b3d3419e9113921e87e7daf557 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 9 Nov 2017 08:32:43 -0700 Subject: blk-mq: fix issue with shared tag queue re-running This patch attempts to make the case of hctx re-running on driver tag failure more robust. Without this patch, it's pretty easy to trigger a stall condition with shared tags. An example is using null_blk like this: modprobe null_blk queue_mode=2 nr_devices=4 shared_tags=1 submit_queues=1 hw_queue_depth=1 which sets up 4 devices, sharing the same tag set with a depth of 1. Running a fio job ala: [global] bs=4k rw=randread norandommap direct=1 ioengine=libaio iodepth=4 [nullb0] filename=/dev/nullb0 [nullb1] filename=/dev/nullb1 [nullb2] filename=/dev/nullb2 [nullb3] filename=/dev/nullb3 will inevitably end with one or more threads being stuck waiting for a scheduler tag. That IO is then stuck forever, until someone else triggers a run of the queue. Ensure that we always re-run the hardware queue, if the driver tag we were waiting for got freed before we added our leftover request entries back on the dispatch list. Reviewed-by: Bart Van Assche Tested-by: Bart Van Assche Reviewed-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 674641527da7..4ae987c2352c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -35,7 +35,7 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx **ctxs; unsigned int nr_ctx; - wait_queue_entry_t dispatch_wait; + wait_queue_entry_t dispatch_wait; atomic_t wait_index; struct blk_mq_tags *tags; @@ -181,8 +181,7 @@ enum { BLK_MQ_S_STOPPED = 0, BLK_MQ_S_TAG_ACTIVE = 1, BLK_MQ_S_SCHED_RESTART = 2, - BLK_MQ_S_TAG_WAITING = 3, - BLK_MQ_S_START_ON_RUN = 4, + BLK_MQ_S_START_ON_RUN = 3, BLK_MQ_MAX_DEPTH = 10240, -- cgit v1.2.3 From 6a15674d1e90917f1723a814e2e8c949000440f7 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:54 -0800 Subject: block: Introduce blk_get_request_flags() A side effect of this patch is that the GFP mask that is passed to several allocation functions in the legacy block layer is changed from GFP_KERNEL into __GFP_DIRECT_RECLAIM. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Martin Steigerwald Tested-by: Oleksandr Natalenko Cc: Christoph Hellwig Cc: Ming Lei Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1437ef4d8037..1af5ddd0f631 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -927,6 +927,9 @@ extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); +extern struct request *blk_get_request_flags(struct request_queue *, + unsigned int op, + unsigned int flags); extern struct request *blk_get_request(struct request_queue *, unsigned int op, gfp_t gfp_mask); extern void blk_requeue_request(struct request_queue *, struct request *); -- cgit v1.2.3 From 1b6d65a0bfb5df2a6029c1430e99fcc5d96bb59a Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:55 -0800 Subject: block: Introduce BLK_MQ_REQ_PREEMPT Set RQF_PREEMPT if BLK_MQ_REQ_PREEMPT is passed to blk_get_request_flags(). Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Martin Steigerwald Tested-by: Oleksandr Natalenko Cc: Christoph Hellwig Cc: Ming Lei Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 4ae987c2352c..82b56609736a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -212,6 +212,7 @@ enum { BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ + BLK_MQ_REQ_PREEMPT = (1 << 3), /* set RQF_PREEMPT */ }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, -- cgit v1.2.3 From c9254f2ddb19387ea9714a57ea48463c20333b92 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:57 -0800 Subject: block: Add the QUEUE_FLAG_PREEMPT_ONLY request queue flag This flag will be used in the next patch to let the block layer core know whether or not a SCSI request queue has been quiesced. A quiesced SCSI queue namely only processes RQF_PREEMPT requests. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Martin Steigerwald Tested-by: Oleksandr Natalenko Cc: Ming Lei Cc: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1af5ddd0f631..2147e2381a22 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -632,6 +632,7 @@ struct request_queue { #define QUEUE_FLAG_REGISTERED 26 /* queue has been registered to a disk */ #define QUEUE_FLAG_SCSI_PASSTHROUGH 27 /* queue supports SCSI commands */ #define QUEUE_FLAG_QUIESCED 28 /* queue has been quiesced */ +#define QUEUE_FLAG_PREEMPT_ONLY 29 /* only process REQ_PREEMPT requests */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ @@ -732,6 +733,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ REQ_FAILFAST_DRIVER)) #define blk_queue_quiesced(q) test_bit(QUEUE_FLAG_QUIESCED, &(q)->queue_flags) +#define blk_queue_preempt_only(q) \ + test_bit(QUEUE_FLAG_PREEMPT_ONLY, &(q)->queue_flags) + +extern int blk_set_preempt_only(struct request_queue *q); +extern void blk_clear_preempt_only(struct request_queue *q); static inline bool blk_account_rq(struct request *rq) { -- cgit v1.2.3 From 3a0a529971ec4e2d933e9c7798db101dfb6b1aec Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:58 -0800 Subject: block, scsi: Make SCSI quiesce and resume work reliably The contexts from which a SCSI device can be quiesced or resumed are: * Writing into /sys/class/scsi_device/*/device/state. * SCSI parallel (SPI) domain validation. * The SCSI device power management methods. See also scsi_bus_pm_ops. It is essential during suspend and resume that neither the filesystem state nor the filesystem metadata in RAM changes. This is why while the hibernation image is being written or restored that SCSI devices are quiesced. The SCSI core quiesces devices through scsi_device_quiesce() and scsi_device_resume(). In the SDEV_QUIESCE state execution of non-preempt requests is deferred. This is realized by returning BLKPREP_DEFER from inside scsi_prep_state_check() for quiesced SCSI devices. Avoid that a full queue prevents power management requests to be submitted by deferring allocation of non-preempt requests for devices in the quiesced state. This patch has been tested by running the following commands and by verifying that after each resume the fio job was still running: for ((i=0; i<10; i++)); do ( cd /sys/block/md0/md && while true; do [ "$( sync_action sleep 1 done ) & pids=($!) for d in /sys/class/block/sd*[a-z]; do bdev=${d#/sys/class/block/} hcil=$(readlink "$d/device") hcil=${hcil#../../../} echo 4 > "$d/queue/nr_requests" echo 1 > "/sys/class/scsi_device/$hcil/device/queue_depth" fio --name="$bdev" --filename="/dev/$bdev" --buffered=0 --bs=512 \ --rw=randread --ioengine=libaio --numjobs=4 --iodepth=16 \ --iodepth_batch=1 --thread --loops=$((2**31)) & pids+=($!) done sleep 1 echo "$(date) Hibernating ..." >>hibernate-test-log.txt systemctl hibernate sleep 10 kill "${pids[@]}" echo idle > /sys/block/md0/md/sync_action wait echo "$(date) Done." >>hibernate-test-log.txt done Reported-by: Oleksandr Natalenko References: "I/O hangs after resuming from suspend-to-ram" (https://marc.info/?l=linux-block&m=150340235201348). Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Martin Steigerwald Tested-by: Oleksandr Natalenko Cc: Martin K. Petersen Cc: Ming Lei Cc: Christoph Hellwig Cc: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2147e2381a22..402c9d536ae1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -959,7 +959,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_enter(struct request_queue *q, bool nowait); +extern int blk_queue_enter(struct request_queue *q, unsigned int flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_start_queue_async(struct request_queue *q); -- cgit v1.2.3 From 9a95e4ef709533efac4aafcb8bddf73f96db50ed Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 9 Nov 2017 10:49:59 -0800 Subject: block, nvme: Introduce blk_mq_req_flags_t Several block layer and NVMe core functions accept a combination of BLK_MQ_REQ_* flags through the 'flags' argument but there is no verification at compile time whether the right type of block layer flags is passed. Make it possible for sparse to verify this. This patch does not change any functionality. Signed-off-by: Bart Van Assche Reviewed-by: Hannes Reinecke Tested-by: Oleksandr Natalenko Cc: linux-nvme@lists.infradead.org Cc: Christoph Hellwig Cc: Johannes Thumshirn Cc: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 17 +++++++++++------ include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 4 ++-- 3 files changed, 15 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 82b56609736a..b326208277ee 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -209,16 +209,21 @@ void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); enum { - BLK_MQ_REQ_NOWAIT = (1 << 0), /* return when out of requests */ - BLK_MQ_REQ_RESERVED = (1 << 1), /* allocate from reserved pool */ - BLK_MQ_REQ_INTERNAL = (1 << 2), /* allocate internal/sched tag */ - BLK_MQ_REQ_PREEMPT = (1 << 3), /* set RQF_PREEMPT */ + /* return when out of requests */ + BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), + /* allocate from reserved pool */ + BLK_MQ_REQ_RESERVED = (__force blk_mq_req_flags_t)(1 << 1), + /* allocate internal/sched tag */ + BLK_MQ_REQ_INTERNAL = (__force blk_mq_req_flags_t)(1 << 2), + /* set RQF_PREEMPT */ + BLK_MQ_REQ_PREEMPT = (__force blk_mq_req_flags_t)(1 << 3), }; struct request *blk_mq_alloc_request(struct request_queue *q, unsigned int op, - unsigned int flags); + blk_mq_req_flags_t flags); struct request *blk_mq_alloc_request_hctx(struct request_queue *q, - unsigned int op, unsigned int flags, unsigned int hctx_idx); + unsigned int op, blk_mq_req_flags_t flags, + unsigned int hctx_idx); struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); enum { diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1b04085255fb..13ccfc9b210a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -162,6 +162,8 @@ struct bio { */ #define BIO_RESET_BITS BVEC_POOL_OFFSET +typedef __u32 __bitwise blk_mq_req_flags_t; + /* * Operations and flags common to the bio and request structures. * We use 8 bits for encoding the operation, and the remaining 24 for flags. diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 402c9d536ae1..e80ea1d31343 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -935,7 +935,7 @@ extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request_flags(struct request_queue *, unsigned int op, - unsigned int flags); + blk_mq_req_flags_t flags); extern struct request *blk_get_request(struct request_queue *, unsigned int op, gfp_t gfp_mask); extern void blk_requeue_request(struct request_queue *, struct request *); @@ -959,7 +959,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_enter(struct request_queue *q, unsigned int flags); +extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_start_queue_async(struct request_queue *q); -- cgit v1.2.3 From 67f2519fe2903c4041c0e94394d14d372fe51399 Mon Sep 17 00:00:00 2001 From: Greg Edwards Date: Tue, 24 Oct 2017 11:21:48 -0600 Subject: fs: guard_bio_eod() needs to consider partitions guard_bio_eod() needs to look at the partition capacity, not just the capacity of the whole device, when determining if truncation is necessary. [ 60.268688] attempt to access beyond end of device [ 60.268690] unknown-block(9,1): rw=0, want=67103509, limit=67103506 [ 60.268693] buffer_io_error: 2 callbacks suppressed [ 60.268696] Buffer I/O error on dev md1p7, logical block 4524305, async page read Fixes: 74d46992e0d9 ("block: replace bi_bdev with a gendisk pointer and partitions index") Cc: stable@vger.kernel.org # v4.13 Reviewed-by: Christoph Hellwig Signed-off-by: Greg Edwards Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 93aae3476f58..ca10cc292187 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -243,6 +243,7 @@ static inline dev_t part_devt(struct hd_struct *part) return part_to_dev(part)->devt; } +extern struct hd_struct *__disk_get_part(struct gendisk *disk, int partno); extern struct hd_struct *disk_get_part(struct gendisk *disk, int partno); static inline void disk_put_part(struct hd_struct *part) -- cgit v1.2.3 From 79f720a751cad613620d0237e3b44f89f4a69181 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 10 Nov 2017 09:13:21 -0700 Subject: blk-mq: only run the hardware queue if IO is pending Currently we are inconsistent in when we decide to run the queue. Using blk_mq_run_hw_queues() we check if the hctx has pending IO before running it, but we don't do that from the individual queue run function, blk_mq_run_hw_queue(). This results in a lot of extra and pointless queue runs, potentially, on flush requests and (much worse) on tag starvation situations. This is observable just looking at top output, with lots of kworkers active. For the !async runs, it just adds to the CPU overhead of blk-mq. Move the has-pending check into the run function instead of having callers do it. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b326208277ee..eb1e2cdffb31 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -266,7 +266,7 @@ void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); +bool blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_tagset_busy_iter(struct blk_mq_tag_set *tagset, -- cgit v1.2.3 From dd0bb688eaa241b5655d396d45366cba9225aed9 Mon Sep 17 00:00:00 2001 From: Josef Bacik Date: Tue, 7 Nov 2017 15:28:42 -0500 Subject: bpf: add a bpf_override_function helper Error injection is sloppy and very ad-hoc. BPF could fill this niche perfectly with it's kprobe functionality. We could make sure errors are only triggered in specific call chains that we care about with very specific situations. Accomplish this with the bpf_override_funciton helper. This will modify the probe'd callers return value to the specified value and set the PC to an override function that simply returns, bypassing the originally probed function. This gives us a nice clean way to implement systematic error injection for all of our code paths. Acked-by: Alexei Starovoitov Signed-off-by: Josef Bacik Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- include/linux/trace_events.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 0cd02ff4ae30..eaec066f99e8 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,7 +459,8 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1; /* Do we need dst entry? */ + dst_needed:1, /* Do we need dst entry? */ + kprobe_override:1; /* Do we override a kprobe? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 84014ecfa67f..17e5e820a84c 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,6 +523,7 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); +DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -- cgit v1.2.3 From 2210d6b2f287d738eddf6b75f432126ce05450f8 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Maciej=20=C5=BBenczykowski?= Date: Tue, 7 Nov 2017 21:52:09 -0800 Subject: net: ipv6: sysctl to specify IPv6 ND traffic class MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a per-device sysctl to specify the default traffic class to use for kernel originated IPv6 Neighbour Discovery packets. Currently this includes: - Router Solicitation (ICMPv6 type 133) ndisc_send_rs() -> ndisc_send_skb() -> ip6_nd_hdr() - Neighbour Solicitation (ICMPv6 type 135) ndisc_send_ns() -> ndisc_send_skb() -> ip6_nd_hdr() - Neighbour Advertisement (ICMPv6 type 136) ndisc_send_na() -> ndisc_send_skb() -> ip6_nd_hdr() - Redirect (ICMPv6 type 137) ndisc_send_redirect() -> ndisc_send_skb() -> ip6_nd_hdr() and if the kernel ever gets around to generating RA's, it would presumably also include: - Router Advertisement (ICMPv6 type 134) (radvd daemon could pick up on the kernel setting and use it) Interface drivers may examine the Traffic Class value and translate the DiffServ Code Point into a link-layer appropriate traffic prioritization scheme. An example of mapping IETF DSCP values to IEEE 802.11 User Priority values can be found here: https://tools.ietf.org/html/draft-ietf-tsvwg-ieee-802-11 The expected primary use case is to properly prioritize ND over wifi. Testing: jzem22:~# cat /proc/sys/net/ipv6/conf/eth0/ndisc_tclass 0 jzem22:~# echo -1 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass -bash: echo: write error: Invalid argument jzem22:~# echo 256 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass -bash: echo: write error: Invalid argument jzem22:~# echo 0 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# echo 255 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# cat /proc/sys/net/ipv6/conf/eth0/ndisc_tclass 255 jzem22:~# echo 34 > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# cat /proc/sys/net/ipv6/conf/eth0/ndisc_tclass 34 jzem22:~# echo $[0xDC] > /proc/sys/net/ipv6/conf/eth0/ndisc_tclass jzem22:~# tcpdump -v -i eth0 icmp6 and src host jzem22.pgc and dst host fe80::1 tcpdump: listening on eth0, link-type EN10MB (Ethernet), capture size 262144 bytes IP6 (class 0xdc, hlim 255, next-header ICMPv6 (58) payload length: 24) jzem22.pgc > fe80::1: [icmp6 sum ok] ICMP6, neighbor advertisement, length 24, tgt is jzem22.pgc, Flags [solicited] (based on original change written by Erik Kline, with minor changes) v2: fix 'suspicious rcu_dereference_check() usage' by explicitly grabbing the rcu_read_lock. Cc: Lorenzo Colitti Signed-off-by: Erik Kline Signed-off-by: Maciej Żenczykowski Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index ea04ca024f0d..cb18c6290ca8 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -73,6 +73,7 @@ struct ipv6_devconf { __u32 enhanced_dad; __u32 addr_gen_mode; __s32 disable_policy; + __s32 ndisc_tclass; struct ctl_table_header *sysctl_header; }; -- cgit v1.2.3 From f3edacbd697f94a743fff1a3d26910ab99948ba7 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Sat, 11 Nov 2017 18:24:55 +0900 Subject: bpf: Revert bpf_overrid_function() helper changes. NACK'd by x86 maintainer. Signed-off-by: David S. Miller --- include/linux/filter.h | 3 +-- include/linux/trace_events.h | 1 - 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index eaec066f99e8..0cd02ff4ae30 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -459,8 +459,7 @@ struct bpf_prog { locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ - dst_needed:1, /* Do we need dst entry? */ - kprobe_override:1; /* Do we override a kprobe? */ + dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 17e5e820a84c..84014ecfa67f 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -523,7 +523,6 @@ do { \ struct perf_event; DECLARE_PER_CPU(struct pt_regs, perf_trace_regs); -DECLARE_PER_CPU(int, bpf_kprobe_override); extern int perf_trace_init(struct perf_event *event); extern void perf_trace_destroy(struct perf_event *event); -- cgit v1.2.3 From 713bafea92920103cd3d361657406cf04d0e22dd Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 8 Nov 2017 13:01:26 -0800 Subject: tcp: retire FACK loss detection FACK loss detection has been disabled by default and the successor RACK subsumed FACK and can handle reordering better. This patch removes FACK to simplify TCP loss recovery. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Soheil Hassas Yeganeh Reviewed-by: Priyaranjan Jha Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 22f40c96a15b..9574936fe041 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -85,7 +85,6 @@ struct tcp_sack_block { /*These are used to set the sack_ok field in struct tcp_options_received */ #define TCP_SACK_SEEN (1 << 0) /*1 = peer is SACK capable, */ -#define TCP_FACK_ENABLED (1 << 1) /*1 = FACK is enabled locally*/ #define TCP_DSACK_SEEN (1 << 2) /*1 = DSACK was received from peer*/ struct tcp_options_received { -- cgit v1.2.3 From 737ff314563ca27f044f9a3a041e9d42491ef7ce Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Wed, 8 Nov 2017 13:01:27 -0800 Subject: tcp: use sequence distance to detect reordering Replace the reordering distance measurement in packet unit with sequence based approach. Previously it trackes the number of "packets" toward the forward ACK (i.e. highest sacked sequence)in a state variable "fackets_out". Precisely measuring reordering degree on packet distance has not much benefit, as the degree constantly changes by factors like path, load, and congestion window. It is also complicated and prone to arcane bugs. This patch replaces with sequence-based approach that's much simpler. Signed-off-by: Yuchung Cheng Reviewed-by: Eric Dumazet Reviewed-by: Neal Cardwell Reviewed-by: Soheil Hassas Yeganeh Reviewed-by: Priyaranjan Jha Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 9574936fe041..df5d97a85e1a 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -293,7 +293,6 @@ struct tcp_sock { u32 pushed_seq; /* Last pushed seq, required to talk to windows */ u32 lost_out; /* Lost packets */ u32 sacked_out; /* SACK'd packets */ - u32 fackets_out; /* FACK'd packets */ struct hrtimer pacing_timer; -- cgit v1.2.3 From 39b175211053c7a6a4d794c42e225994f1c069c2 Mon Sep 17 00:00:00 2001 From: Mat Martineau Date: Fri, 10 Nov 2017 14:03:51 -0800 Subject: net: Remove unused skb_shared_info member ip6_frag_id was only used by UFO, which has been removed. ipv6_proxy_select_ident() only existed to set ip6_frag_id and has no in-tree callers. Signed-off-by: Mat Martineau Signed-off-by: David S. Miller --- include/linux/skbuff.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 57d712671081..54fe91183a8e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -500,7 +500,6 @@ struct skb_shared_info { struct skb_shared_hwtstamps hwtstamps; unsigned int gso_type; u32 tskey; - __be32 ip6_frag_id; /* * Warning : all fields before dataref are cleared in __alloc_skb() -- cgit v1.2.3 From b24591e2fcf852ad7ad2ccf745c8220bf378d312 Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 9 Nov 2017 12:35:07 +0000 Subject: timers: Add a function to start/reduce a timer Add a function, similar to mod_timer(), that will start a timer if it isn't running and will modify it if it is running and has an expiry time longer than the new time. If the timer is running with an expiry time that's the same or sooner, no change is made. The function looks like: int timer_reduce(struct timer_list *timer, unsigned long expires); This can be used by code such as networking code to make it easier to share a timer for multiple timeouts. For instance, in upcoming AF_RXRPC code, the rxrpc_call struct will maintain a number of timeouts: unsigned long ack_at; unsigned long resend_at; unsigned long ping_at; unsigned long expect_rx_by; unsigned long expect_req_by; unsigned long expect_term_by; each of which is set independently of the others. With timer reduction available, when the code needs to set one of the timeouts, it only needs to look at that timeout and then call timer_reduce() to modify the timer, starting it or bringing it forward if necessary. There is no need to refer to the other timeouts to see which is earliest and no need to take any lock other than, potentially, the timer lock inside timer_reduce(). Note, that this does not protect against concurrent invocations of any of the timer functions. As an example, the expect_rx_by timeout above, which terminates a call if we don't get a packet from the server within a certain time window, would be set something like this: unsigned long now = jiffies; unsigned long expect_rx_by = now + packet_receive_timeout; WRITE_ONCE(call->expect_rx_by, expect_rx_by); timer_reduce(&call->timer, expect_rx_by); The timer service code (which might, say, be in a work function) would then check all the timeouts to see which, if any, had triggered, deal with those: t = READ_ONCE(call->ack_at); if (time_after_eq(now, t)) { cmpxchg(&call->ack_at, t, now + MAX_JIFFY_OFFSET); set_bit(RXRPC_CALL_EV_ACK, &call->events); } and then restart the timer if necessary by finding the soonest timeout that hasn't yet passed and then calling timer_reduce(). The disadvantage of doing things this way rather than comparing the timers each time and calling mod_timer() is that you *will* take timer events unless you can finish what you're doing and delete the timer in time. The advantage of doing things this way is that you don't need to use a lock to work out when the next timer should be set, other than the timer's own lock - which you might not have to take. [ tglx: Fixed weird formatting and adopted it to pending changes ] Signed-off-by: David Howells Signed-off-by: Thomas Gleixner Cc: keyrings@vger.kernel.org Cc: linux-afs@lists.infradead.org Link: https://lkml.kernel.org/r/151023090769.23050.1801643667223880753.stgit@warthog.procyon.org.uk --- include/linux/timer.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 9f8895decb82..37b5e2f74d21 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -203,6 +203,7 @@ extern void add_timer_on(struct timer_list *timer, int cpu); extern int del_timer(struct timer_list * timer); extern int mod_timer(struct timer_list *timer, unsigned long expires); extern int mod_timer_pending(struct timer_list *timer, unsigned long expires); +extern int timer_reduce(struct timer_list *timer, unsigned long expires); /* * The jiffies value which is added to now, when there is no timer -- cgit v1.2.3 From 516fb7f2e73dcc303fb97fc3593209fcacf2d982 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 12 Nov 2017 18:44:23 -0800 Subject: /proc/module: use the same logic as /proc/kallsyms for address exposure The (alleged) users of the module addresses are the same: kernel profiling. So just expose the same helper and format macros, and unify the logic. Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 11dd93e42580..0a777c5216b1 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,6 +14,14 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) +/* How and when do we show kallsyms values? */ +extern int kallsyms_show_value(void); +#ifndef CONFIG_64BIT +# define KALLSYM_FMT "%08lx" +#else +# define KALLSYM_FMT "%016lx" +#endif + struct module; #ifdef CONFIG_KALLSYMS -- cgit v1.2.3 From fcdfafcb73be8fa45909327bbddca46fb362a675 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 13 Nov 2017 07:59:10 +0100 Subject: kprobes: Don't spam the build log with deprecation warnings The jprobes APIs are deprecated - but are still in occasional use for code that few people seem to care about, so stop generating deprecation warnings. Cc: Masami Hiramatsu Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/kprobes.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kprobes.h b/include/linux/kprobes.h index 56b2e698dbad..9440a2fc8893 100644 --- a/include/linux/kprobes.h +++ b/include/linux/kprobes.h @@ -468,18 +468,18 @@ static inline int enable_kprobe(struct kprobe *kp) return -ENOSYS; } #endif /* CONFIG_KPROBES */ -static inline int __deprecated register_jprobe(struct jprobe *p) +static inline int register_jprobe(struct jprobe *p) { return -ENOSYS; } -static inline int __deprecated register_jprobes(struct jprobe **jps, int num) +static inline int register_jprobes(struct jprobe **jps, int num) { return -ENOSYS; } -static inline void __deprecated unregister_jprobe(struct jprobe *p) +static inline void unregister_jprobe(struct jprobe *p) { } -static inline void __deprecated unregister_jprobes(struct jprobe **jps, int num) +static inline void unregister_jprobes(struct jprobe **jps, int num) { } static inline int disable_kretprobe(struct kretprobe *rp) @@ -490,11 +490,11 @@ static inline int enable_kretprobe(struct kretprobe *rp) { return enable_kprobe(&rp->kp); } -static inline int __deprecated disable_jprobe(struct jprobe *jp) +static inline int disable_jprobe(struct jprobe *jp) { return -ENOSYS; } -static inline int __deprecated enable_jprobe(struct jprobe *jp) +static inline int enable_jprobe(struct jprobe *jp) { return -ENOSYS; } -- cgit v1.2.3 From f4c09f87adfe31587aa4b2aea2cb2dbde2150f54 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 13 Nov 2017 09:39:01 +0100 Subject: cpu/hotplug: Get rid of CPU hotplug notifier leftovers The CPU hotplug notifiers are history. Remove the last reminders. Reported-by: Ingo Molnar Signed-off-by: Thomas Gleixner --- include/linux/cpu.h | 27 ++++++++++----------------- 1 file changed, 10 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index cd4771b772c0..b6e4a598b2cd 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -55,24 +55,17 @@ extern void unregister_cpu(struct cpu *cpu); extern ssize_t arch_cpu_probe(const char *, size_t); extern ssize_t arch_cpu_release(const char *, size_t); #endif -struct notifier_block; - -#define CPU_ONLINE 0x0002 /* CPU (unsigned)v is up */ -#define CPU_UP_PREPARE 0x0003 /* CPU (unsigned)v coming up */ -#define CPU_DEAD 0x0007 /* CPU (unsigned)v dead */ -#define CPU_POST_DEAD 0x0009 /* CPU (unsigned)v dead, cpu_hotplug - * lock is dropped */ -#define CPU_BROKEN 0x000B /* CPU (unsigned)v did not die properly, - * perhaps due to preemption. */ - -/* Used for CPU hotplug events occurring while tasks are frozen due to a suspend - * operation in progress - */ -#define CPU_TASKS_FROZEN 0x0010 -#define CPU_ONLINE_FROZEN (CPU_ONLINE | CPU_TASKS_FROZEN) -#define CPU_UP_PREPARE_FROZEN (CPU_UP_PREPARE | CPU_TASKS_FROZEN) -#define CPU_DEAD_FROZEN (CPU_DEAD | CPU_TASKS_FROZEN) +/* + * These states are not related to the core CPU hotplug mechanism. They are + * used by various (sub)architectures to track internal state + */ +#define CPU_ONLINE 0x0002 /* CPU is up */ +#define CPU_UP_PREPARE 0x0003 /* CPU coming up */ +#define CPU_DEAD 0x0007 /* CPU dead */ +#define CPU_DEAD_FROZEN 0x0008 /* CPU timed out on unplug */ +#define CPU_POST_DEAD 0x0009 /* CPU successfully unplugged */ +#define CPU_BROKEN 0x000B /* CPU did not die properly */ #ifdef CONFIG_SMP extern bool cpuhp_tasks_frozen; -- cgit v1.2.3 From 5e4def20381678ba3ce0a4e117f97e378ecd81bc Mon Sep 17 00:00:00 2001 From: David Howells Date: Thu, 2 Nov 2017 15:27:44 +0000 Subject: Pass mode to wait_on_atomic_t() action funcs and provide default actions Make wait_on_atomic_t() pass the TASK_* mode onto its action function as an extra argument and make it 'unsigned int throughout. Also, consolidate a bunch of identical action functions into a default function that can do the appropriate thing for the mode. Also, change the argument name in the bit_wait*() function declarations to reflect the fact that it's the mode and not the bit number. [Peter Z gives this a grudging ACK, but thinks that the whole atomic_t wait should be done differently, though he's not immediately sure as to how] Signed-off-by: David Howells Acked-by: Peter Zijlstra cc: Ingo Molnar --- include/linux/wait_bit.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait_bit.h b/include/linux/wait_bit.h index af0d495430d7..61b39eaf7cad 100644 --- a/include/linux/wait_bit.h +++ b/include/linux/wait_bit.h @@ -26,6 +26,8 @@ struct wait_bit_queue_entry { { .flags = p, .bit_nr = WAIT_ATOMIC_T_BIT_NR, } typedef int wait_bit_action_f(struct wait_bit_key *key, int mode); +typedef int wait_atomic_t_action_f(atomic_t *counter, unsigned int mode); + void __wake_up_bit(struct wait_queue_head *wq_head, void *word, int bit); int __wait_on_bit(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); int __wait_on_bit_lock(struct wait_queue_head *wq_head, struct wait_bit_queue_entry *wbq_entry, wait_bit_action_f *action, unsigned int mode); @@ -34,7 +36,7 @@ void wake_up_atomic_t(atomic_t *p); int out_of_line_wait_on_bit(void *word, int, wait_bit_action_f *action, unsigned int mode); int out_of_line_wait_on_bit_timeout(void *word, int, wait_bit_action_f *action, unsigned int mode, unsigned long timeout); int out_of_line_wait_on_bit_lock(void *word, int, wait_bit_action_f *action, unsigned int mode); -int out_of_line_wait_on_atomic_t(atomic_t *p, int (*)(atomic_t *), unsigned int mode); +int out_of_line_wait_on_atomic_t(atomic_t *p, wait_atomic_t_action_f action, unsigned int mode); struct wait_queue_head *bit_waitqueue(void *word, int bit); extern void __init wait_bit_init(void); @@ -51,10 +53,11 @@ int wake_bit_function(struct wait_queue_entry *wq_entry, unsigned mode, int sync }, \ } -extern int bit_wait(struct wait_bit_key *key, int bit); -extern int bit_wait_io(struct wait_bit_key *key, int bit); -extern int bit_wait_timeout(struct wait_bit_key *key, int bit); -extern int bit_wait_io_timeout(struct wait_bit_key *key, int bit); +extern int bit_wait(struct wait_bit_key *key, int mode); +extern int bit_wait_io(struct wait_bit_key *key, int mode); +extern int bit_wait_timeout(struct wait_bit_key *key, int mode); +extern int bit_wait_io_timeout(struct wait_bit_key *key, int mode); +extern int atomic_t_wait(atomic_t *counter, unsigned int mode); /** * wait_on_bit - wait for a bit to be cleared @@ -251,7 +254,7 @@ wait_on_bit_lock_action(unsigned long *word, int bit, wait_bit_action_f *action, * outside of the target 'word'. */ static inline -int wait_on_atomic_t(atomic_t *val, int (*action)(atomic_t *), unsigned mode) +int wait_on_atomic_t(atomic_t *val, wait_atomic_t_action_f action, unsigned mode) { might_sleep(); if (atomic_read(val) == 0) -- cgit v1.2.3 From 47f9d0bf526058b3fdc077698fcb19748d5700e4 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 13 Nov 2017 16:21:33 +0000 Subject: irqchip/gic-v4: Add forward definition of struct irq_domain_ops In some randconfig scenarios, including arm-gic-v4.h results in a spurious wawrning about the $SUBJECT structure not being defined. Adding a forward definition keeps it quiet. Reported-by: Arnd Bergmann Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v4.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v4.h b/include/linux/irqchip/arm-gic-v4.h index 447da8ca2156..fa683ea5c769 100644 --- a/include/linux/irqchip/arm-gic-v4.h +++ b/include/linux/irqchip/arm-gic-v4.h @@ -109,6 +109,7 @@ int its_get_vlpi(int irq, struct its_vlpi_map *map); int its_unmap_vlpi(int irq); int its_prop_update_vlpi(int irq, u8 config, bool inv); +struct irq_domain_ops; int its_init_v4(struct irq_domain *domain, const struct irq_domain_ops *ops); #endif -- cgit v1.2.3 From 8a7a8e1eab929eb3a5b735a788a23b9731139046 Mon Sep 17 00:00:00 2001 From: Dou Liyang Date: Mon, 13 Nov 2017 13:49:04 +0800 Subject: timekeeping: Eliminate the stale declaration of ktime_get_raw_and_real_ts64() Commit ba26621e63ce got rid of ktime_get_raw_and_real_ts64(), but left its declaration behind. Remove it. Fixes: ba26621e63ce ("time: Remove duplicated code in ktime_get_raw_and_real()") Signed-off-by: Dou Liyang Signed-off-by: Thomas Gleixner Cc: Christopher S. Hall Cc: joelaf@google.com Cc: arnd@arndb.de Cc: gregkh@linuxfoundation.org Cc: john.stultz@linaro.org Cc: deepa.kernel@gmail.com Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/1510552144-20831-1-git-send-email-douly.fnst@cn.fujitsu.com --- include/linux/timekeeping.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index 0021575fe871..51293e1aa4da 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -272,12 +272,6 @@ extern bool timekeeping_rtc_skipresume(void); extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); -/* - * PPS accessor - */ -extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, - struct timespec64 *ts_real); - /* * struct system_time_snapshot - simultaneous raw/real time capture with * counter value -- cgit v1.2.3 From 01c313dded34a16ef69e3972ceca687ba8a7cdf2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Nov 2017 17:50:59 +0100 Subject: kallsyms: fix building without printk Building kallsyms fails without CONFIG_PRINTK due to a missing declaration: kernel/kallsyms.c: In function 'kallsyms_show_value': kernel/kallsyms.c:670:10: error: 'kptr_restrict' undeclared (first use in this function); did you mean 'keyring_restrict'? This moves the declaration outside of the #ifdef guard, the definition is already available without CONFIG_PRINTK. Fixes: c0f3ea158939 ("stop using '%pK' for /proc/kallsyms pointer values") Signed-off-by: Arnd Bergmann [ I clearly need to start doing "allnoconfig" builds too, or just have a test branch for the 0day robot - Linus ] Signed-off-by: Linus Torvalds --- include/linux/printk.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/printk.h b/include/linux/printk.h index 335926039adc..905bba92f015 100644 --- a/include/linux/printk.h +++ b/include/linux/printk.h @@ -189,7 +189,6 @@ extern bool printk_timed_ratelimit(unsigned long *caller_jiffies, extern int printk_delay_msec; extern int dmesg_restrict; -extern int kptr_restrict; extern int devkmsg_sysctl_set_loglvl(struct ctl_table *table, int write, void __user *buf, @@ -280,6 +279,8 @@ static inline void printk_safe_flush_on_panic(void) } #endif +extern int kptr_restrict; + extern asmlinkage void dump_stack(void) __cold; #ifndef pr_fmt -- cgit v1.2.3 From e4a8ca3baa5557fa54557d42b5910ed0d3316922 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 13 Nov 2017 17:51:00 +0100 Subject: /proc/module: fix building without kallsyms As reported by kernelci and other build bots, we now get a link failure without CONFIG_KALLSYMS: module.c:(.text+0xf2c): undefined reference to `kallsyms_show_value' This adds a dummy helper with the same name that can be used for compilation. It's not entirely clear to me what this should return for !CONFIG_KALLSYMS, I picked an unconditional 'false', which leads to the module address being unavailable to user space. Link: https://kernelci.org/build/mainline/branch/master/kernel/v4.14-5-g516fb7f2e73d/ Fixes: 516fb7f2e73d ("/proc/module: use the same logic as /proc/kallsyms for address exposure") Signed-off-by: Arnd Bergmann Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 0a777c5216b1..708f337d780b 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,8 +14,6 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) -/* How and when do we show kallsyms values? */ -extern int kallsyms_show_value(void); #ifndef CONFIG_64BIT # define KALLSYM_FMT "%08lx" #else @@ -54,6 +52,9 @@ extern void __print_symbol(const char *fmt, unsigned long address); int lookup_symbol_name(unsigned long addr, char *symname); int lookup_symbol_attrs(unsigned long addr, unsigned long *size, unsigned long *offset, char *modname, char *name); +/* How and when do we show kallsyms values? */ +extern int kallsyms_show_value(void); + #else /* !CONFIG_KALLSYMS */ static inline unsigned long kallsyms_lookup_name(const char *name) @@ -112,6 +113,11 @@ static inline int lookup_symbol_attrs(unsigned long addr, unsigned long *size, u return -ERANGE; } +static inline int kallsyms_show_value(void) +{ + return false; +} + /* Stupid that this does nothing, but I didn't create this mess. */ #define __print_symbol(fmt, addr) #endif /*CONFIG_KALLSYMS*/ -- cgit v1.2.3 From 9a5941080ef29f1a0347ac2766e4d93312123b21 Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Tue, 7 Nov 2017 22:29:48 +0100 Subject: mtd: remove the get_unmapped_area method It is now unused. Signed-off-by: Nicolas Pitre Reviewed-by: Richard Weinberger Acked-by: Boris Brezillon Tested-by: Chris Brandt Signed-off-by: Richard Weinberger --- include/linux/mtd/mtd.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 849543f1f233..cd55bf14ad51 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -297,10 +297,6 @@ struct mtd_info { int (*_point) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, void **virt, resource_size_t *phys); int (*_unpoint) (struct mtd_info *mtd, loff_t from, size_t len); - unsigned long (*_get_unmapped_area) (struct mtd_info *mtd, - unsigned long len, - unsigned long offset, - unsigned long flags); int (*_read) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int (*_write) (struct mtd_info *mtd, loff_t to, size_t len, -- cgit v1.2.3 From 34d9a270e74a412f041b528c33b75e3e6bc7a242 Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:14 +0200 Subject: IB/mlx4: Exposing modify CQ callback to uverbs layer Exposed mlx4_ib_modify_cq to be called from ib device verb list. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx4/cq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/cq.h b/include/linux/mlx4/cq.h index 09cebe528488..508e8cc5ee86 100644 --- a/include/linux/mlx4/cq.h +++ b/include/linux/mlx4/cq.h @@ -136,6 +136,9 @@ enum { MLX4_CQE_BAD_FCS = 1 << 4, }; +#define MLX4_MAX_CQ_PERIOD (BIT(16) - 1) +#define MLX4_MAX_CQ_COUNT (BIT(16) - 1) + static inline void mlx4_cq_arm(struct mlx4_cq *cq, u32 cmd, void __iomem *uar_page, spinlock_t *doorbell_lock) -- cgit v1.2.3 From b0e9df6da25890448ebd134b7f647f16bced9abc Mon Sep 17 00:00:00 2001 From: Yonatan Cohen Date: Mon, 13 Nov 2017 10:51:15 +0200 Subject: IB/mlx5: Exposing modify CQ callback to uverbs layer Exposed mlx5_ib_modify_cq to be called from ib device verb list. Signed-off-by: Yonatan Cohen Reviewed-by: Majd Dibbiny Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/cq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/cq.h b/include/linux/mlx5/cq.h index cc718e245b1e..6be357b219ec 100644 --- a/include/linux/mlx5/cq.h +++ b/include/linux/mlx5/cq.h @@ -128,6 +128,9 @@ enum { CQE_SIZE_128_PAD = 2, }; +#define MLX5_MAX_CQ_PERIOD (BIT(__mlx5_bit_sz(cqc, cq_period)) - 1) +#define MLX5_MAX_CQ_COUNT (BIT(__mlx5_bit_sz(cqc, cq_max_count)) - 1) + static inline int cqe_sz_to_mlx_sz(u8 size, int padding_128_en) { return padding_128_en ? CQE_SIZE_128_PAD : -- cgit v1.2.3 From aea3706cfc4d952ed6d32b6d5845b5ecd99ed7f5 Mon Sep 17 00:00:00 2001 From: Miroslav Lichvar Date: Mon, 13 Nov 2017 14:51:31 -0800 Subject: timekeeping: Remove CONFIG_GENERIC_TIME_VSYSCALL_OLD As of d4d1fc61eb38f (ia64: Update fsyscall gettime to use modern vsyscall_update)the last user of CONFIG_GENERIC_TIME_VSYSCALL_OLD have been updated, the legacy support for old-style vsyscall implementations can be removed from the timekeeping code. (Thanks again to Tony Luck for helping remove the last user!) [jstultz: Commit message rework] Signed-off-by: Miroslav Lichvar Signed-off-by: John Stultz Signed-off-by: Thomas Gleixner Cc: Prarit Bhargava Cc: Tony Luck Cc: Richard Cochran Cc: Stephen Boyd Link: https://lkml.kernel.org/r/1510613491-16695-1-git-send-email-john.stultz@linaro.org --- include/linux/timekeeper_internal.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timekeeper_internal.h b/include/linux/timekeeper_internal.h index 7e9011101cb0..d315c3d6725c 100644 --- a/include/linux/timekeeper_internal.h +++ b/include/linux/timekeeper_internal.h @@ -136,13 +136,6 @@ struct timekeeper { extern void update_vsyscall(struct timekeeper *tk); extern void update_vsyscall_tz(void); -#elif defined(CONFIG_GENERIC_TIME_VSYSCALL_OLD) - -extern void update_vsyscall_old(struct timespec *ts, struct timespec *wtm, - struct clocksource *c, u32 mult, - u64 cycle_last); -extern void update_vsyscall_tz(void); - #else static inline void update_vsyscall(struct timekeeper *tk) -- cgit v1.2.3 From 096d1dd0f03211fb42d6c2457f248827604b7f0e Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 13 Nov 2017 16:19:46 +0100 Subject: netlink: remove unused NETLINK SKB flags These flags are unused, remove them to be less confusing. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 6ddb4a5da371..49b4257ce1ea 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -17,9 +17,6 @@ static inline struct nlmsghdr *nlmsg_hdr(const struct sk_buff *skb) } enum netlink_skb_flags { - NETLINK_SKB_MMAPED = 0x1, /* Packet data is mmaped */ - NETLINK_SKB_TX = 0x2, /* Packet was sent by userspace */ - NETLINK_SKB_DELIVERED = 0x4, /* Packet was delivered */ NETLINK_SKB_DST = 0x8, /* Dst set in sendto or sendmsg */ }; -- cgit v1.2.3 From c7cdff0e864713a089d7cb3a2b1136ba9a54881a Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Fri, 13 Oct 2017 16:11:48 +0300 Subject: virtio_balloon: fix deadlock on OOM fill_balloon doing memory allocations under balloon_lock can cause a deadlock when leak_balloon is called from virtballoon_oom_notify and tries to take same lock. To fix, split page allocation and enqueue and do allocations outside the lock. Here's a detailed analysis of the deadlock by Tetsuo Handa: In leak_balloon(), mutex_lock(&vb->balloon_lock) is called in order to serialize against fill_balloon(). But in fill_balloon(), alloc_page(GFP_HIGHUSER[_MOVABLE] | __GFP_NOMEMALLOC | __GFP_NORETRY) is called with vb->balloon_lock mutex held. Since GFP_HIGHUSER[_MOVABLE] implies __GFP_DIRECT_RECLAIM | __GFP_IO | __GFP_FS, despite __GFP_NORETRY is specified, this allocation attempt might indirectly depend on somebody else's __GFP_DIRECT_RECLAIM memory allocation. And such indirect __GFP_DIRECT_RECLAIM memory allocation might call leak_balloon() via virtballoon_oom_notify() via blocking_notifier_call_chain() callback via out_of_memory() when it reached __alloc_pages_may_oom() and held oom_lock mutex. Since vb->balloon_lock mutex is already held by fill_balloon(), it will cause OOM lockup. Thread1 Thread2 fill_balloon() takes a balloon_lock balloon_page_enqueue() alloc_page(GFP_HIGHUSER_MOVABLE) direct reclaim (__GFP_FS context) takes a fs lock waits for that fs lock alloc_page(GFP_NOFS) __alloc_pages_may_oom() takes the oom_lock out_of_memory() blocking_notifier_call_chain() leak_balloon() tries to take that balloon_lock and deadlocks Reported-by: Tetsuo Handa Cc: Michal Hocko Cc: Wei Wang Signed-off-by: Michael S. Tsirkin --- include/linux/balloon_compaction.h | 35 ++++++++++++++++++++++++++++++++++- 1 file changed, 34 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/balloon_compaction.h b/include/linux/balloon_compaction.h index fbbe6da40fed..53051f3d8f25 100644 --- a/include/linux/balloon_compaction.h +++ b/include/linux/balloon_compaction.h @@ -50,6 +50,7 @@ #include #include #include +#include /* * Balloon device information descriptor. @@ -67,7 +68,9 @@ struct balloon_dev_info { struct inode *inode; }; -extern struct page *balloon_page_enqueue(struct balloon_dev_info *b_dev_info); +extern struct page *balloon_page_alloc(void); +extern void balloon_page_enqueue(struct balloon_dev_info *b_dev_info, + struct page *page); extern struct page *balloon_page_dequeue(struct balloon_dev_info *b_dev_info); static inline void balloon_devinfo_init(struct balloon_dev_info *balloon) @@ -193,4 +196,34 @@ static inline gfp_t balloon_mapping_gfp_mask(void) } #endif /* CONFIG_BALLOON_COMPACTION */ + +/* + * balloon_page_push - insert a page into a page list. + * @head : pointer to list + * @page : page to be added + * + * Caller must ensure the page is private and protect the list. + */ +static inline void balloon_page_push(struct list_head *pages, struct page *page) +{ + list_add(&page->lru, pages); +} + +/* + * balloon_page_pop - remove a page from a page list. + * @head : pointer to list + * @page : page to be added + * + * Caller must ensure the page is private and protect the list. + */ +static inline struct page *balloon_page_pop(struct list_head *pages) +{ + struct page *page = list_first_entry_or_null(pages, struct page, lru); + + if (!page) + return NULL; + + list_del(&page->lru); + return page; +} #endif /* _LINUX_BALLOON_COMPACTION_H */ -- cgit v1.2.3 From 074d58989569b39f04294c90ef36dd82b8c2cc1a Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Nov 2017 16:38:45 +0000 Subject: security: keys: Replace time_t/timespec with time64_t The 'struct key' will use 'time_t' which we try to remove in the kernel, since 'time_t' is not year 2038 safe on 32bit systems. Also the 'struct keyring_search_context' will use 'timespec' type to record current time, which is also not year 2038 safe on 32bit systems. Thus this patch replaces 'time_t' with 'time64_t' which is year 2038 safe for 'struct key', and replace 'timespec' with 'time64_t' for the 'struct keyring_search_context', since we only look at the the seconds part of 'timespec' variable. Moreover we also change the codes where using the 'time_t' and 'timespec', and we can get current time by ktime_get_real_seconds() instead of current_kernel_time(), and use 'TIME64_MAX' macro to initialize the 'time64_t' type variable. Especially in proc.c file, we have replaced 'unsigned long' and 'timespec' type with 'u64' and 'time64_t' type to save the timeout value, which means user will get one 'u64' type timeout value by issuing proc_keys_show() function. Signed-off-by: Baolin Wang Reviewed-by: Arnd Bergmann Signed-off-by: David Howells Reviewed-by: James Morris --- include/linux/key.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key.h b/include/linux/key.h index 8a15cabe928d..e58ee10f6e58 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -24,6 +24,7 @@ #include #include #include +#include #ifdef __KERNEL__ #include @@ -162,10 +163,10 @@ struct key { struct key_user *user; /* owner of this key */ void *security; /* security data for this key */ union { - time_t expiry; /* time at which key expires (or 0) */ - time_t revoked_at; /* time at which key was revoked */ + time64_t expiry; /* time at which key expires (or 0) */ + time64_t revoked_at; /* time at which key was revoked */ }; - time_t last_used_at; /* last time used for LRU keyring discard */ + time64_t last_used_at; /* last time used for LRU keyring discard */ kuid_t uid; kgid_t gid; key_perm_t perm; /* access permissions */ -- cgit v1.2.3 From 0a9dd0e0711e58aa8d19ae4446cb3fe2906a8514 Mon Sep 17 00:00:00 2001 From: Baolin Wang Date: Wed, 15 Nov 2017 16:38:45 +0000 Subject: security: keys: Replace time_t with time64_t for struct key_preparsed_payload The 'struct key_preparsed_payload' will use 'time_t' which we will try to remove in the kernel, since 'time_t' is not year 2038 safe on 32bits systems. Thus this patch replaces 'time_t' with 'time64_t' which is year 2038 safe on 32 bits system for 'struct key_preparsed_payload', moreover we should use the 'TIME64_MAX' macro to initialize the 'time64_t' type variable. Signed-off-by: Baolin Wang Reviewed-by: Arnd Bergmann Signed-off-by: David Howells Reviewed-by: James Morris --- include/linux/key-type.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index 9520fc3c3b9a..05d8fb5a06c4 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -44,7 +44,7 @@ struct key_preparsed_payload { const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ - time_t expiry; /* Expiry time of key */ + time64_t expiry; /* Expiry time of key */ } __randomize_layout; typedef int (*request_key_actor_t)(struct key_construction *key, -- cgit v1.2.3 From 7d5905dc14a87805a59f3c5bf70173aac2bb18f8 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 15 Nov 2017 02:13:40 +0100 Subject: x86 / CPU: Always show current CPU frequency in /proc/cpuinfo After commit 890da9cf0983 (Revert "x86: do not use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz"") the "cpu MHz" number in /proc/cpuinfo on x86 can be either the nominal CPU frequency (which is constant) or the frequency most recently requested by a scaling governor in cpufreq, depending on the cpufreq configuration. That is somewhat inconsistent and is different from what it was before 4.13, so in order to restore the previous behavior, make it report the current CPU frequency like the scaling_cur_freq sysfs file in cpufreq. To that end, modify the /proc/cpuinfo implementation on x86 to use aperfmperf_snapshot_khz() to snapshot the APERF and MPERF feedback registers, if available, and use their values to compute the CPU frequency to be reported as "cpu MHz". However, do that carefully enough to avoid accumulating delays that lead to unacceptable access times for /proc/cpuinfo on systems with many CPUs. Run aperfmperf_snapshot_khz() once on all CPUs asynchronously at the /proc/cpuinfo open time, add a single delay upfront (if necessary) at that point and simply compute the current frequency while running show_cpuinfo() for each individual CPU. Also, to avoid slowing down /proc/cpuinfo accesses too much, reduce the default delay between consecutive APERF and MPERF reads to 10 ms, which should be sufficient to get large enough numbers for the frequency computation in all cases. Fixes: 890da9cf0983 (Revert "x86: do not use cpufreq_quick_get() for /proc/cpuinfo "cpu MHz"") Signed-off-by: Rafael J. Wysocki Acked-by: Thomas Gleixner Tested-by: Thomas Gleixner Acked-by: Ingo Molnar --- include/linux/cpufreq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 28734ee185a7..065f3a8eb486 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -917,6 +917,7 @@ static inline bool policy_has_boost_freq(struct cpufreq_policy *policy) } #endif +extern void arch_freq_prepare_all(void); extern unsigned int arch_freq_get_on_cpu(int cpu); extern void arch_set_freq_scale(struct cpumask *cpus, unsigned long cur_freq, -- cgit v1.2.3 From d50112edde1d0c621520e53747044009f11c656b Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Nov 2017 17:32:18 -0800 Subject: slab, slub, slob: add slab_flags_t Add sparse-checked slab_flags_t for struct kmem_cache::flags (SLAB_POISON, etc). SLAB is bloated temporarily by switching to "unsigned long", but only temporarily. Link: http://lkml.kernel.org/r/20171021100225.GA22428@avx2 Signed-off-by: Alexey Dobriyan Acked-by: Pekka Enberg Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kasan.h | 4 ++-- include/linux/kmemleak.h | 8 +++---- include/linux/slab.h | 60 +++++++++++++++++++++++++++++------------------- include/linux/slab_def.h | 2 +- include/linux/slub_def.h | 2 +- include/linux/types.h | 1 + 6 files changed, 46 insertions(+), 31 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kasan.h b/include/linux/kasan.h index 5017269e3f04..e3eb834c9a35 100644 --- a/include/linux/kasan.h +++ b/include/linux/kasan.h @@ -46,7 +46,7 @@ void kasan_alloc_pages(struct page *page, unsigned int order); void kasan_free_pages(struct page *page, unsigned int order); void kasan_cache_create(struct kmem_cache *cache, size_t *size, - unsigned long *flags); + slab_flags_t *flags); void kasan_cache_shrink(struct kmem_cache *cache); void kasan_cache_shutdown(struct kmem_cache *cache); @@ -95,7 +95,7 @@ static inline void kasan_free_pages(struct page *page, unsigned int order) {} static inline void kasan_cache_create(struct kmem_cache *cache, size_t *size, - unsigned long *flags) {} + slab_flags_t *flags) {} static inline void kasan_cache_shrink(struct kmem_cache *cache) {} static inline void kasan_cache_shutdown(struct kmem_cache *cache) {} diff --git a/include/linux/kmemleak.h b/include/linux/kmemleak.h index 590343f6c1b1..5ac416e2d339 100644 --- a/include/linux/kmemleak.h +++ b/include/linux/kmemleak.h @@ -48,14 +48,14 @@ extern void kmemleak_not_leak_phys(phys_addr_t phys) __ref; extern void kmemleak_ignore_phys(phys_addr_t phys) __ref; static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, - int min_count, unsigned long flags, + int min_count, slab_flags_t flags, gfp_t gfp) { if (!(flags & SLAB_NOLEAKTRACE)) kmemleak_alloc(ptr, size, min_count, gfp); } -static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags) { if (!(flags & SLAB_NOLEAKTRACE)) kmemleak_free(ptr); @@ -76,7 +76,7 @@ static inline void kmemleak_alloc(const void *ptr, size_t size, int min_count, { } static inline void kmemleak_alloc_recursive(const void *ptr, size_t size, - int min_count, unsigned long flags, + int min_count, slab_flags_t flags, gfp_t gfp) { } @@ -94,7 +94,7 @@ static inline void kmemleak_free(const void *ptr) static inline void kmemleak_free_part(const void *ptr, size_t size) { } -static inline void kmemleak_free_recursive(const void *ptr, unsigned long flags) +static inline void kmemleak_free_recursive(const void *ptr, slab_flags_t flags) { } static inline void kmemleak_free_percpu(const void __percpu *ptr) diff --git a/include/linux/slab.h b/include/linux/slab.h index af5aa65c7c18..0c4c579f52ed 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -21,13 +21,20 @@ * Flags to pass to kmem_cache_create(). * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. */ -#define SLAB_CONSISTENCY_CHECKS 0x00000100UL /* DEBUG: Perform (expensive) checks on alloc/free */ -#define SLAB_RED_ZONE 0x00000400UL /* DEBUG: Red zone objs in a cache */ -#define SLAB_POISON 0x00000800UL /* DEBUG: Poison objects */ -#define SLAB_HWCACHE_ALIGN 0x00002000UL /* Align objs on cache lines */ -#define SLAB_CACHE_DMA 0x00004000UL /* Use GFP_DMA memory */ -#define SLAB_STORE_USER 0x00010000UL /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_PANIC 0x00040000UL /* Panic if kmem_cache_create() fails */ +/* DEBUG: Perform (expensive) checks on alloc/free */ +#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100UL) +/* DEBUG: Red zone objs in a cache */ +#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400UL) +/* DEBUG: Poison objects */ +#define SLAB_POISON ((slab_flags_t __force)0x00000800UL) +/* Align objs on cache lines */ +#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000UL) +/* Use GFP_DMA memory */ +#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000UL) +/* DEBUG: Store the last owner for bug hunting */ +#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000UL) +/* Panic if kmem_cache_create() fails */ +#define SLAB_PANIC ((slab_flags_t __force)0x00040000UL) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * @@ -65,44 +72,51 @@ * * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ -#define SLAB_TYPESAFE_BY_RCU 0x00080000UL /* Defer freeing slabs to RCU */ -#define SLAB_MEM_SPREAD 0x00100000UL /* Spread some memory over cpuset */ -#define SLAB_TRACE 0x00200000UL /* Trace allocations and frees */ +/* Defer freeing slabs to RCU */ +#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000UL) +/* Spread some memory over cpuset */ +#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000UL) +/* Trace allocations and frees */ +#define SLAB_TRACE ((slab_flags_t __force)0x00200000UL) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS -# define SLAB_DEBUG_OBJECTS 0x00400000UL +# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000UL) #else -# define SLAB_DEBUG_OBJECTS 0x00000000UL +# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00000000UL) #endif -#define SLAB_NOLEAKTRACE 0x00800000UL /* Avoid kmemleak tracing */ +/* Avoid kmemleak tracing */ +#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000UL) /* Don't track use of uninitialized memory */ #ifdef CONFIG_KMEMCHECK -# define SLAB_NOTRACK 0x01000000UL +# define SLAB_NOTRACK ((slab_flags_t __force)0x01000000UL) #else -# define SLAB_NOTRACK 0x00000000UL +# define SLAB_NOTRACK ((slab_flags_t __force)0x00000000UL) #endif +/* Fault injection mark */ #ifdef CONFIG_FAILSLAB -# define SLAB_FAILSLAB 0x02000000UL /* Fault injection mark */ +# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000UL) #else -# define SLAB_FAILSLAB 0x00000000UL +# define SLAB_FAILSLAB ((slab_flags_t __force)0x00000000UL) #endif +/* Account to memcg */ #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) -# define SLAB_ACCOUNT 0x04000000UL /* Account to memcg */ +# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000UL) #else -# define SLAB_ACCOUNT 0x00000000UL +# define SLAB_ACCOUNT ((slab_flags_t __force)0x00000000UL) #endif #ifdef CONFIG_KASAN -#define SLAB_KASAN 0x08000000UL +#define SLAB_KASAN ((slab_flags_t __force)0x08000000UL) #else -#define SLAB_KASAN 0x00000000UL +#define SLAB_KASAN ((slab_flags_t __force)0x00000000UL) #endif /* The following flags affect the page allocator grouping pages by mobility */ -#define SLAB_RECLAIM_ACCOUNT 0x00020000UL /* Objects are reclaimable */ +/* Objects are reclaimable */ +#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000UL) #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. @@ -128,7 +142,7 @@ void __init kmem_cache_init(void); bool slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, - unsigned long, + slab_flags_t, void (*)(void *)); void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); diff --git a/include/linux/slab_def.h b/include/linux/slab_def.h index 8f7d2b1656d2..072e46e9e1d5 100644 --- a/include/linux/slab_def.h +++ b/include/linux/slab_def.h @@ -20,7 +20,7 @@ struct kmem_cache { struct reciprocal_value reciprocal_buffer_size; /* 2) touched by every alloc & free from the backend */ - unsigned int flags; /* constant flags */ + slab_flags_t flags; /* constant flags */ unsigned int num; /* # of objs per slab */ /* 3) cache_grow/shrink */ diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index 39fa09bcde23..0adae162dc8f 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -82,7 +82,7 @@ struct kmem_cache_order_objects { struct kmem_cache { struct kmem_cache_cpu __percpu *cpu_slab; /* Used for retriving partial slabs etc */ - unsigned long flags; + slab_flags_t flags; unsigned long min_partial; int size; /* The size of an object including meta data */ int object_size; /* The size of an object without meta data */ diff --git a/include/linux/types.h b/include/linux/types.h index 34fce54e4f1b..732b52c2eae4 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -156,6 +156,7 @@ typedef u32 dma_addr_t; #endif typedef unsigned __bitwise gfp_t; +typedef unsigned long __bitwise slab_flags_t; typedef unsigned __bitwise fmode_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT -- cgit v1.2.3 From 4fd0b46e898791009b03b2fdd6510044fa8730a6 Mon Sep 17 00:00:00 2001 From: Alexey Dobriyan Date: Wed, 15 Nov 2017 17:32:21 -0800 Subject: slab, slub, slob: convert slab_flags_t to 32-bit struct kmem_cache::flags is "unsigned long" which is unnecessary on 64-bit as no flags are defined in the higher bits. Switch the field to 32-bit and save some space on x86_64 until such flags appear: add/remove: 0/0 grow/shrink: 0/107 up/down: 0/-657 (-657) function old new delta sysfs_slab_add 720 719 -1 ... check_object 699 676 -23 [akpm@linux-foundation.org: fix printk warning] Link: http://lkml.kernel.org/r/20171021100635.GA8287@avx2 Signed-off-by: Alexey Dobriyan Acked-by: Pekka Enberg Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 44 ++++++++++++++++++++++---------------------- include/linux/types.h | 2 +- 2 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 0c4c579f52ed..f37cb93768ab 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -22,19 +22,19 @@ * The ones marked DEBUG are only valid if CONFIG_DEBUG_SLAB is set. */ /* DEBUG: Perform (expensive) checks on alloc/free */ -#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100UL) +#define SLAB_CONSISTENCY_CHECKS ((slab_flags_t __force)0x00000100U) /* DEBUG: Red zone objs in a cache */ -#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400UL) +#define SLAB_RED_ZONE ((slab_flags_t __force)0x00000400U) /* DEBUG: Poison objects */ -#define SLAB_POISON ((slab_flags_t __force)0x00000800UL) +#define SLAB_POISON ((slab_flags_t __force)0x00000800U) /* Align objs on cache lines */ -#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000UL) +#define SLAB_HWCACHE_ALIGN ((slab_flags_t __force)0x00002000U) /* Use GFP_DMA memory */ -#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000UL) +#define SLAB_CACHE_DMA ((slab_flags_t __force)0x00004000U) /* DEBUG: Store the last owner for bug hunting */ -#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000UL) +#define SLAB_STORE_USER ((slab_flags_t __force)0x00010000U) /* Panic if kmem_cache_create() fails */ -#define SLAB_PANIC ((slab_flags_t __force)0x00040000UL) +#define SLAB_PANIC ((slab_flags_t __force)0x00040000U) /* * SLAB_TYPESAFE_BY_RCU - **WARNING** READ THIS! * @@ -73,50 +73,50 @@ * Note that SLAB_TYPESAFE_BY_RCU was originally named SLAB_DESTROY_BY_RCU. */ /* Defer freeing slabs to RCU */ -#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000UL) +#define SLAB_TYPESAFE_BY_RCU ((slab_flags_t __force)0x00080000U) /* Spread some memory over cpuset */ -#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000UL) +#define SLAB_MEM_SPREAD ((slab_flags_t __force)0x00100000U) /* Trace allocations and frees */ -#define SLAB_TRACE ((slab_flags_t __force)0x00200000UL) +#define SLAB_TRACE ((slab_flags_t __force)0x00200000U) /* Flag to prevent checks on free */ #ifdef CONFIG_DEBUG_OBJECTS -# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000UL) +# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00400000U) #else -# define SLAB_DEBUG_OBJECTS ((slab_flags_t __force)0x00000000UL) +# define SLAB_DEBUG_OBJECTS 0 #endif /* Avoid kmemleak tracing */ -#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000UL) +#define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) /* Don't track use of uninitialized memory */ #ifdef CONFIG_KMEMCHECK -# define SLAB_NOTRACK ((slab_flags_t __force)0x01000000UL) +# define SLAB_NOTRACK ((slab_flags_t __force)0x01000000U) #else -# define SLAB_NOTRACK ((slab_flags_t __force)0x00000000UL) +# define SLAB_NOTRACK 0 #endif /* Fault injection mark */ #ifdef CONFIG_FAILSLAB -# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000UL) +# define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) #else -# define SLAB_FAILSLAB ((slab_flags_t __force)0x00000000UL) +# define SLAB_FAILSLAB 0 #endif /* Account to memcg */ #if defined(CONFIG_MEMCG) && !defined(CONFIG_SLOB) -# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000UL) +# define SLAB_ACCOUNT ((slab_flags_t __force)0x04000000U) #else -# define SLAB_ACCOUNT ((slab_flags_t __force)0x00000000UL) +# define SLAB_ACCOUNT 0 #endif #ifdef CONFIG_KASAN -#define SLAB_KASAN ((slab_flags_t __force)0x08000000UL) +#define SLAB_KASAN ((slab_flags_t __force)0x08000000U) #else -#define SLAB_KASAN ((slab_flags_t __force)0x00000000UL) +#define SLAB_KASAN 0 #endif /* The following flags affect the page allocator grouping pages by mobility */ /* Objects are reclaimable */ -#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000UL) +#define SLAB_RECLAIM_ACCOUNT ((slab_flags_t __force)0x00020000U) #define SLAB_TEMPORARY SLAB_RECLAIM_ACCOUNT /* Objects are short-lived */ /* * ZERO_SIZE_PTR will be returned for zero sized kmalloc requests. diff --git a/include/linux/types.h b/include/linux/types.h index 732b52c2eae4..c94d59ef96cc 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -156,7 +156,7 @@ typedef u32 dma_addr_t; #endif typedef unsigned __bitwise gfp_t; -typedef unsigned long __bitwise slab_flags_t; +typedef unsigned __bitwise slab_flags_t; typedef unsigned __bitwise fmode_t; #ifdef CONFIG_PHYS_ADDR_T_64BIT -- cgit v1.2.3 From 5799b255c491d853b73fb9e0e1760210315d06cd Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 15 Nov 2017 17:32:29 -0800 Subject: include/linux/slab.h: add kmalloc_array_node() and kcalloc_node() Patch series "Add kmalloc_array_node() and kcalloc_node()". Our current memeory allocation routines suffer form an API imbalance, for one we have kmalloc_array() and kcalloc() which check for overflows in size multiplication and we have kmalloc_node() and kzalloc_node() which allow for memory allocation on a certain NUMA node but don't check for eventual overflows. This patch (of 6): We have kmalloc_array() and kcalloc() wrappers on top of kmalloc() which ensure us overflow free multiplication for the size of a memory allocation but these implementations are not NUMA-aware. Likewise we have kmalloc_node() which is a NUMA-aware version of kmalloc() but the implementation is not aware of any possible overflows in eventual size calculations. Introduce a combination of the two above cases to have a NUMA-node aware version of kmalloc_array() and kcalloc(). Link: http://lkml.kernel.org/r/20170927082038.3782-2-jthumshirn@suse.de Signed-off-by: Johannes Thumshirn Acked-by: Vlastimil Babka Cc: Christoph Hellwig Cc: Christoph Lameter Cc: Damien Le Moal Cc: David Rientjes Cc: "David S. Miller" Cc: Doug Ledford Cc: Hal Rosenstock Cc: Jens Axboe Cc: Joonsoo Kim Cc: Mike Marciniszyn Cc: Pekka Enberg Cc: Santosh Shilimkar Cc: Sean Hefty Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index f37cb93768ab..ebddfca9e24b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -650,6 +650,22 @@ extern void *__kmalloc_track_caller(size_t, gfp_t, unsigned long); #define kmalloc_track_caller(size, flags) \ __kmalloc_track_caller(size, flags, _RET_IP_) +static inline void *kmalloc_array_node(size_t n, size_t size, gfp_t flags, + int node) +{ + if (size != 0 && n > SIZE_MAX / size) + return NULL; + if (__builtin_constant_p(n) && __builtin_constant_p(size)) + return kmalloc_node(n * size, flags, node); + return __kmalloc_node(n * size, flags, node); +} + +static inline void *kcalloc_node(size_t n, size_t size, gfp_t flags, int node) +{ + return kmalloc_array_node(n, size, flags | __GFP_ZERO, node); +} + + #ifdef CONFIG_NUMA extern void *__kmalloc_node_track_caller(size_t, gfp_t, int, unsigned long); #define kmalloc_node_track_caller(size, flags, node) \ -- cgit v1.2.3 From 41710443f790b5f7f5305eba99dacce88e259f4c Mon Sep 17 00:00:00 2001 From: Changbin Du Date: Wed, 15 Nov 2017 17:32:53 -0800 Subject: mm: update comments for struct page.mapping struct page.mapping can be NULL or points to one object of type address_space, anon_vma or KSM private structure. Link: http://lkml.kernel.org/r/1506485067-15954-1-git-send-email-changbin.du@intel.com Signed-off-by: Changbin Du Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c85f11dafd56..d1b8e8f97fc2 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -48,8 +48,10 @@ struct page { * inode address_space, or NULL. * If page mapped as anonymous * memory, low bit is set, and - * it points to anon_vma object: - * see PAGE_MAPPING_ANON below. + * it points to anon_vma object + * or KSM private structure. See + * PAGE_MAPPING_ANON and + * PAGE_MAPPING_KSM. */ void *s_mem; /* slab first object */ atomic_t compound_mapcount; /* first tail page */ -- cgit v1.2.3 From 23c47d2ada9f96731492a67b28c0072715075baa Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:00 -0800 Subject: bdi: introduce BDI_CAP_SYNCHRONOUS_IO As discussed at https://lkml.kernel.org/r/<20170728165604.10455-1-ross.zwisler@linux.intel.com> someday we will remove rw_page(). If so, we need something to detect such super-fast storage on which synchronous IO operations like the current rw_page are always a win. Introduces BDI_CAP_SYNCHRONOUS_IO to indicate such devices. With it, we could use various optimization techniques. Link: http://lkml.kernel.org/r/1505886205-9671-3-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Christoph Hellwig Cc: Dan Williams Cc: Ross Zwisler Cc: Hugh Dickins Cc: Ilya Dryomov Cc: Jens Axboe Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index f41ca8486e02..7360e79e9a2f 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -122,6 +122,8 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); * BDI_CAP_STRICTLIMIT: Keep number of dirty pages below bdi threshold. * * BDI_CAP_CGROUP_WRITEBACK: Supports cgroup-aware writeback. + * BDI_CAP_SYNCHRONOUS_IO: Device is so fast that asynchronous IO would be + * inefficient. */ #define BDI_CAP_NO_ACCT_DIRTY 0x00000001 #define BDI_CAP_NO_WRITEBACK 0x00000002 @@ -129,6 +131,7 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); #define BDI_CAP_STABLE_WRITES 0x00000008 #define BDI_CAP_STRICTLIMIT 0x00000010 #define BDI_CAP_CGROUP_WRITEBACK 0x00000020 +#define BDI_CAP_SYNCHRONOUS_IO 0x00000040 #define BDI_CAP_NO_ACCT_AND_WRITEBACK \ (BDI_CAP_NO_WRITEBACK | BDI_CAP_NO_ACCT_DIRTY | BDI_CAP_NO_ACCT_WB) @@ -174,6 +177,11 @@ static inline int wb_congested(struct bdi_writeback *wb, int cong_bits) long congestion_wait(int sync, long timeout); long wait_iff_congested(struct pglist_data *pgdat, int sync, long timeout); +static inline bool bdi_cap_synchronous_io(struct backing_dev_info *bdi) +{ + return bdi->capabilities & BDI_CAP_SYNCHRONOUS_IO; +} + static inline bool bdi_cap_stable_pages_required(struct backing_dev_info *bdi) { return bdi->capabilities & BDI_CAP_STABLE_WRITES; -- cgit v1.2.3 From 539a6fea7fdcade532bd3e77be2862a683f8f0c9 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:04 -0800 Subject: mm, swap: introduce SWP_SYNCHRONOUS_IO If rw-page based fast storage is used for swap devices, we need to detect it to enhance swap IO operations. This patch is preparation for optimizing of swap-in operation with next patch. Link: http://lkml.kernel.org/r/1505886205-9671-4-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Dan Williams Cc: Ilya Dryomov Cc: Jens Axboe Cc: Ross Zwisler Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index f02fb5db8914..933d7c0c3542 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -171,8 +171,9 @@ enum { SWP_AREA_DISCARD = (1 << 8), /* single-time swap area discards */ SWP_PAGE_DISCARD = (1 << 9), /* freed swap page-cluster discards */ SWP_STABLE_WRITES = (1 << 10), /* no overwrite PG_writeback pages */ + SWP_SYNCHRONOUS_IO = (1 << 11), /* synchronous IO is efficient */ /* add others here before... */ - SWP_SCANNING = (1 << 11), /* refcount in scan_swap_map */ + SWP_SCANNING = (1 << 12), /* refcount in scan_swap_map */ }; #define SWAP_CLUSTER_MAX 32UL -- cgit v1.2.3 From 0bcac06f27d7528591c27ac2b093ccd71c5d0168 Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:07 -0800 Subject: mm, swap: skip swapcache for swapin of synchronous device With fast swap storage, the platforms want to use swap more aggressively and swap-in is crucial to application latency. The rw_page() based synchronous devices like zram, pmem and btt are such fast storage. When I profile swapin performance with zram lz4 decompress test, S/W overhead is more than 70%. Maybe, it would be bigger in nvdimm. This patch aims to reduce swap-in latency by skipping swapcache if the swap device is synchronous device like rw_page based device. It enhances 45% my swapin test(5G sequential swapin, no readahead, from 2.41sec to 1.64sec). Link: http://lkml.kernel.org/r/1505886205-9671-5-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Dan Williams Cc: Ross Zwisler Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Ilya Dryomov Cc: Jens Axboe Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 933d7c0c3542..32c06f028c7b 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -466,6 +466,7 @@ extern int page_swapcount(struct page *); extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); +extern struct swap_info_struct *swp_swap_info(swp_entry_t entry); extern bool reuse_swap_page(struct page *, int *); extern int try_to_free_swap(struct page *); struct backing_dev_info; @@ -474,6 +475,16 @@ extern void exit_swap_address_space(unsigned int type); #else /* CONFIG_SWAP */ +static inline int swap_readpage(struct page *page, bool do_poll) +{ + return 0; +} + +static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) +{ + return NULL; +} + #define swap_address_space(entry) (NULL) #define get_nr_swap_pages() 0L #define total_swap_pages 0L -- cgit v1.2.3 From aa8d22a11da933dbf880b4933b58931f4aefe91c Mon Sep 17 00:00:00 2001 From: Minchan Kim Date: Wed, 15 Nov 2017 17:33:11 -0800 Subject: mm: swap: SWP_SYNCHRONOUS_IO: skip swapcache only if swapped page has no other reference When SWP_SYNCHRONOUS_IO swapped-in pages are shared by several processes, it can cause unnecessary memory wastage by skipping swap cache. Because, with swapin fault by read, they could share a page if the page were in swap cache. Thus, it avoids allocating same content new pages. This patch makes the swapcache skipping work only if the swap pte is non-sharable. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/1507620825-5537-1-git-send-email-minchan@kernel.org Signed-off-by: Minchan Kim Cc: Dan Williams Cc: Ross Zwisler Cc: Hugh Dickins Cc: Christoph Hellwig Cc: Ilya Dryomov Cc: Jens Axboe Cc: Sergey Senozhatsky Cc: Huang Ying Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swap.h b/include/linux/swap.h index 32c06f028c7b..8b8a6f965785 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -463,6 +463,7 @@ extern unsigned int count_swap_pages(int, int); extern sector_t map_swap_page(struct page *, struct block_device **); extern sector_t swapdev_block(int, pgoff_t); extern int page_swapcount(struct page *); +extern int __swap_count(struct swap_info_struct *si, swp_entry_t entry); extern int __swp_swapcount(swp_entry_t entry); extern int swp_swapcount(swp_entry_t entry); extern struct swap_info_struct *page_swap_info(struct page *); @@ -589,6 +590,11 @@ static inline int page_swapcount(struct page *page) return 0; } +static inline int __swap_count(struct swap_info_struct *si, swp_entry_t entry) +{ + return 0; +} + static inline int __swp_swapcount(swp_entry_t entry) { return 0; -- cgit v1.2.3 From 4da2ce250f986060750fcc5b29112914e31803ba Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Nov 2017 17:33:26 -0800 Subject: mm: distinguish CMA and MOVABLE isolation in has_unmovable_pages() Joonsoo has noticed that "mm: drop migrate type checks from has_unmovable_pages" would break CMA allocator because it relies on has_unmovable_pages returning false even for CMA pageblocks which in fact don't have to be movable: alloc_contig_range start_isolate_page_range set_migratetype_isolate has_unmovable_pages This is a result of the code sharing between CMA and memory hotplug while each one has a different idea of what has_unmovable_pages should return. This is unfortunate but fixing it properly would require a lot of code duplication. Fix the issue by introducing the requested migrate type argument and special case MIGRATE_CMA case where CMA page blocks are handled properly. This will work for memory hotplug because it requires MIGRATE_MOVABLE. Link: http://lkml.kernel.org/r/20171019122118.y6cndierwl2vnguj@dhcp22.suse.cz Signed-off-by: Michal Hocko Reported-by: Joonsoo Kim Tested-by: Stefan Wahren Tested-by: Ran Wang Cc: Michael Ellerman Cc: Vlastimil Babka Cc: Igor Mammedov Cc: KAMEZAWA Hiroyuki Cc: Reza Arbab Cc: Vitaly Kuznetsov Cc: Xishi Qiu Cc: Yasuaki Ishimatsu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-isolation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-isolation.h b/include/linux/page-isolation.h index 05a04e603686..cdad58bbfd8b 100644 --- a/include/linux/page-isolation.h +++ b/include/linux/page-isolation.h @@ -31,7 +31,7 @@ static inline bool is_migrate_isolate(int migratetype) #endif bool has_unmovable_pages(struct zone *zone, struct page *page, int count, - bool skip_hwpoisoned_pages); + int migratetype, bool skip_hwpoisoned_pages); void set_pageblock_migratetype(struct page *page, int migratetype); int move_freepages_block(struct zone *zone, struct page *page, int migratetype, int *num_movable); -- cgit v1.2.3 From 66e8b438bd5c75498cfe915c4219049eaebcb869 Mon Sep 17 00:00:00 2001 From: Gioh Kim Date: Wed, 15 Nov 2017 17:33:42 -0800 Subject: mm/memblock.c: make the index explicit argument of for_each_memblock_type for_each_memblock_type macro function relies on idx variable defined in the caller context. Silent macro arguments are almost always wrong thing to do. They make code harder to read and easier to get wrong. Let's use an explicit iterator parameter for for_each_memblock_type and make the code more obious. This patch is a mere cleanup and it shouldn't introduce any functional change. Link: http://lkml.kernel.org/r/20170913133029.28911-1-gi-oh.kim@profitbricks.com Signed-off-by: Gioh Kim Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index bae11c7e7bf3..ce0e5634c2f9 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -389,10 +389,10 @@ static inline unsigned long memblock_region_reserved_end_pfn(const struct memblo region < (memblock.memblock_type.regions + memblock.memblock_type.cnt); \ region++) -#define for_each_memblock_type(memblock_type, rgn) \ - for (idx = 0, rgn = &memblock_type->regions[0]; \ - idx < memblock_type->cnt; \ - idx++, rgn = &memblock_type->regions[idx]) +#define for_each_memblock_type(i, memblock_type, rgn) \ + for (i = 0, rgn = &memblock_type->regions[0]; \ + i < memblock_type->cnt; \ + i++, rgn = &memblock_type->regions[i]) #ifdef CONFIG_MEMTEST extern void early_memtest(phys_addr_t start, phys_addr_t end); -- cgit v1.2.3 From 0bea803e9e6bf3836d5368a6426c30a8c0e5eab5 Mon Sep 17 00:00:00 2001 From: Ralph Campbell Date: Wed, 15 Nov 2017 17:34:00 -0800 Subject: mm/hmm: constify hmm_devmem_page_get_drvdata() parameter MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Constify pointer parameter to avoid issue when use from code that only has const struct page pointer to use in the first place. Link: http://lkml.kernel.org/r/1506972774-10191-1-git-send-email-jglisse@redhat.com Signed-off-by: Ralph Campbell Signed-off-by: Jérôme Glisse Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hmm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hmm.h b/include/linux/hmm.h index 96e69979f84d..325017ad9311 100644 --- a/include/linux/hmm.h +++ b/include/linux/hmm.h @@ -471,9 +471,9 @@ static inline void hmm_devmem_page_set_drvdata(struct page *page, * @page: pointer to struct page * Return: driver data value */ -static inline unsigned long hmm_devmem_page_get_drvdata(struct page *page) +static inline unsigned long hmm_devmem_page_get_drvdata(const struct page *page) { - unsigned long *drvdata = (unsigned long *)&page->pgmap; + const unsigned long *drvdata = (const unsigned long *)&page->pgmap; return drvdata[1]; } -- cgit v1.2.3 From 0f10851ea475e08896ee5d9a2036d1bb46a8f3a4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Wed, 15 Nov 2017 17:34:07 -0800 Subject: mm/mmu_notifier: avoid double notification when it is useless MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This patch only affects users of mmu_notifier->invalidate_range callback which are device drivers related to ATS/PASID, CAPI, IOMMUv2, SVM ... and it is an optimization for those users. Everyone else is unaffected by it. When clearing a pte/pmd we are given a choice to notify the event under the page table lock (notify version of *_clear_flush helpers do call the mmu_notifier_invalidate_range). But that notification is not necessary in all cases. This patch removes almost all cases where it is useless to have a call to mmu_notifier_invalidate_range before mmu_notifier_invalidate_range_end. It also adds documentation in all those cases explaining why. Below is a more in depth analysis of why this is fine to do this: For secondary TLB (non CPU TLB) like IOMMU TLB or device TLB (when device use thing like ATS/PASID to get the IOMMU to walk the CPU page table to access a process virtual address space). There is only 2 cases when you need to notify those secondary TLB while holding page table lock when clearing a pte/pmd: A) page backing address is free before mmu_notifier_invalidate_range_end B) a page table entry is updated to point to a new page (COW, write fault on zero page, __replace_page(), ...) Case A is obvious you do not want to take the risk for the device to write to a page that might now be used by something completely different. Case B is more subtle. For correctness it requires the following sequence to happen: - take page table lock - clear page table entry and notify (pmd/pte_huge_clear_flush_notify()) - set page table entry to point to new page If clearing the page table entry is not followed by a notify before setting the new pte/pmd value then you can break memory model like C11 or C++11 for the device. Consider the following scenario (device use a feature similar to ATS/ PASID): Two address addrA and addrB such that |addrA - addrB| >= PAGE_SIZE we assume they are write protected for COW (other case of B apply too). [Time N] ----------------------------------------------------------------- CPU-thread-0 {try to write to addrA} CPU-thread-1 {try to write to addrB} CPU-thread-2 {} CPU-thread-3 {} DEV-thread-0 {read addrA and populate device TLB} DEV-thread-2 {read addrB and populate device TLB} [Time N+1] --------------------------------------------------------------- CPU-thread-0 {COW_step0: {mmu_notifier_invalidate_range_start(addrA)}} CPU-thread-1 {COW_step0: {mmu_notifier_invalidate_range_start(addrB)}} CPU-thread-2 {} CPU-thread-3 {} DEV-thread-0 {} DEV-thread-2 {} [Time N+2] --------------------------------------------------------------- CPU-thread-0 {COW_step1: {update page table point to new page for addrA}} CPU-thread-1 {COW_step1: {update page table point to new page for addrB}} CPU-thread-2 {} CPU-thread-3 {} DEV-thread-0 {} DEV-thread-2 {} [Time N+3] --------------------------------------------------------------- CPU-thread-0 {preempted} CPU-thread-1 {preempted} CPU-thread-2 {write to addrA which is a write to new page} CPU-thread-3 {} DEV-thread-0 {} DEV-thread-2 {} [Time N+3] --------------------------------------------------------------- CPU-thread-0 {preempted} CPU-thread-1 {preempted} CPU-thread-2 {} CPU-thread-3 {write to addrB which is a write to new page} DEV-thread-0 {} DEV-thread-2 {} [Time N+4] --------------------------------------------------------------- CPU-thread-0 {preempted} CPU-thread-1 {COW_step3: {mmu_notifier_invalidate_range_end(addrB)}} CPU-thread-2 {} CPU-thread-3 {} DEV-thread-0 {} DEV-thread-2 {} [Time N+5] --------------------------------------------------------------- CPU-thread-0 {preempted} CPU-thread-1 {} CPU-thread-2 {} CPU-thread-3 {} DEV-thread-0 {read addrA from old page} DEV-thread-2 {read addrB from new page} So here because at time N+2 the clear page table entry was not pair with a notification to invalidate the secondary TLB, the device see the new value for addrB before seing the new value for addrA. This break total memory ordering for the device. When changing a pte to write protect or to point to a new write protected page with same content (KSM) it is ok to delay invalidate_range callback to mmu_notifier_invalidate_range_end() outside the page table lock. This is true even if the thread doing page table update is preempted right after releasing page table lock before calling mmu_notifier_invalidate_range_end Thanks to Andrea for thinking of a problematic scenario for COW. [jglisse@redhat.com: v2] Link: http://lkml.kernel.org/r/20171017031003.7481-2-jglisse@redhat.com Link: http://lkml.kernel.org/r/20170901173011.10745-1-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Andrea Arcangeli Cc: Nadav Amit Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: David Woodhouse Cc: Alistair Popple Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Stephen Rothwell Cc: Andrew Donnellan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 2cf1c3c807f6..130831718e95 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -156,7 +156,8 @@ struct mmu_notifier_ops { * shared page-tables, it not necessary to implement the * invalidate_range_start()/end() notifiers, as * invalidate_range() alread catches the points in time when an - * external TLB range needs to be flushed. + * external TLB range needs to be flushed. For more in depth + * discussion on this see Documentation/vm/mmu_notifier.txt * * The invalidate_range() function is called under the ptl * spin-lock and not allowed to sleep. -- cgit v1.2.3 From 4645b9fe84bf4878f04c7959a75df7c3c2d1bbb9 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=A9r=C3=B4me=20Glisse?= Date: Wed, 15 Nov 2017 17:34:11 -0800 Subject: mm/mmu_notifier: avoid call to invalidate_range() in range_end() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is an optimization patch that only affect mmu_notifier users which rely on the invalidate_range() callback. This patch avoids calling that callback twice in a row from inside __mmu_notifier_invalidate_range_end Existing pattern (before this patch): mmu_notifier_invalidate_range_start() pte/pmd/pud_clear_flush_notify() mmu_notifier_invalidate_range() mmu_notifier_invalidate_range_end() mmu_notifier_invalidate_range() New pattern (after this patch): mmu_notifier_invalidate_range_start() pte/pmd/pud_clear_flush_notify() mmu_notifier_invalidate_range() mmu_notifier_invalidate_range_only_end() We call the invalidate_range callback after clearing the page table under the page table lock and we skip the call to invalidate_range inside the __mmu_notifier_invalidate_range_end() function. Idea from Andrea Arcangeli Link: http://lkml.kernel.org/r/20171017031003.7481-3-jglisse@redhat.com Signed-off-by: Jérôme Glisse Cc: Andrea Arcangeli Cc: Joerg Roedel Cc: Suravee Suthikulpanit Cc: David Woodhouse Cc: Alistair Popple Cc: Michael Ellerman Cc: Benjamin Herrenschmidt Cc: Stephen Rothwell Cc: Andrew Donnellan Cc: Nadav Amit Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmu_notifier.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmu_notifier.h b/include/linux/mmu_notifier.h index 130831718e95..b25dc9db19fc 100644 --- a/include/linux/mmu_notifier.h +++ b/include/linux/mmu_notifier.h @@ -214,7 +214,8 @@ extern void __mmu_notifier_change_pte(struct mm_struct *mm, extern void __mmu_notifier_invalidate_range_start(struct mm_struct *mm, unsigned long start, unsigned long end); extern void __mmu_notifier_invalidate_range_end(struct mm_struct *mm, - unsigned long start, unsigned long end); + unsigned long start, unsigned long end, + bool only_end); extern void __mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end); @@ -268,7 +269,14 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, unsigned long start, unsigned long end) { if (mm_has_notifiers(mm)) - __mmu_notifier_invalidate_range_end(mm, start, end); + __mmu_notifier_invalidate_range_end(mm, start, end, false); +} + +static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ + if (mm_has_notifiers(mm)) + __mmu_notifier_invalidate_range_end(mm, start, end, true); } static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, @@ -439,6 +447,11 @@ static inline void mmu_notifier_invalidate_range_end(struct mm_struct *mm, { } +static inline void mmu_notifier_invalidate_range_only_end(struct mm_struct *mm, + unsigned long start, unsigned long end) +{ +} + static inline void mmu_notifier_invalidate_range(struct mm_struct *mm, unsigned long start, unsigned long end) { -- cgit v1.2.3 From 3a50d14d0df5776e002a8683a290c87eeac93a21 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 15 Nov 2017 17:34:15 -0800 Subject: mm: remove unused pgdat->inactive_ratio Since commit 59dc76b0d4df ("mm: vmscan: reduce size of inactive file list") 'pgdat->inactive_ratio' is not used, except for printing "node_inactive_ratio: 0" in /proc/zoneinfo output. Remove it. Link: http://lkml.kernel.org/r/20171003152611.27483-1-aryabinin@virtuozzo.com Signed-off-by: Andrey Ryabinin Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index a507f43ad221..39ccaa9c428b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -712,12 +712,6 @@ typedef struct pglist_data { /* Fields commonly accessed by the page reclaim scanner */ struct lruvec lruvec; - /* - * The target ratio of ACTIVE_ANON to INACTIVE_ANON pages on - * this node's LRU. Maintained by the pageout code. - */ - unsigned int inactive_ratio; - unsigned long flags; ZONE_PADDING(_pad2_) -- cgit v1.2.3 From 8745808fda84c638e45cc860c8fb600bf4b0a2a6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Nov 2017 17:34:22 -0800 Subject: mm, arch: remove empty_bad_page* empty_bad_page() and empty_bad_pte_table() seem to be relics from old days which is not used by any code for a long time. I have tried to find when exactly but this is not really all that straightforward due to many code movements - traces disappear around 2.4 times. Anyway no code really references neither empty_bad_page nor empty_bad_pte_table. We only allocate the storage which is not used by anybody so remove them. Link: http://lkml.kernel.org/r/20171004150045.30755-1-mhocko@kernel.org Signed-off-by: Michal Hocko Acked-by: Ralf Baechle Acked-by: Ingo Molnar Cc: Yoshinori Sato Cc: David Howells Cc: Rich Felker Cc: Jeff Dike Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 584b14c774c1..3ec44e27aa9d 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -18,7 +18,7 @@ * Various page->flags bits: * * PG_reserved is set for special pages, which can never be swapped out. Some - * of them might not even exist (eg empty_bad_page)... + * of them might not even exist... * * The PG_private bitflag is set on pagecache pages if they contain filesystem * specific data (which is normally at page->private). It can be used by -- cgit v1.2.3 From 72b045aecdd856b083521f2a963705b4c2e59680 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Nov 2017 17:34:33 -0800 Subject: mm: implement find_get_pages_range_tag() Patch series "Ranged pagevec tagged lookup", v3. In this series I provide a ranged variant of pagevec_lookup_tag() and use it in places where it makes sense. This series removes some common code and it also has a potential for speeding up some operations similarly as for pagevec_lookup_range() (but for now I can think of only artificial cases where this happens). This patch (of 16): Implement a variant of find_get_pages_tag() that stops iterating at given index. Lots of users of this function (through pagevec_lookup()) actually want a range lookup and all of them are currently open-coding this. Also create corresponding pagevec_lookup_range_tag() function. Link: http://lkml.kernel.org/r/20171009151359.31984-2-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Daniel Jordan Cc: Bob Peterson Cc: Chao Yu Cc: David Howells Cc: David Sterba Cc: Ilya Dryomov Cc: Jaegeuk Kim Cc: Ryusuke Konishi Cc: Steve French Cc: "Theodore Ts'o" Cc: "Yan, Zheng" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 12 ++++++++++-- include/linux/pagevec.h | 11 +++++++++-- 2 files changed, 19 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e08b5339023c..956d6a80e126 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -366,8 +366,16 @@ static inline unsigned find_get_pages(struct address_space *mapping, } unsigned find_get_pages_contig(struct address_space *mapping, pgoff_t start, unsigned int nr_pages, struct page **pages); -unsigned find_get_pages_tag(struct address_space *mapping, pgoff_t *index, - int tag, unsigned int nr_pages, struct page **pages); +unsigned find_get_pages_range_tag(struct address_space *mapping, pgoff_t *index, + pgoff_t end, int tag, unsigned int nr_pages, + struct page **pages); +static inline unsigned find_get_pages_tag(struct address_space *mapping, + pgoff_t *index, int tag, unsigned int nr_pages, + struct page **pages) +{ + return find_get_pages_range_tag(mapping, index, (pgoff_t)-1, tag, + nr_pages, pages); +} unsigned find_get_entries_tag(struct address_space *mapping, pgoff_t start, int tag, unsigned int nr_entries, struct page **entries, pgoff_t *indices); diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 2636c0c0f279..afc718f586f8 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -38,9 +38,16 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, return pagevec_lookup_range(pvec, mapping, start, (pgoff_t)-1); } -unsigned pagevec_lookup_tag(struct pagevec *pvec, +unsigned pagevec_lookup_range_tag(struct pagevec *pvec, + struct address_space *mapping, pgoff_t *index, pgoff_t end, + int tag, unsigned nr_pages); +static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, int tag, - unsigned nr_pages); + unsigned nr_pages) +{ + return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag, + nr_pages); +} static inline void pagevec_init(struct pagevec *pvec, int cold) { -- cgit v1.2.3 From 93d3b7140ad379885849ad2674b4290c9e8273da Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Nov 2017 17:35:12 -0800 Subject: mm: add variant of pagevec_lookup_range_tag() taking number of pages Currently pagevec_lookup_range_tag() takes number of pages to look up but most users don't need this. Create a new function pagevec_lookup_range_nr_tag() that takes maximum number of pages to lookup for Ceph which wants this functionality so that we can drop nr_pages argument from pagevec_lookup_range_tag(). Link: http://lkml.kernel.org/r/20171009151359.31984-13-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Daniel Jordan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index afc718f586f8..87a2dfd62f5e 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -41,6 +41,9 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, int tag, unsigned nr_pages); +unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, + struct address_space *mapping, pgoff_t *index, pgoff_t end, + int tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, int tag, unsigned nr_pages) -- cgit v1.2.3 From 67fd707f468142d0f689a6240044bb45c1913003 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Nov 2017 17:35:19 -0800 Subject: mm: remove nr_pages argument from pagevec_lookup_{,range}_tag() All users of pagevec_lookup() and pagevec_lookup_range() now pass PAGEVEC_SIZE as a desired number of pages. Just drop the argument. Link: http://lkml.kernel.org/r/20171009151359.31984-15-jack@suse.cz Signed-off-by: Jan Kara Reviewed-by: Daniel Jordan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 87a2dfd62f5e..4f56e0ad9d00 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -40,16 +40,14 @@ static inline unsigned pagevec_lookup(struct pagevec *pvec, unsigned pagevec_lookup_range_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, - int tag, unsigned nr_pages); + int tag); unsigned pagevec_lookup_range_nr_tag(struct pagevec *pvec, struct address_space *mapping, pgoff_t *index, pgoff_t end, int tag, unsigned max_pages); static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, - struct address_space *mapping, pgoff_t *index, int tag, - unsigned nr_pages) + struct address_space *mapping, pgoff_t *index, int tag) { - return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag, - nr_pages); + return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } static inline void pagevec_init(struct pagevec *pvec, int cold) -- cgit v1.2.3 From b4e98d9ac775907cc53fb08fcb6776deb7694e30 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Nov 2017 17:35:33 -0800 Subject: mm: account pud page tables On a machine with 5-level paging support a process can allocate significant amount of memory and stay unnoticed by oom-killer and memory cgroup. The trick is to allocate a lot of PUD page tables. We don't account PUD page tables, only PMD and PTE. We already addressed the same issue for PMD page tables, see commit dc6c9a35b66b ("mm: account pmd page tables to the process"). Introduction of 5-level paging brings the same issue for PUD page tables. The patch expands accounting to PUD level. [kirill.shutemov@linux.intel.com: s/pmd_t/pud_t/] Link: http://lkml.kernel.org/r/20171004074305.x35eh5u7ybbt5kar@black.fi.intel.com [heiko.carstens@de.ibm.com: s390/mm: fix pud table accounting] Link: http://lkml.kernel.org/r/20171103090551.18231-1-heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20171002080427.3320-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Signed-off-by: Heiko Carstens Acked-by: Rik van Riel Acked-by: Michal Hocko Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 36 +++++++++++++++++++++++++++++++++--- include/linux/mm_types.h | 3 +++ 2 files changed, 36 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 91b46f99b4d2..9af86f39d928 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1599,14 +1599,44 @@ static inline int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, int __p4d_alloc(struct mm_struct *mm, pgd_t *pgd, unsigned long address); #endif -#ifdef __PAGETABLE_PUD_FOLDED +#if defined(__PAGETABLE_PUD_FOLDED) || !defined(CONFIG_MMU) static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address) { return 0; } + +static inline unsigned long mm_nr_puds(const struct mm_struct *mm) +{ + return 0; +} + +static inline void mm_nr_puds_init(struct mm_struct *mm) {} +static inline void mm_inc_nr_puds(struct mm_struct *mm) {} +static inline void mm_dec_nr_puds(struct mm_struct *mm) {} + #else int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); + +static inline void mm_nr_puds_init(struct mm_struct *mm) +{ + atomic_long_set(&mm->nr_puds, 0); +} + +static inline unsigned long mm_nr_puds(const struct mm_struct *mm) +{ + return atomic_long_read(&mm->nr_puds); +} + +static inline void mm_inc_nr_puds(struct mm_struct *mm) +{ + atomic_long_inc(&mm->nr_puds); +} + +static inline void mm_dec_nr_puds(struct mm_struct *mm) +{ + atomic_long_dec(&mm->nr_puds); +} #endif #if defined(__PAGETABLE_PMD_FOLDED) || !defined(CONFIG_MMU) @@ -1618,7 +1648,7 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, static inline void mm_nr_pmds_init(struct mm_struct *mm) {} -static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +static inline unsigned long mm_nr_pmds(const struct mm_struct *mm) { return 0; } @@ -1634,7 +1664,7 @@ static inline void mm_nr_pmds_init(struct mm_struct *mm) atomic_long_set(&mm->nr_pmds, 0); } -static inline unsigned long mm_nr_pmds(struct mm_struct *mm) +static inline unsigned long mm_nr_pmds(const struct mm_struct *mm) { return atomic_long_read(&mm->nr_pmds); } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index d1b8e8f97fc2..e9e561e02d22 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -404,6 +404,9 @@ struct mm_struct { atomic_long_t nr_ptes; /* PTE page table pages */ #if CONFIG_PGTABLE_LEVELS > 2 atomic_long_t nr_pmds; /* PMD page table pages */ +#endif +#if CONFIG_PGTABLE_LEVELS > 3 + atomic_long_t nr_puds; /* PUD page table pages */ #endif int map_count; /* number of VMAs */ -- cgit v1.2.3 From c4812909f5d5a9b7f1c85a2d95be388a066cda52 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Nov 2017 17:35:37 -0800 Subject: mm: introduce wrappers to access mm->nr_ptes Let's add wrappers for ->nr_ptes with the same interface as for nr_pmd and nr_pud. The patch also makes nr_ptes accounting dependent onto CONFIG_MMU. Page table accounting doesn't make sense if you don't have page tables. It's preparation for consolidation of page-table counters in mm_struct. Link: http://lkml.kernel.org/r/20171006100651.44742-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 32 ++++++++++++++++++++++++++++++++ include/linux/mm_types.h | 2 ++ 2 files changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9af86f39d928..2ca799f0d762 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1680,6 +1680,38 @@ static inline void mm_dec_nr_pmds(struct mm_struct *mm) } #endif +#ifdef CONFIG_MMU +static inline void mm_nr_ptes_init(struct mm_struct *mm) +{ + atomic_long_set(&mm->nr_ptes, 0); +} + +static inline unsigned long mm_nr_ptes(const struct mm_struct *mm) +{ + return atomic_long_read(&mm->nr_ptes); +} + +static inline void mm_inc_nr_ptes(struct mm_struct *mm) +{ + atomic_long_inc(&mm->nr_ptes); +} + +static inline void mm_dec_nr_ptes(struct mm_struct *mm) +{ + atomic_long_dec(&mm->nr_ptes); +} +#else +static inline void mm_nr_ptes_init(struct mm_struct *mm) {} + +static inline unsigned long mm_nr_ptes(const struct mm_struct *mm) +{ + return 0; +} + +static inline void mm_inc_nr_ptes(struct mm_struct *mm) {} +static inline void mm_dec_nr_ptes(struct mm_struct *mm) {} +#endif + int __pte_alloc(struct mm_struct *mm, pmd_t *pmd, unsigned long address); int __pte_alloc_kernel(pmd_t *pmd, unsigned long address); diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e9e561e02d22..e42048020664 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -401,7 +401,9 @@ struct mm_struct { */ atomic_t mm_count; +#ifdef CONFIG_MMU atomic_long_t nr_ptes; /* PTE page table pages */ +#endif #if CONFIG_PGTABLE_LEVELS > 2 atomic_long_t nr_pmds; /* PMD page table pages */ #endif -- cgit v1.2.3 From af5b0f6a09e42c9f4fa87735f2a366748767b686 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Wed, 15 Nov 2017 17:35:40 -0800 Subject: mm: consolidate page table accounting Currently, we account page tables separately for each page table level, but that's redundant -- we only make use of total memory allocated to page tables for oom_badness calculation. We also provide the information to userspace, but it has dubious value there too. This patch switches page table accounting to single counter. mm->pgtables_bytes is now used to account all page table levels. We use bytes, because page table size for different levels of page table tree may be different. The change has user-visible effect: we don't have VmPMD and VmPUD reported in /proc/[pid]/status. Not sure if anybody uses them. (As alternative, we can always report 0 kB for them.) OOM-killer report is also slightly changed: we now report pgtables_bytes instead of nr_ptes, nr_pmd, nr_puds. Apart from reducing number of counters per-mm, the benefit is that we now calculate oom_badness() more correctly for machines which have different size of page tables depending on level or where page tables are less than a page in size. The only downside can be debuggability because we do not know which page table level could leak. But I do not remember many bugs that would be caught by separate counters so I wouldn't lose sleep over this. [akpm@linux-foundation.org: fix mm/huge_memory.c] Link: http://lkml.kernel.org/r/20171006100651.44742-2-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko [kirill.shutemov@linux.intel.com: fix build] Link: http://lkml.kernel.org/r/20171016150113.ikfxy3e7zzfvsr4w@black.fi.intel.com Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 58 ++++++++++-------------------------------------- include/linux/mm_types.h | 8 +------ 2 files changed, 13 insertions(+), 53 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 2ca799f0d762..7c1e82a1aa77 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1605,37 +1605,20 @@ static inline int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, { return 0; } - -static inline unsigned long mm_nr_puds(const struct mm_struct *mm) -{ - return 0; -} - -static inline void mm_nr_puds_init(struct mm_struct *mm) {} static inline void mm_inc_nr_puds(struct mm_struct *mm) {} static inline void mm_dec_nr_puds(struct mm_struct *mm) {} #else int __pud_alloc(struct mm_struct *mm, p4d_t *p4d, unsigned long address); -static inline void mm_nr_puds_init(struct mm_struct *mm) -{ - atomic_long_set(&mm->nr_puds, 0); -} - -static inline unsigned long mm_nr_puds(const struct mm_struct *mm) -{ - return atomic_long_read(&mm->nr_puds); -} - static inline void mm_inc_nr_puds(struct mm_struct *mm) { - atomic_long_inc(&mm->nr_puds); + atomic_long_add(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_puds(struct mm_struct *mm) { - atomic_long_dec(&mm->nr_puds); + atomic_long_sub(PTRS_PER_PUD * sizeof(pud_t), &mm->pgtables_bytes); } #endif @@ -1646,64 +1629,47 @@ static inline int __pmd_alloc(struct mm_struct *mm, pud_t *pud, return 0; } -static inline void mm_nr_pmds_init(struct mm_struct *mm) {} - -static inline unsigned long mm_nr_pmds(const struct mm_struct *mm) -{ - return 0; -} - static inline void mm_inc_nr_pmds(struct mm_struct *mm) {} static inline void mm_dec_nr_pmds(struct mm_struct *mm) {} #else int __pmd_alloc(struct mm_struct *mm, pud_t *pud, unsigned long address); -static inline void mm_nr_pmds_init(struct mm_struct *mm) -{ - atomic_long_set(&mm->nr_pmds, 0); -} - -static inline unsigned long mm_nr_pmds(const struct mm_struct *mm) -{ - return atomic_long_read(&mm->nr_pmds); -} - static inline void mm_inc_nr_pmds(struct mm_struct *mm) { - atomic_long_inc(&mm->nr_pmds); + atomic_long_add(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_pmds(struct mm_struct *mm) { - atomic_long_dec(&mm->nr_pmds); + atomic_long_sub(PTRS_PER_PMD * sizeof(pmd_t), &mm->pgtables_bytes); } #endif #ifdef CONFIG_MMU -static inline void mm_nr_ptes_init(struct mm_struct *mm) +static inline void mm_pgtables_bytes_init(struct mm_struct *mm) { - atomic_long_set(&mm->nr_ptes, 0); + atomic_long_set(&mm->pgtables_bytes, 0); } -static inline unsigned long mm_nr_ptes(const struct mm_struct *mm) +static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) { - return atomic_long_read(&mm->nr_ptes); + return atomic_long_read(&mm->pgtables_bytes); } static inline void mm_inc_nr_ptes(struct mm_struct *mm) { - atomic_long_inc(&mm->nr_ptes); + atomic_long_add(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes); } static inline void mm_dec_nr_ptes(struct mm_struct *mm) { - atomic_long_dec(&mm->nr_ptes); + atomic_long_sub(PTRS_PER_PTE * sizeof(pte_t), &mm->pgtables_bytes); } #else -static inline void mm_nr_ptes_init(struct mm_struct *mm) {} -static inline unsigned long mm_nr_ptes(const struct mm_struct *mm) +static inline void mm_pgtables_bytes_init(struct mm_struct *mm) {} +static inline unsigned long mm_pgtables_bytes(const struct mm_struct *mm) { return 0; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e42048020664..09643e0472fc 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -402,13 +402,7 @@ struct mm_struct { atomic_t mm_count; #ifdef CONFIG_MMU - atomic_long_t nr_ptes; /* PTE page table pages */ -#endif -#if CONFIG_PGTABLE_LEVELS > 2 - atomic_long_t nr_pmds; /* PMD page table pages */ -#endif -#if CONFIG_PGTABLE_LEVELS > 3 - atomic_long_t nr_puds; /* PUD page table pages */ + atomic_long_t pgtables_bytes; /* PTE page table pages */ #endif int map_count; /* number of VMAs */ -- cgit v1.2.3 From 4950276672fce5c241857540f8561c440663673d Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:51 -0800 Subject: kmemcheck: remove annotations Patch series "kmemcheck: kill kmemcheck", v2. As discussed at LSF/MM, kill kmemcheck. KASan is a replacement that is able to work without the limitation of kmemcheck (single CPU, slow). KASan is already upstream. We are also not aware of any users of kmemcheck (or users who don't consider KASan as a suitable replacement). The only objection was that since KASAN wasn't supported by all GCC versions provided by distros at that time we should hold off for 2 years, and try again. Now that 2 years have passed, and all distros provide gcc that supports KASAN, kill kmemcheck again for the very same reasons. This patch (of 4): Remove kmemcheck annotations, and calls to kmemcheck from the kernel. [alexander.levin@verizon.com: correctly remove kmemcheck call from dma_map_sg_attrs] Link: http://lkml.kernel.org/r/20171012192151.26531-1-alexander.levin@verizon.com Link: http://lkml.kernel.org/r/20171007030159.22241-2-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/c2port.h | 4 ---- include/linux/dma-mapping.h | 8 +------- include/linux/filter.h | 2 -- include/linux/mm_types.h | 8 -------- include/linux/net.h | 3 --- include/linux/ring_buffer.h | 3 --- include/linux/skbuff.h | 3 --- 7 files changed, 1 insertion(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/c2port.h b/include/linux/c2port.h index 4efabcb51347..f2736348ca26 100644 --- a/include/linux/c2port.h +++ b/include/linux/c2port.h @@ -9,8 +9,6 @@ * the Free Software Foundation */ -#include - #define C2PORT_NAME_LEN 32 struct device; @@ -22,10 +20,8 @@ struct device; /* Main struct */ struct c2port_ops; struct c2port_device { - kmemcheck_bitfield_begin(flags); unsigned int access:1; unsigned int flash_access:1; - kmemcheck_bitfield_end(flags); int id; char name[C2PORT_NAME_LEN]; diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eee1499db396..e8f8e8fb244d 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -9,7 +9,6 @@ #include #include #include -#include #include #include @@ -232,7 +231,6 @@ static inline dma_addr_t dma_map_single_attrs(struct device *dev, void *ptr, const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; - kmemcheck_mark_initialized(ptr, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, virt_to_page(ptr), offset_in_page(ptr), size, @@ -265,11 +263,8 @@ static inline int dma_map_sg_attrs(struct device *dev, struct scatterlist *sg, unsigned long attrs) { const struct dma_map_ops *ops = get_dma_ops(dev); - int i, ents; - struct scatterlist *s; + int ents; - for_each_sg(sg, s, nents, i) - kmemcheck_mark_initialized(sg_virt(s), s->length); BUG_ON(!valid_dma_direction(dir)); ents = ops->map_sg(dev, sg, nents, dir, attrs); BUG_ON(ents < 0); @@ -299,7 +294,6 @@ static inline dma_addr_t dma_map_page_attrs(struct device *dev, const struct dma_map_ops *ops = get_dma_ops(dev); dma_addr_t addr; - kmemcheck_mark_initialized(page_address(page) + offset, size); BUG_ON(!valid_dma_direction(dir)); addr = ops->map_page(dev, page, offset, size, dir, attrs); debug_dma_map_page(dev, page, offset, size, dir, addr, false); diff --git a/include/linux/filter.h b/include/linux/filter.h index 48ec57e70f9f..42197b16dd78 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -454,13 +454,11 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ - kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ locked:1, /* Program image locked? */ gpl_compatible:1, /* Is filter GPL compatible? */ cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? */ - kmemcheck_bitfield_end(meta); enum bpf_prog_type type; /* Type of BPF program */ u32 len; /* Number of filter blocks */ u32 jited_len; /* Size of jited insns in bytes */ diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 09643e0472fc..cfd0ac4e5e0e 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -209,14 +209,6 @@ struct page { not kmapped, ie. highmem) */ #endif /* WANT_PAGE_VIRTUAL */ -#ifdef CONFIG_KMEMCHECK - /* - * kmemcheck wants to track the status of each byte in a page; this - * is a pointer to such a status block. NULL if not tracked. - */ - void *shadow; -#endif - #ifdef LAST_CPUPID_NOT_IN_PAGE_FLAGS int _last_cpupid; #endif diff --git a/include/linux/net.h b/include/linux/net.h index d97d80d7fdf8..caeb159abda5 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -22,7 +22,6 @@ #include #include #include /* For O_CLOEXEC and O_NONBLOCK */ -#include #include #include #include @@ -111,9 +110,7 @@ struct socket_wq { struct socket { socket_state state; - kmemcheck_bitfield_begin(type); short type; - kmemcheck_bitfield_end(type); unsigned long flags; diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index fa6ace66fea5..289e4d54e3e0 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -2,7 +2,6 @@ #ifndef _LINUX_RING_BUFFER_H #define _LINUX_RING_BUFFER_H -#include #include #include #include @@ -14,9 +13,7 @@ struct ring_buffer_iter; * Don't refer to this struct directly, use functions below. */ struct ring_buffer_event { - kmemcheck_bitfield_begin(bitfield); u32 type_len:5, time_delta:27; - kmemcheck_bitfield_end(bitfield); u32 array[]; }; diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index d448a4804aea..aa1341474916 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -15,7 +15,6 @@ #define _LINUX_SKBUFF_H #include -#include #include #include #include @@ -704,7 +703,6 @@ struct sk_buff { /* Following fields are _not_ copied in __copy_skb_header() * Note that queue_mapping is here mostly to fill a hole. */ - kmemcheck_bitfield_begin(flags1); __u16 queue_mapping; /* if you move cloned around you also must adapt those constants */ @@ -723,7 +721,6 @@ struct sk_buff { head_frag:1, xmit_more:1, __unused:1; /* one bit hole */ - kmemcheck_bitfield_end(flags1); /* fields enclosed in headers_start/headers_end are copied * using a single memcpy() in __copy_skb_header() -- cgit v1.2.3 From 75f296d93bcebcfe375884ddac79e30263a31766 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:54 -0800 Subject: kmemcheck: stop using GFP_NOTRACK and SLAB_NOTRACK Convert all allocations that used a NOTRACK flag to stop using it. Link: http://lkml.kernel.org/r/20171007030159.22241-3-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/thread_info.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thread_info.h b/include/linux/thread_info.h index 4bcdf00c110f..34f053a150a9 100644 --- a/include/linux/thread_info.h +++ b/include/linux/thread_info.h @@ -44,10 +44,9 @@ enum { #endif #if IS_ENABLED(CONFIG_DEBUG_STACK_USAGE) || IS_ENABLED(CONFIG_DEBUG_KMEMLEAK) -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK | \ - __GFP_ZERO) +# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_ZERO) #else -# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT | __GFP_NOTRACK) +# define THREADINFO_GFP (GFP_KERNEL_ACCOUNT) #endif /* -- cgit v1.2.3 From d8be75663cec0069b85f80191abd2682ce4a512f Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:35:58 -0800 Subject: kmemcheck: remove whats left of NOTRACK flags Now that kmemcheck is gone, we don't need the NOTRACK flags. Link: http://lkml.kernel.org/r/20171007030159.22241-5-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Alexander Potapenko Cc: Eric W. Biederman Cc: Michal Hocko Cc: Pekka Enberg Cc: Steven Rostedt Cc: Tim Hansen Cc: Vegard Nossum Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 9 --------- include/linux/slab.h | 6 ------ 2 files changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 710143741eb5..b041f94678de 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -37,7 +37,6 @@ struct vm_area_struct; #define ___GFP_THISNODE 0x40000u #define ___GFP_ATOMIC 0x80000u #define ___GFP_ACCOUNT 0x100000u -#define ___GFP_NOTRACK 0x200000u #define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_WRITE 0x800000u #define ___GFP_KSWAPD_RECLAIM 0x1000000u @@ -201,19 +200,11 @@ struct vm_area_struct; * __GFP_COMP address compound page metadata. * * __GFP_ZERO returns a zeroed page on success. - * - * __GFP_NOTRACK avoids tracking with kmemcheck. - * - * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of - * distinguishing in the source between false positives and allocations that - * cannot be supported (e.g. page tables). */ #define __GFP_COLD ((__force gfp_t)___GFP_COLD) #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) -#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) -#define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) /* Disable lockdep for GFP context tracking */ #define __GFP_NOLOCKDEP ((__force gfp_t)___GFP_NOLOCKDEP) diff --git a/include/linux/slab.h b/include/linux/slab.h index ebddfca9e24b..79f6532f8a0b 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -89,12 +89,6 @@ /* Avoid kmemleak tracing */ #define SLAB_NOLEAKTRACE ((slab_flags_t __force)0x00800000U) -/* Don't track use of uninitialized memory */ -#ifdef CONFIG_KMEMCHECK -# define SLAB_NOTRACK ((slab_flags_t __force)0x01000000U) -#else -# define SLAB_NOTRACK 0 -#endif /* Fault injection mark */ #ifdef CONFIG_FAILSLAB # define SLAB_FAILSLAB ((slab_flags_t __force)0x02000000U) -- cgit v1.2.3 From 4675ff05de2d76d167336b368bd07f3fef6ed5a6 Mon Sep 17 00:00:00 2001 From: "Levin, Alexander (Sasha Levin)" Date: Wed, 15 Nov 2017 17:36:02 -0800 Subject: kmemcheck: rip it out Fix up makefiles, remove references, and git rm kmemcheck. Link: http://lkml.kernel.org/r/20171007030159.22241-4-alexander.levin@verizon.com Signed-off-by: Sasha Levin Cc: Steven Rostedt Cc: Vegard Nossum Cc: Pekka Enberg Cc: Michal Hocko Cc: Eric W. Biederman Cc: Alexander Potapenko Cc: Tim Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/interrupt.h | 15 ---- include/linux/kmemcheck.h | 171 ---------------------------------------------- 2 files changed, 186 deletions(-) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index baeb872283d9..69c238210325 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -594,21 +594,6 @@ static inline void tasklet_hi_schedule(struct tasklet_struct *t) __tasklet_hi_schedule(t); } -extern void __tasklet_hi_schedule_first(struct tasklet_struct *t); - -/* - * This version avoids touching any other tasklets. Needed for kmemcheck - * in order not to take any page faults while enqueueing this tasklet; - * consider VERY carefully whether you really need this or - * tasklet_hi_schedule()... - */ -static inline void tasklet_hi_schedule_first(struct tasklet_struct *t) -{ - if (!test_and_set_bit(TASKLET_STATE_SCHED, &t->state)) - __tasklet_hi_schedule_first(t); -} - - static inline void tasklet_disable_nosync(struct tasklet_struct *t) { atomic_inc(&t->count); diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h index 7b1d7bead7d9..ea32a7d3cf1b 100644 --- a/include/linux/kmemcheck.h +++ b/include/linux/kmemcheck.h @@ -1,172 +1 @@ /* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_KMEMCHECK_H -#define LINUX_KMEMCHECK_H - -#include -#include - -#ifdef CONFIG_KMEMCHECK -extern int kmemcheck_enabled; - -/* The slab-related functions. */ -void kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node); -void kmemcheck_free_shadow(struct page *page, int order); -void kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size); -void kmemcheck_slab_free(struct kmem_cache *s, void *object, size_t size); - -void kmemcheck_pagealloc_alloc(struct page *p, unsigned int order, - gfp_t gfpflags); - -void kmemcheck_show_pages(struct page *p, unsigned int n); -void kmemcheck_hide_pages(struct page *p, unsigned int n); - -bool kmemcheck_page_is_tracked(struct page *p); - -void kmemcheck_mark_unallocated(void *address, unsigned int n); -void kmemcheck_mark_uninitialized(void *address, unsigned int n); -void kmemcheck_mark_initialized(void *address, unsigned int n); -void kmemcheck_mark_freed(void *address, unsigned int n); - -void kmemcheck_mark_unallocated_pages(struct page *p, unsigned int n); -void kmemcheck_mark_uninitialized_pages(struct page *p, unsigned int n); -void kmemcheck_mark_initialized_pages(struct page *p, unsigned int n); - -int kmemcheck_show_addr(unsigned long address); -int kmemcheck_hide_addr(unsigned long address); - -bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size); - -/* - * Bitfield annotations - * - * How to use: If you have a struct using bitfields, for example - * - * struct a { - * int x:8, y:8; - * }; - * - * then this should be rewritten as - * - * struct a { - * kmemcheck_bitfield_begin(flags); - * int x:8, y:8; - * kmemcheck_bitfield_end(flags); - * }; - * - * Now the "flags_begin" and "flags_end" members may be used to refer to the - * beginning and end, respectively, of the bitfield (and things like - * &x.flags_begin is allowed). As soon as the struct is allocated, the bit- - * fields should be annotated: - * - * struct a *a = kmalloc(sizeof(struct a), GFP_KERNEL); - * kmemcheck_annotate_bitfield(a, flags); - */ -#define kmemcheck_bitfield_begin(name) \ - int name##_begin[0]; - -#define kmemcheck_bitfield_end(name) \ - int name##_end[0]; - -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - int _n; \ - \ - if (!ptr) \ - break; \ - \ - _n = (long) &((ptr)->name##_end) \ - - (long) &((ptr)->name##_begin); \ - BUILD_BUG_ON(_n < 0); \ - \ - kmemcheck_mark_initialized(&((ptr)->name##_begin), _n); \ - } while (0) - -#define kmemcheck_annotate_variable(var) \ - do { \ - kmemcheck_mark_initialized(&(var), sizeof(var)); \ - } while (0) \ - -#else -#define kmemcheck_enabled 0 - -static inline void -kmemcheck_alloc_shadow(struct page *page, int order, gfp_t flags, int node) -{ -} - -static inline void -kmemcheck_free_shadow(struct page *page, int order) -{ -} - -static inline void -kmemcheck_slab_alloc(struct kmem_cache *s, gfp_t gfpflags, void *object, - size_t size) -{ -} - -static inline void kmemcheck_slab_free(struct kmem_cache *s, void *object, - size_t size) -{ -} - -static inline void kmemcheck_pagealloc_alloc(struct page *p, - unsigned int order, gfp_t gfpflags) -{ -} - -static inline bool kmemcheck_page_is_tracked(struct page *p) -{ - return false; -} - -static inline void kmemcheck_mark_unallocated(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_uninitialized(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_initialized(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_freed(void *address, unsigned int n) -{ -} - -static inline void kmemcheck_mark_unallocated_pages(struct page *p, - unsigned int n) -{ -} - -static inline void kmemcheck_mark_uninitialized_pages(struct page *p, - unsigned int n) -{ -} - -static inline void kmemcheck_mark_initialized_pages(struct page *p, - unsigned int n) -{ -} - -static inline bool kmemcheck_is_obj_initialized(unsigned long addr, size_t size) -{ - return true; -} - -#define kmemcheck_bitfield_begin(name) -#define kmemcheck_bitfield_end(name) -#define kmemcheck_annotate_bitfield(ptr, name) \ - do { \ - } while (0) - -#define kmemcheck_annotate_variable(var) \ - do { \ - } while (0) - -#endif /* CONFIG_KMEMCHECK */ - -#endif /* LINUX_KMEMCHECK_H */ -- cgit v1.2.3 From ea1f5f3712afe895dfa4176ec87376b4a9ac23be Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 15 Nov 2017 17:36:27 -0800 Subject: mm: define memblock_virt_alloc_try_nid_raw * A new variant of memblock_virt_alloc_* allocations: memblock_virt_alloc_try_nid_raw() - Does not zero the allocated memory - Does not panic if request cannot be satisfied * optimize early system hash allocations Clients can call alloc_large_system_hash() with flag: HASH_ZERO to specify that memory that was allocated for system hash needs to be zeroed, otherwise the memory does not need to be zeroed, and client will initialize it. If memory does not need to be zero'd, call the new memblock_virt_alloc_raw() interface, and thus improve the boot performance. * debug for raw alloctor When CONFIG_DEBUG_VM is enabled, this patch sets all the memory that is returned by memblock_virt_alloc_try_nid_raw() to ones to ensure that no places excpect zeroed memory. Link: http://lkml.kernel.org/r/20171013173214.27300-6-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Tested-by: Bob Picco Acked-by: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Christian Borntraeger Cc: David S. Miller Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mark Rutland Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Sam Ravnborg Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bootmem.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bootmem.h b/include/linux/bootmem.h index fdf40ca04b3c..a53063e9d7d8 100644 --- a/include/linux/bootmem.h +++ b/include/linux/bootmem.h @@ -161,6 +161,9 @@ extern void *__alloc_bootmem_low_node(pg_data_t *pgdat, #define BOOTMEM_ALLOC_ANYWHERE (~(phys_addr_t)0) /* FIXME: Move to memblock.h at a point where we remove nobootmem.c */ +void *memblock_virt_alloc_try_nid_raw(phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, + phys_addr_t max_addr, int nid); void *memblock_virt_alloc_try_nid_nopanic(phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid); @@ -177,6 +180,14 @@ static inline void * __init memblock_virt_alloc( NUMA_NO_NODE); } +static inline void * __init memblock_virt_alloc_raw( + phys_addr_t size, phys_addr_t align) +{ + return memblock_virt_alloc_try_nid_raw(size, align, BOOTMEM_LOW_LIMIT, + BOOTMEM_ALLOC_ACCESSIBLE, + NUMA_NO_NODE); +} + static inline void * __init memblock_virt_alloc_nopanic( phys_addr_t size, phys_addr_t align) { @@ -258,6 +269,14 @@ static inline void * __init memblock_virt_alloc( return __alloc_bootmem(size, align, BOOTMEM_LOW_LIMIT); } +static inline void * __init memblock_virt_alloc_raw( + phys_addr_t size, phys_addr_t align) +{ + if (!align) + align = SMP_CACHE_BYTES; + return __alloc_bootmem_nopanic(size, align, BOOTMEM_LOW_LIMIT); +} + static inline void * __init memblock_virt_alloc_nopanic( phys_addr_t size, phys_addr_t align) { @@ -310,6 +329,14 @@ static inline void * __init memblock_virt_alloc_try_nid(phys_addr_t size, min_addr); } +static inline void * __init memblock_virt_alloc_try_nid_raw( + phys_addr_t size, phys_addr_t align, + phys_addr_t min_addr, phys_addr_t max_addr, int nid) +{ + return ___alloc_bootmem_node_nopanic(NODE_DATA(nid), size, align, + min_addr, max_addr); +} + static inline void * __init memblock_virt_alloc_try_nid_nopanic( phys_addr_t size, phys_addr_t align, phys_addr_t min_addr, phys_addr_t max_addr, int nid) -- cgit v1.2.3 From a4a3ede2132ae0863e2d43e06f9b5697c51a7a3b Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 15 Nov 2017 17:36:31 -0800 Subject: mm: zero reserved and unavailable struct pages Some memory is reserved but unavailable: not present in memblock.memory (because not backed by physical pages), but present in memblock.reserved. Such memory has backing struct pages, but they are not initialized by going through __init_single_page(). In some cases these struct pages are accessed even if they do not contain any data. One example is page_to_pfn() might access page->flags if this is where section information is stored (CONFIG_SPARSEMEM, SECTION_IN_PAGE_FLAGS). One example of such memory: trim_low_memory_range() unconditionally reserves from pfn 0, but e820__memblock_setup() might provide the exiting memory from pfn 1 (i.e. KVM). Since struct pages are zeroed in __init_single_page(), and not during allocation time, we must zero such struct pages explicitly. The patch involves adding a new memblock iterator: for_each_resv_unavail_range(i, p_start, p_end) Which iterates through reserved && !memory lists, and we zero struct pages explicitly by calling mm_zero_struct_page(). === Here is more detailed example of problem that this patch is addressing: Run tested on qemu with the following arguments: -enable-kvm -cpu kvm64 -m 512 -smp 2 This patch reports that there are 98 unavailable pages. They are: pfn 0 and pfns in range [159, 255]. Note, trim_low_memory_range() reserves only pfns in range [0, 15], it does not reserve [159, 255] ones. e820__memblock_setup() reports linux that the following physical ranges are available: [1 , 158] [256, 130783] Notice, that exactly unavailable pfns are missing! Now, lets check what we have in zone 0: [1, 131039] pfn 0, is not part of the zone, but pfns [1, 158], are. However, the bigger problem we have if we do not initialize these struct pages is with memory hotplug. Because, that path operates at 2M boundaries (section_nr). And checks if 2M range of pages is hot removable. It starts with first pfn from zone, rounds it down to 2M boundary (sturct pages are allocated at 2M boundaries when vmemmap is created), and checks if that section is hot removable. In this case start with pfn 1 and convert it down to pfn 0. Later pfn is converted to struct page, and some fields are checked. Now, if we do not zero struct pages, we get unpredictable results. In fact when CONFIG_VM_DEBUG is enabled, and we explicitly set all vmemmap memory to ones, the following panic is observed with kernel test without this patch applied: BUG: unable to handle kernel NULL pointer dereference at (null) IP: is_pageblock_removable_nolock+0x35/0x90 PGD 0 P4D 0 Oops: 0000 [#1] PREEMPT ... task: ffff88001f4e2900 task.stack: ffffc90000314000 RIP: 0010:is_pageblock_removable_nolock+0x35/0x90 Call Trace: ? is_mem_section_removable+0x5a/0xd0 show_mem_removable+0x6b/0xa0 dev_attr_show+0x1b/0x50 sysfs_kf_seq_show+0xa1/0x100 kernfs_seq_show+0x22/0x30 seq_read+0x1ac/0x3a0 kernfs_fop_read+0x36/0x190 ? security_file_permission+0x90/0xb0 __vfs_read+0x16/0x30 vfs_read+0x81/0x130 SyS_read+0x44/0xa0 entry_SYSCALL_64_fastpath+0x1f/0xbd Link: http://lkml.kernel.org/r/20171013173214.27300-7-pasha.tatashin@oracle.com Signed-off-by: Pavel Tatashin Reviewed-by: Steven Sistare Reviewed-by: Daniel Jordan Reviewed-by: Bob Picco Tested-by: Bob Picco Acked-by: Michal Hocko Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Ard Biesheuvel Cc: Catalin Marinas Cc: Christian Borntraeger Cc: David S. Miller Cc: Dmitry Vyukov Cc: Heiko Carstens Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Mark Rutland Cc: Matthew Wilcox Cc: Mel Gorman Cc: Michal Hocko Cc: Sam Ravnborg Cc: Thomas Gleixner Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 16 ++++++++++++++++ include/linux/mm.h | 15 +++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index ce0e5634c2f9..7ed0f7782d16 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -237,6 +237,22 @@ unsigned long memblock_next_valid_pfn(unsigned long pfn, unsigned long max_pfn); for_each_mem_range_rev(i, &memblock.memory, &memblock.reserved, \ nid, flags, p_start, p_end, p_nid) +/** + * for_each_resv_unavail_range - iterate through reserved and unavailable memory + * @i: u64 used as loop variable + * @flags: pick from blocks based on memory attributes + * @p_start: ptr to phys_addr_t for start address of the range, can be %NULL + * @p_end: ptr to phys_addr_t for end address of the range, can be %NULL + * + * Walks over unavailable but reserved (reserved && !memory) areas of memblock. + * Available as soon as memblock is initialized. + * Note: because this memory does not belong to any physical node, flags and + * nid arguments do not make sense and thus not exported as arguments. + */ +#define for_each_resv_unavail_range(i, p_start, p_end) \ + for_each_mem_range(i, &memblock.reserved, &memblock.memory, \ + NUMA_NO_NODE, MEMBLOCK_NONE, p_start, p_end, NULL) + static inline void memblock_set_region_flags(struct memblock_region *r, unsigned long flags) { diff --git a/include/linux/mm.h b/include/linux/mm.h index 7c1e82a1aa77..703599fa8288 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -95,6 +95,15 @@ extern int mmap_rnd_compat_bits __read_mostly; #define mm_forbids_zeropage(X) (0) #endif +/* + * On some architectures it is expensive to call memset() for small sizes. + * Those architectures should provide their own implementation of "struct page" + * zeroing by defining this macro in . + */ +#ifndef mm_zero_struct_page +#define mm_zero_struct_page(pp) ((void)memset((pp), 0, sizeof(struct page))) +#endif + /* * Default maximum number of active map areas, this limits the number of vmas * per mm struct. Users can overwrite this number by sysctl but there is a @@ -2030,6 +2039,12 @@ extern int __meminit __early_pfn_to_nid(unsigned long pfn, struct mminit_pfnnid_cache *state); #endif +#ifdef CONFIG_HAVE_MEMBLOCK +void zero_resv_unavail(void); +#else +static inline void zero_resv_unavail(void) {} +#endif + extern void set_dma_reserve(unsigned long new_dma_reserve); extern void memmap_init_zone(unsigned long, int, unsigned long, unsigned long, enum memmap_context); -- cgit v1.2.3 From 736304f3245f39392895ff3392e1325d3e49e7d2 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Nov 2017 17:37:11 -0800 Subject: mm: speed up cancel_dirty_page() for clean pages Patch series "Speed up page cache truncation", v1. When rebasing our enterprise distro to a newer kernel (from 4.4 to 4.12) we have noticed a regression in bonnie++ benchmark when deleting files. Eventually we have tracked this down to a fact that page cache truncation got slower by about 10%. There were both gains and losses in the above interval of kernels but we have been able to identify that commit 83929372f629 ("filemap: prepare find and delete operations for huge pages") caused about 10% regression on its own. After some investigation it didn't seem easily possible to fix the regression while maintaining the THP in page cache functionality so we've decided to optimize the page cache truncation path instead to make up for the change. This series is a result of that effort. Patch 1 is an easy speedup of cancel_dirty_page(). Patches 2-6 refactor page cache truncation code so that it is easier to batch radix tree operations. Patch 7 implements batching of deletes from the radix tree which more than makes up for the original regression. This patch (of 7): cancel_dirty_page() does quite some work even for clean pages (fetching of mapping, locking of memcg, atomic bit op on page flags) so it accounts for ~2.5% of cost of truncation of a clean page. That is not much but still dumb for something we don't need at all. Check whether a page is actually dirty and avoid any work if not. Link: http://lkml.kernel.org/r/20171010151937.26984-2-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Mel Gorman Reviewed-by: Andi Kleen Cc: Dave Hansen Cc: Dave Chinner Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 703599fa8288..c7b1d617dff6 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1440,7 +1440,13 @@ void account_page_cleaned(struct page *page, struct address_space *mapping, struct bdi_writeback *wb); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); -void cancel_dirty_page(struct page *page); +void __cancel_dirty_page(struct page *page); +static inline void cancel_dirty_page(struct page *page) +{ + /* Avoid atomic ops, locking, etc. when not actually needed. */ + if (PageDirty(page)) + __cancel_dirty_page(page); +} int clear_page_dirty_for_io(struct page *page); int get_cmdline(struct task_struct *task, char *buffer, int buflen); -- cgit v1.2.3 From aa65c29ce1b6e1990cd2c7d8004bbea7ff3aff38 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Wed, 15 Nov 2017 17:37:33 -0800 Subject: mm: batch radix tree operations when truncating pages Currently we remove pages from the radix tree one by one. To speed up page cache truncation, lock several pages at once and free them in one go. This allows us to batch radix tree operations in a more efficient way and also save round-trips on mapping->tree_lock. As a result we gain about 20% speed improvement in page cache truncation. Data from a simple benchmark timing 10000 truncates of 1024 pages (on ext4 on ramdisk but the filesystem is barely visible in the profiles). The range shows 1% and 95% percentiles of the measured times: 4.14-rc2 4.14-rc2 + batched truncation 248-256 209-219 249-258 209-217 248-255 211-239 248-255 209-217 247-256 210-218 [jack@suse.cz: convert delete_from_page_cache_batch() to pagevec] Link: http://lkml.kernel.org/r/20171018111648.13714-1-jack@suse.cz [akpm@linux-foundation.org: move struct pagevec forward declaration to top-of-file] Link: http://lkml.kernel.org/r/20171010151937.26984-8-jack@suse.cz Signed-off-by: Jan Kara Acked-by: Mel Gorman Reviewed-by: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: "Kirill A. Shutemov" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 956d6a80e126..e0f7181118fe 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -16,6 +16,8 @@ #include /* for in_interrupt() */ #include +struct pagevec; + /* * Bits in mapping->flags. */ @@ -624,6 +626,8 @@ int add_to_page_cache_lru(struct page *page, struct address_space *mapping, extern void delete_from_page_cache(struct page *page); extern void __delete_from_page_cache(struct page *page, void *shadow); int replace_page_cache_page(struct page *old, struct page *new, gfp_t gfp_mask); +void delete_from_page_cache_batch(struct address_space *mapping, + struct pagevec *pvec); /* * Like add_to_page_cache_locked, but used to add newly allocated pages: -- cgit v1.2.3 From c7df8ad2910e965a6241b6d8f52fd122e26b0315 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:41 -0800 Subject: mm, truncate: do not check mapping for every page being truncated During truncation, the mapping has already been checked for shmem and dax so it's known that workingset_update_node is required. This patch avoids the checks on mapping for each page being truncated. In all other cases, a lookup helper is used to determine if workingset_update_node() needs to be called. The one danger is that the API is slightly harder to use as calling workingset_update_node directly without checking for dax or shmem mappings could lead to surprises. However, the API rarely needs to be used and hopefully the comment is enough to give people the hint. sparsetruncate (tiny) 4.14.0-rc4 4.14.0-rc4 oneirq-v1r1 pickhelper-v1r1 Min Time 141.00 ( 0.00%) 140.00 ( 0.71%) 1st-qrtle Time 142.00 ( 0.00%) 141.00 ( 0.70%) 2nd-qrtle Time 142.00 ( 0.00%) 142.00 ( 0.00%) 3rd-qrtle Time 143.00 ( 0.00%) 143.00 ( 0.00%) Max-90% Time 144.00 ( 0.00%) 144.00 ( 0.00%) Max-95% Time 147.00 ( 0.00%) 145.00 ( 1.36%) Max-99% Time 195.00 ( 0.00%) 191.00 ( 2.05%) Max Time 230.00 ( 0.00%) 205.00 ( 10.87%) Amean Time 144.37 ( 0.00%) 143.82 ( 0.38%) Stddev Time 10.44 ( 0.00%) 9.00 ( 13.74%) Coeff Time 7.23 ( 0.00%) 6.26 ( 13.41%) Best99%Amean Time 143.72 ( 0.00%) 143.34 ( 0.26%) Best95%Amean Time 142.37 ( 0.00%) 142.00 ( 0.26%) Best90%Amean Time 142.19 ( 0.00%) 141.85 ( 0.24%) Best75%Amean Time 141.92 ( 0.00%) 141.58 ( 0.24%) Best50%Amean Time 141.69 ( 0.00%) 141.31 ( 0.27%) Best25%Amean Time 141.38 ( 0.00%) 140.97 ( 0.29%) As you'd expect, the gain is marginal but it can be detected. The differences in bonnie are all within the noise which is not surprising given the impact on the microbenchmark. radix_tree_update_node_t is a callback for some radix operations that optionally passes in a private field. The only user of the callback is workingset_update_node and as it no longer requires a mapping, the private field is removed. Link: http://lkml.kernel.org/r/20171018075952.10627-3-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Johannes Weiner Reviewed-by: Jan Kara Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 7 +++---- include/linux/swap.h | 13 ++++++++++++- 2 files changed, 15 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 567ebb5eaab0..0ca448c1cb42 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -301,18 +301,17 @@ void *__radix_tree_lookup(const struct radix_tree_root *, unsigned long index, void *radix_tree_lookup(const struct radix_tree_root *, unsigned long); void __rcu **radix_tree_lookup_slot(const struct radix_tree_root *, unsigned long index); -typedef void (*radix_tree_update_node_t)(struct radix_tree_node *, void *); +typedef void (*radix_tree_update_node_t)(struct radix_tree_node *); void __radix_tree_replace(struct radix_tree_root *, struct radix_tree_node *, void __rcu **slot, void *entry, - radix_tree_update_node_t update_node, void *private); + radix_tree_update_node_t update_node); void radix_tree_iter_replace(struct radix_tree_root *, const struct radix_tree_iter *, void __rcu **slot, void *entry); void radix_tree_replace_slot(struct radix_tree_root *, void __rcu **slot, void *entry); void __radix_tree_delete_node(struct radix_tree_root *, struct radix_tree_node *, - radix_tree_update_node_t update_node, - void *private); + radix_tree_update_node_t update_node); void radix_tree_iter_delete(struct radix_tree_root *, struct radix_tree_iter *iter, void __rcu **slot); void *radix_tree_delete_item(struct radix_tree_root *, unsigned long, void *); diff --git a/include/linux/swap.h b/include/linux/swap.h index 8b8a6f965785..454f042bcdd5 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -298,7 +298,18 @@ struct vma_swap_readahead { void *workingset_eviction(struct address_space *mapping, struct page *page); bool workingset_refault(void *shadow); void workingset_activation(struct page *page); -void workingset_update_node(struct radix_tree_node *node, void *private); + +/* Do not use directly, use workingset_lookup_update */ +void workingset_update_node(struct radix_tree_node *node); + +/* Returns workingset_update_node() if the mapping has shadow entries. */ +#define workingset_lookup_update(mapping) \ +({ \ + radix_tree_update_node_t __helper = workingset_update_node; \ + if (dax_mapping(mapping) || shmem_mapping(mapping)) \ + __helper = NULL; \ + __helper; \ +}) /* linux/mm/page_alloc.c */ extern unsigned long totalram_pages; -- cgit v1.2.3 From d9ed0d08b6c6a882da1d8e75bb3162fc889fd199 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:48 -0800 Subject: mm: only drain per-cpu pagevecs once per pagevec usage When a pagevec is initialised on the stack, it is generally used multiple times over a range of pages, looking up entries and then releasing them. On each pagevec_release, the per-cpu deferred LRU pagevecs are drained on the grounds the page being released may be on those queues and the pages may be cache hot. In many cases only the first drain is necessary as it's unlikely that the range of pages being walked is racing against LRU addition. Even if there is such a race, the impact is marginal where as constantly redraining the lru pagevecs costs. This patch ensures that pagevec is only drained once in a given lifecycle without increasing the cache footprint of the pagevec structure. Only sparsetruncate tiny is shown here as large files have many exceptional entries and calls pagecache_release less frequently. sparsetruncate (tiny) 4.14.0-rc4 4.14.0-rc4 batchshadow-v1r1 onedrain-v1r1 Min Time 141.00 ( 0.00%) 141.00 ( 0.00%) 1st-qrtle Time 142.00 ( 0.00%) 142.00 ( 0.00%) 2nd-qrtle Time 142.00 ( 0.00%) 142.00 ( 0.00%) 3rd-qrtle Time 143.00 ( 0.00%) 143.00 ( 0.00%) Max-90% Time 144.00 ( 0.00%) 144.00 ( 0.00%) Max-95% Time 146.00 ( 0.00%) 145.00 ( 0.68%) Max-99% Time 198.00 ( 0.00%) 194.00 ( 2.02%) Max Time 254.00 ( 0.00%) 208.00 ( 18.11%) Amean Time 145.12 ( 0.00%) 144.30 ( 0.56%) Stddev Time 12.74 ( 0.00%) 9.62 ( 24.49%) Coeff Time 8.78 ( 0.00%) 6.67 ( 24.06%) Best99%Amean Time 144.29 ( 0.00%) 143.82 ( 0.32%) Best95%Amean Time 142.68 ( 0.00%) 142.31 ( 0.26%) Best90%Amean Time 142.52 ( 0.00%) 142.19 ( 0.24%) Best75%Amean Time 142.26 ( 0.00%) 141.98 ( 0.20%) Best50%Amean Time 141.90 ( 0.00%) 141.71 ( 0.13%) Best25%Amean Time 141.80 ( 0.00%) 141.43 ( 0.26%) The impact on bonnie is marginal and within the noise because a significant percentage of the file being truncated has been reclaimed and consists of shadow entries which reduce the hotness of the pagevec_release path. Link: http://lkml.kernel.org/r/20171018075952.10627-5-mgorman@techsingularity.net Signed-off-by: Mel Gorman Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 4f56e0ad9d00..95c75c858d1f 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -17,7 +17,8 @@ struct address_space; struct pagevec { unsigned long nr; - unsigned long cold; + bool cold; + bool drained; struct page *pages[PAGEVEC_SIZE]; }; @@ -54,6 +55,7 @@ static inline void pagevec_init(struct pagevec *pvec, int cold) { pvec->nr = 0; pvec->cold = cold; + pvec->drained = false; } static inline void pagevec_reinit(struct pagevec *pvec) -- cgit v1.2.3 From 8667982014d6048e0b5e286b6247ff24f48d4cc6 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:52 -0800 Subject: mm, pagevec: remove cold parameter for pagevecs Every pagevec_init user claims the pages being released are hot even in cases where it is unlikely the pages are hot. As no one cares about the hotness of pages being released to the allocator, just ditch the parameter. No performance impact is expected as the overhead is marginal. The parameter is removed simply because it is a bit stupid to have a useless parameter copied everywhere. Link: http://lkml.kernel.org/r/20171018075952.10627-6-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index 95c75c858d1f..eebefd209424 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -17,7 +17,6 @@ struct address_space; struct pagevec { unsigned long nr; - bool cold; bool drained; struct page *pages[PAGEVEC_SIZE]; }; @@ -51,10 +50,9 @@ static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, return pagevec_lookup_range_tag(pvec, mapping, index, (pgoff_t)-1, tag); } -static inline void pagevec_init(struct pagevec *pvec, int cold) +static inline void pagevec_init(struct pagevec *pvec) { pvec->nr = 0; - pvec->cold = cold; pvec->drained = false; } -- cgit v1.2.3 From c6f92f9fbe7dbcc8903a67229aa88b4077ae4422 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:55 -0800 Subject: mm: remove cold parameter for release_pages All callers of release_pages claim the pages being released are cache hot. As no one cares about the hotness of pages being released to the allocator, just ditch the parameter. No performance impact is expected as the overhead is marginal. The parameter is removed simply because it is a bit stupid to have a useless parameter copied everywhere. Link: http://lkml.kernel.org/r/20171018075952.10627-7-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 2 +- include/linux/swap.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index e0f7181118fe..4c6790bb7afb 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -118,7 +118,7 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) m->gfp_mask = mask; } -void release_pages(struct page **pages, int nr, bool cold); +void release_pages(struct page **pages, int nr); /* * speculatively take a reference to a page. diff --git a/include/linux/swap.h b/include/linux/swap.h index 454f042bcdd5..c2b8128799c1 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -510,7 +510,7 @@ static inline struct swap_info_struct *swp_swap_info(swp_entry_t entry) #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ - release_pages((pages), (nr), false); + release_pages((pages), (nr)); static inline void show_swap_cache_info(void) { -- cgit v1.2.3 From 2d4894b5d2ae0fe1725ea7abd57b33bfbbe45492 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:37:59 -0800 Subject: mm: remove cold parameter from free_hot_cold_page* Most callers users of free_hot_cold_page claim the pages being released are cache hot. The exception is the page reclaim paths where it is likely that enough pages will be freed in the near future that the per-cpu lists are going to be recycled and the cache hotness information is lost. As no one really cares about the hotness of pages being released to the allocator, just ditch the parameter. The APIs are renamed to indicate that it's no longer about hot/cold pages. It should also be less confusing as there are subtle differences between them. __free_pages drops a reference and frees a page when the refcount reaches zero. free_hot_cold_page handled pages whose refcount was already zero which is non-obvious from the name. free_unref_page should be more obvious. No performance impact is expected as the overhead is marginal. The parameter is removed simply because it is a bit stupid to have a useless parameter copied everywhere. [mgorman@techsingularity.net: add pages to head, not tail] Link: http://lkml.kernel.org/r/20171019154321.qtpzaeftoyyw4iey@techsingularity.net Link: http://lkml.kernel.org/r/20171018075952.10627-8-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b041f94678de..f7e62d9096fe 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -530,8 +530,8 @@ void * __meminit alloc_pages_exact_nid(int nid, size_t size, gfp_t gfp_mask); extern void __free_pages(struct page *page, unsigned int order); extern void free_pages(unsigned long addr, unsigned int order); -extern void free_hot_cold_page(struct page *page, bool cold); -extern void free_hot_cold_page_list(struct list_head *list, bool cold); +extern void free_unref_page(struct page *page); +extern void free_unref_page_list(struct list_head *list); struct page_frag_cache; extern void __page_frag_cache_drain(struct page *page, unsigned int count); -- cgit v1.2.3 From 453f85d43fa9ee243f0fc3ac4e1be45615301e3f Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:38:03 -0800 Subject: mm: remove __GFP_COLD As the page free path makes no distinction between cache hot and cold pages, there is no real useful ordering of pages in the free list that allocation requests can take advantage of. Juding from the users of __GFP_COLD, it is likely that a number of them are the result of copying other sites instead of actually measuring the impact. Remove the __GFP_COLD parameter which simplifies a number of paths in the page allocator. This is potentially controversial but bear in mind that the size of the per-cpu pagelists versus modern cache sizes means that the whole per-cpu list can often fit in the L3 cache. Hence, there is only a potential benefit for microbenchmarks that alloc/free pages in a tight loop. It's even worse when THP is taken into account which has little or no chance of getting a cache-hot page as the per-cpu list is bypassed and the zeroing of multiple pages will thrash the cache anyway. The truncate microbenchmarks are not shown as this patch affects the allocation path and not the free path. A page fault microbenchmark was tested but it showed no sigificant difference which is not surprising given that the __GFP_COLD branches are a miniscule percentage of the fault path. Link: http://lkml.kernel.org/r/20171018075952.10627-9-mgorman@techsingularity.net Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 5 ----- include/linux/pagemap.h | 8 +------- include/linux/skbuff.h | 2 +- include/linux/slab.h | 3 --- 4 files changed, 2 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f7e62d9096fe..1a4582b44d32 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -24,7 +24,6 @@ struct vm_area_struct; #define ___GFP_HIGH 0x20u #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u -#define ___GFP_COLD 0x100u #define ___GFP_NOWARN 0x200u #define ___GFP_RETRY_MAYFAIL 0x400u #define ___GFP_NOFAIL 0x800u @@ -192,16 +191,12 @@ struct vm_area_struct; /* * Action modifiers * - * __GFP_COLD indicates that the caller does not expect to be used in the near - * future. Where possible, a cache-cold page will be returned. - * * __GFP_NOWARN suppresses allocation failure reports. * * __GFP_COMP address compound page metadata. * * __GFP_ZERO returns a zeroed page on success. */ -#define __GFP_COLD ((__force gfp_t)___GFP_COLD) #define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) #define __GFP_COMP ((__force gfp_t)___GFP_COMP) #define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 4c6790bb7afb..34ce3ebf97d5 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -234,15 +234,9 @@ static inline struct page *page_cache_alloc(struct address_space *x) return __page_cache_alloc(mapping_gfp_mask(x)); } -static inline struct page *page_cache_alloc_cold(struct address_space *x) -{ - return __page_cache_alloc(mapping_gfp_mask(x)|__GFP_COLD); -} - static inline gfp_t readahead_gfp_mask(struct address_space *x) { - return mapping_gfp_mask(x) | - __GFP_COLD | __GFP_NORETRY | __GFP_NOWARN; + return mapping_gfp_mask(x) | __GFP_NORETRY | __GFP_NOWARN; } typedef int filler_t(void *, struct page *); diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index aa1341474916..7c46fd0b8b64 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2672,7 +2672,7 @@ static inline struct page *__dev_alloc_pages(gfp_t gfp_mask, * 4. __GFP_MEMALLOC is ignored if __GFP_NOMEMALLOC is set due to * code in gfp_to_alloc_flags that should be enforcing this. */ - gfp_mask |= __GFP_COLD | __GFP_COMP | __GFP_MEMALLOC; + gfp_mask |= __GFP_COMP | __GFP_MEMALLOC; return alloc_pages_node(NUMA_NO_NODE, gfp_mask, order); } diff --git a/include/linux/slab.h b/include/linux/slab.h index 79f6532f8a0b..50697a1d6621 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -467,9 +467,6 @@ static __always_inline void *kmalloc_large(size_t size, gfp_t flags) * Also it is possible to set different flags by OR'ing * in one or more of the following additional @flags: * - * %__GFP_COLD - Request cache-cold pages instead of - * trying to return cache-warm pages. - * * %__GFP_HIGH - This allocation has high priority and may use emergency pools. * * %__GFP_NOFAIL - Indicate that this allocation is in no way allowed to fail -- cgit v1.2.3 From 7f0b5fb953e750a7410cc96c67a656d79db48bcb Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Wed, 15 Nov 2017 17:38:10 -0800 Subject: mm, pagevec: rename pagevec drained field According to Vlastimil Babka, the drained field in pagevec is potentially misleading because it might be interpreted as draining this pagevec instead of the percpu lru pagevecs. Rename the field for clarity. Link: http://lkml.kernel.org/r/20171019093346.ylahzdpzmoriyf4v@techsingularity.net Signed-off-by: Mel Gorman Suggested-by: Vlastimil Babka Cc: Andi Kleen Cc: Dave Chinner Cc: Dave Hansen Cc: Jan Kara Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagevec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagevec.h b/include/linux/pagevec.h index eebefd209424..5fb6580f7f23 100644 --- a/include/linux/pagevec.h +++ b/include/linux/pagevec.h @@ -17,7 +17,7 @@ struct address_space; struct pagevec { unsigned long nr; - bool drained; + bool percpu_pvec_drained; struct page *pages[PAGEVEC_SIZE]; }; @@ -53,7 +53,7 @@ static inline unsigned pagevec_lookup_tag(struct pagevec *pvec, static inline void pagevec_init(struct pagevec *pvec) { pvec->nr = 0; - pvec->drained = false; + pvec->percpu_pvec_drained = false; } static inline void pagevec_reinit(struct pagevec *pvec) -- cgit v1.2.3 From 4518085e127dff97e74f74a8780d7564e273bec8 Mon Sep 17 00:00:00 2001 From: Kemi Wang Date: Wed, 15 Nov 2017 17:38:22 -0800 Subject: mm, sysctl: make NUMA stats configurable This is the second step which introduces a tunable interface that allow numa stats configurable for optimizing zone_statistics(), as suggested by Dave Hansen and Ying Huang. ========================================================================= When page allocation performance becomes a bottleneck and you can tolerate some possible tool breakage and decreased numa counter precision, you can do: echo 0 > /proc/sys/vm/numa_stat In this case, numa counter update is ignored. We can see about *4.8%*(185->176) drop of cpu cycles per single page allocation and reclaim on Jesper's page_bench01 (single thread) and *8.1%*(343->315) drop of cpu cycles per single page allocation and reclaim on Jesper's page_bench03 (88 threads) running on a 2-Socket Broadwell-based server (88 threads, 126G memory). Benchmark link provided by Jesper D Brouer (increase loop times to 10000000): https://github.com/netoptimizer/prototype-kernel/tree/master/kernel/mm/bench ========================================================================= When page allocation performance is not a bottleneck and you want all tooling to work, you can do: echo 1 > /proc/sys/vm/numa_stat This is system default setting. Many thanks to Michal Hocko, Dave Hansen, Ying Huang and Vlastimil Babka for comments to help improve the original patch. [keescook@chromium.org: make sure mutex is a global static] Link: http://lkml.kernel.org/r/20171107213809.GA4314@beast Link: http://lkml.kernel.org/r/1508290927-8518-1-git-send-email-kemi.wang@intel.com Signed-off-by: Kemi Wang Signed-off-by: Kees Cook Reported-by: Jesper Dangaard Brouer Suggested-by: Dave Hansen Suggested-by: Ying Huang Acked-by: Vlastimil Babka Acked-by: Michal Hocko Cc: "Luis R . Rodriguez" Cc: Kees Cook Cc: Jonathan Corbet Cc: Mel Gorman Cc: Johannes Weiner Cc: Christopher Lameter Cc: Sebastian Andrzej Siewior Cc: Andrey Ryabinin Cc: Tim Chen Cc: Andi Kleen Cc: Aaron Lu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 1e0cb72e0598..1779c9817b39 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -7,9 +7,19 @@ #include #include #include +#include extern int sysctl_stat_interval; +#ifdef CONFIG_NUMA +#define ENABLE_NUMA_STAT 1 +#define DISABLE_NUMA_STAT 0 +extern int sysctl_vm_numa_stat; +DECLARE_STATIC_KEY_TRUE(vm_numa_stat_key); +extern int sysctl_vm_numa_stat_handler(struct ctl_table *table, + int write, void __user *buffer, size_t *length, loff_t *ppos); +#endif + #ifdef CONFIG_VM_EVENT_COUNTERS /* * Light weight per cpu counter implementation. -- cgit v1.2.3 From d135e5750205a21a212a19dbb05aeb339e2cbea7 Mon Sep 17 00:00:00 2001 From: Pavel Tatashin Date: Wed, 15 Nov 2017 17:38:41 -0800 Subject: mm/page_alloc.c: broken deferred calculation In reset_deferred_meminit() we determine number of pages that must not be deferred. We initialize pages for at least 2G of memory, but also pages for reserved memory in this node. The reserved memory is determined in this function: memblock_reserved_memory_within(), which operates over physical addresses, and returns size in bytes. However, reset_deferred_meminit() assumes that that this function operates with pfns, and returns page count. The result is that in the best case machine boots slower than expected due to initializing more pages than needed in single thread, and in the worst case panics because fewer than needed pages are initialized early. Link: http://lkml.kernel.org/r/20171021011707.15191-1-pasha.tatashin@oracle.com Fixes: 864b9a393dcb ("mm: consider memblock reservations for deferred memory initialization sizing") Signed-off-by: Pavel Tatashin Acked-by: Michal Hocko Cc: Mel Gorman Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 39ccaa9c428b..67f2e3c38939 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -700,7 +700,8 @@ typedef struct pglist_data { * is the first PFN that needs to be initialised. */ unsigned long first_deferred_pfn; - unsigned long static_init_size; + /* Number of non-deferred pages */ + unsigned long static_init_pgcnt; #endif /* CONFIG_DEFERRED_STRUCT_PAGE_INIT */ #ifdef CONFIG_TRANSPARENT_HUGEPAGE -- cgit v1.2.3 From 2bce774e8245e95db81872ec39522cde8b486fc8 Mon Sep 17 00:00:00 2001 From: Wang Long Date: Wed, 15 Nov 2017 17:39:03 -0800 Subject: writeback: remove unused function parameter The parameter `struct bdi_writeback *wb` is not been used in the function body. Remove it. Link: http://lkml.kernel.org/r/1509685485-15278-1-git-send-email-wanglong19@meituan.com Signed-off-by: Wang Long Reviewed-by: Jan Kara Acked-by: Tejun Heo Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/backing-dev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 7360e79e9a2f..e54e7e0033eb 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -93,7 +93,7 @@ extern void wb_writeout_inc(struct bdi_writeback *wb); /* * maximal error of a stat counter. */ -static inline unsigned long wb_stat_error(struct bdi_writeback *wb) +static inline unsigned long wb_stat_error(void) { #ifdef CONFIG_SMP return nr_cpu_ids * WB_STAT_BATCH; -- cgit v1.2.3 From 0205f75571e3a70c35f0dd5e608773cce97d9dbb Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 15 Nov 2017 17:39:14 -0800 Subject: mm: simplify nodemask printing alloc_warn() and dump_header() have to explicitly handle NULL nodemask which forces both paths to use pr_cont. We can do better. printk already handles NULL pointers properly so all we need is to teach nodemask_pr_args to handle NULL nodemask carefully. This allows simplification of both alloc_warn() and dump_header() and gets rid of pr_cont altogether. This patch has been motivated by patch from Joe Perches http://lkml.kernel.org/r/b31236dfe3fc924054fd7842bde678e71d193638.1509991345.git.joe@perches.com [akpm@linux-foundation.org: fix tile warning, per Arnd] Link: http://lkml.kernel.org/r/20171109100531.3cn2hcqnuj7mjaju@dhcp22.suse.cz Signed-off-by: Michal Hocko Acked-by: Joe Perches Cc: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nodemask.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index de1c50b93c61..15cab3967d6d 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -104,7 +104,9 @@ extern nodemask_t _unused_nodemask_arg_; * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ -#define nodemask_pr_args(maskp) MAX_NUMNODES, (maskp)->bits +#define nodemask_pr_args(maskp) \ + ((maskp) != NULL) ? MAX_NUMNODES : 0, \ + ((maskp) != NULL) ? (maskp)->bits : NULL /* * The inline keyword gives the compiler room to decide to inline, or -- cgit v1.2.3 From 2f62b5aa4814be2c511553fd6afb4d35b6c2503b Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:37 +0300 Subject: fs, nfs: convert nfs_lock_context.count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_lock_context.count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- include/linux/nfs_fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 453f491a5fda..e6706913e6c2 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -22,6 +22,7 @@ #include #include #include +#include #include #include @@ -55,7 +56,7 @@ struct nfs_access_entry { }; struct nfs_lock_context { - atomic_t count; + refcount_t count; struct list_head list; struct nfs_open_context *open_context; fl_owner_t lockowner; -- cgit v1.2.3 From 212bf41d88c06afc23e03f9b274eebf1e8dba197 Mon Sep 17 00:00:00 2001 From: Elena Reshetova Date: Fri, 20 Oct 2017 12:53:38 +0300 Subject: fs, nfs: convert nfs_client.cl_count from atomic_t to refcount_t atomic_t variables are currently used to implement reference counters with the following properties: - counter is initialized to 1 using atomic_set() - a resource is freed upon counter reaching zero - once counter reaches zero, its further increments aren't allowed - counter schema uses basic atomic operations (set, inc, inc_not_zero, dec_and_test, etc.) Such atomic variables should be converted to a newly provided refcount_t type and API that prevents accidental counter overflows and underflows. This is important since overflows and underflows can lead to use-after-free situation and be exploitable. The variable nfs_client.cl_count is used as pure reference counter. Convert it to refcount_t and fix up the operations. Suggested-by: Kees Cook Reviewed-by: David Windsor Reviewed-by: Hans Liljestrand Signed-off-by: Elena Reshetova Signed-off-by: Anna Schumaker --- include/linux/nfs_fs_sb.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 74c44665e6d3..efcfe9ded9ea 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -9,6 +9,7 @@ #include #include +#include struct nfs4_session; struct nfs_iostats; @@ -24,7 +25,7 @@ struct nfs41_impl_id; * The nfs_client identifies our client state to the server. */ struct nfs_client { - atomic_t cl_count; + refcount_t cl_count; atomic_t cl_mds_count; int cl_cons_state; /* current construction state (-ve: init error) */ #define NFS_CS_READY 0 /* ready to be used */ -- cgit v1.2.3 From 2232df5ece121fd7049ccff95cbb3acfab278d75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Oct 2017 16:21:57 -0400 Subject: rpcrdma: Remove C structure definitions of XDR data items Clean up: C-structure style XDR encoding and decoding logic has been replaced over the past several merge windows on both the client and server. These data structures are no longer used. Signed-off-by: Chuck Lever Reviewed-by: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 59 ----------------------------------------- 1 file changed, 59 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index b7e85b341a54..840afac16272 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -50,65 +50,6 @@ enum { RPCRDMA_V1_DEF_INLINE_SIZE = 1024, }; -struct rpcrdma_segment { - __be32 rs_handle; /* Registered memory handle */ - __be32 rs_length; /* Length of the chunk in bytes */ - __be64 rs_offset; /* Chunk virtual address or offset */ -}; - -/* - * read chunk(s), encoded as a linked list. - */ -struct rpcrdma_read_chunk { - __be32 rc_discrim; /* 1 indicates presence */ - __be32 rc_position; /* Position in XDR stream */ - struct rpcrdma_segment rc_target; -}; - -/* - * write chunk, and reply chunk. - */ -struct rpcrdma_write_chunk { - struct rpcrdma_segment wc_target; -}; - -/* - * write chunk(s), encoded as a counted array. - */ -struct rpcrdma_write_array { - __be32 wc_discrim; /* 1 indicates presence */ - __be32 wc_nchunks; /* Array count */ - struct rpcrdma_write_chunk wc_array[0]; -}; - -struct rpcrdma_msg { - __be32 rm_xid; /* Mirrors the RPC header xid */ - __be32 rm_vers; /* Version of this protocol */ - __be32 rm_credit; /* Buffers requested/granted */ - __be32 rm_type; /* Type of message (enum rpcrdma_proc) */ - union { - - struct { /* no chunks */ - __be32 rm_empty[3]; /* 3 empty chunk lists */ - } rm_nochunks; - - struct { /* no chunks and padded */ - __be32 rm_align; /* Padding alignment */ - __be32 rm_thresh; /* Padding threshold */ - __be32 rm_pempty[3]; /* 3 empty chunk lists */ - } rm_padded; - - struct { - __be32 rm_err; - __be32 rm_vers_low; - __be32 rm_vers_high; - } rm_error; - - __be32 rm_chunks[0]; /* read, write and reply chunks */ - - } rm_body; -}; - /* * XDR sizes, in quads */ -- cgit v1.2.3 From 62b56a675565a2e40f2cdf50455977448fd87413 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Mon, 30 Oct 2017 16:22:14 -0400 Subject: xprtrdma: Update copyright notices Credit work contributed by Oracle engineers since 2014. Signed-off-by: Chuck Lever Signed-off-by: Anna Schumaker --- include/linux/sunrpc/rpc_rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/rpc_rdma.h b/include/linux/sunrpc/rpc_rdma.h index 840afac16272..8f144db73e38 100644 --- a/include/linux/sunrpc/rpc_rdma.h +++ b/include/linux/sunrpc/rpc_rdma.h @@ -1,4 +1,5 @@ /* + * Copyright (c) 2015-2017 Oracle. All rights reserved. * Copyright (c) 2003-2007 Network Appliance, Inc. All rights reserved. * * This software is available to you under a choice of one of two -- cgit v1.2.3 From 1334be3657dd02af0591d6d8adf0e6a60a7710a6 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Nov 2017 15:26:12 -0800 Subject: mm: fix nodemask printing The cleanup caused build warnings for constant mask pointers: mm/mempolicy.c: In function `mpol_to_str': ./include/linux/nodemask.h:108:11: warning: the comparison will always evaluate as `true' for the address of `nodes' will never be NULL [-Waddress] An earlier workaround I suggested was incorporated in the version that got merged, but that only solved the problem for gcc-7 and higher, while gcc-4.6 through gcc-6.x still warn. This changes the printing again to use inline functions that make it clear to the compiler that the line that does the NULL check has no idea whether the argument is a constant NULL. Link: http://lkml.kernel.org/r/20171117101545.119689-1-arnd@arndb.de Fixes: 0205f75571e3 ("mm: simplify nodemask printing") Signed-off-by: Arnd Bergmann Cc: Michal Hocko Cc: Stephen Rothwell Cc: Zhangshaokun Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nodemask.h | 13 ++++++++++--- 1 file changed, 10 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nodemask.h b/include/linux/nodemask.h index 15cab3967d6d..1fbde8a880d9 100644 --- a/include/linux/nodemask.h +++ b/include/linux/nodemask.h @@ -104,9 +104,16 @@ extern nodemask_t _unused_nodemask_arg_; * * Can be used to provide arguments for '%*pb[l]' when printing a nodemask. */ -#define nodemask_pr_args(maskp) \ - ((maskp) != NULL) ? MAX_NUMNODES : 0, \ - ((maskp) != NULL) ? (maskp)->bits : NULL +#define nodemask_pr_args(maskp) __nodemask_pr_numnodes(maskp), \ + __nodemask_pr_bits(maskp) +static inline unsigned int __nodemask_pr_numnodes(const nodemask_t *m) +{ + return m ? MAX_NUMNODES : 0; +} +static inline const unsigned long *__nodemask_pr_bits(const nodemask_t *m) +{ + return m ? m->bits : NULL; +} /* * The inline keyword gives the compiler room to decide to inline, or -- cgit v1.2.3 From 21dc7e023611fbcf8e38f255731bcf3cc38e7638 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Fri, 17 Nov 2017 15:26:30 -0800 Subject: mm, compaction: persistently skip hugetlbfs pageblocks It is pointless to migrate hugetlb memory as part of memory compaction if the hugetlb size is equal to the pageblock order. No defragmentation is occurring in this condition. It is also pointless to for the freeing scanner to scan a pageblock where a hugetlb page is pinned. Unconditionally skip these pageblocks, and do so peristently so that they are not rescanned until it is observed that these hugepages are no longer pinned. It would also be possible to do this by involving the hugetlb subsystem in marking pageblocks to no longer be skipped when they hugetlb pages are freed. This is a simple solution that doesn't involve any additional subsystems in pageblock skip manipulation. [rientjes@google.com: fix build] Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1708201734390.117182@chino.kir.corp.google.com Link: http://lkml.kernel.org/r/alpine.DEB.2.10.1708151639130.106658@chino.kir.corp.google.com Signed-off-by: David Rientjes Tested-by: Michal Hocko Cc: Vlastimil Babka Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pageblock-flags.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index e942558b3585..9132c5cb41f1 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -96,6 +96,17 @@ void set_pfnblock_flags_mask(struct page *page, #define set_pageblock_skip(page) \ set_pageblock_flags_group(page, 1, PB_migrate_skip, \ PB_migrate_skip) +#else +static inline bool get_pageblock_skip(struct page *page) +{ + return false; +} +static inline void clear_pageblock_skip(struct page *page) +{ +} +static inline void set_pageblock_skip(struct page *page) +{ +} #endif /* CONFIG_COMPACTION */ #endif /* PAGEBLOCK_FLAGS_H */ -- cgit v1.2.3 From aaf5dcfb223617ac2d16113e4b500199c65689de Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Fri, 17 Nov 2017 15:27:06 -0800 Subject: kernel debug: support resetting WARN_ONCE for all architectures Some architectures store the WARN_ONCE state in the flags field of the bug_entry. Clear that one too when resetting once state through /sys/kernel/debug/clear_warn_once Pointed out by Michael Ellerman Improves the earlier patch that add clear_warn_once. [ak@linux.intel.com: add a missing ifdef CONFIG_MODULES] Link: http://lkml.kernel.org/r/20171020170633.9593-1-andi@firstfloor.org [akpm@linux-foundation.org: fix unused var warning] [akpm@linux-foundation.org: Use 0200 for clear_warn_once file, per mpe] [akpm@linux-foundation.org: clear BUGFLAG_DONE in clear_once_table(), per mpe] Link: http://lkml.kernel.org/r/20171019204642.7404-1-andi@firstfloor.org Signed-off-by: Andi Kleen Tested-by: Michael Ellerman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bug.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bug.h b/include/linux/bug.h index da4231c905c8..fe5916550da8 100644 --- a/include/linux/bug.h +++ b/include/linux/bug.h @@ -43,6 +43,8 @@ enum bug_trap_type report_bug(unsigned long bug_addr, struct pt_regs *regs); /* These are defined by the architecture */ int is_valid_bugaddr(unsigned long addr); +void generic_bug_clear_once(void); + #else /* !CONFIG_GENERIC_BUG */ static inline enum bug_trap_type report_bug(unsigned long bug_addr, @@ -51,6 +53,9 @@ static inline enum bug_trap_type report_bug(unsigned long bug_addr, return BUG_TRAP_TYPE_BUG; } + +static inline void generic_bug_clear_once(void) {} + #endif /* CONFIG_GENERIC_BUG */ /* -- cgit v1.2.3 From d15809f3649e2ee04713a9dba9aa7bd2c208ad82 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 17 Nov 2017 15:27:13 -0800 Subject: iopoll: avoid -Wint-in-bool-context warning When we pass the result of a multiplication as the timeout or the delay, we can get a warning from gcc-7: drivers/mmc/host/bcm2835.c:596:149: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/mfd/arizona-core.c:247:195: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] drivers/gpu/drm/sun4i/sun4i_hdmi_i2c.c:49:27: error: '*' in boolean context, suggest '&&' instead [-Werror=int-in-bool-context] The warning is a bit questionable inside of a macro, but this is intentional on the side of the gcc developers. It is also an indication of another problem: we evaluate the timeout and sleep arguments multiple times, which can have undesired side-effects when those are complex expressions. This changes the two iopoll variants to use local variables for storing copies of the timeouts. This adds some more type safety, and avoids both the double-evaluation and the gcc warning. Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=81484 Link: http://lkml.kernel.org/r/20170726133756.2161367-1-arnd@arndb.de Link: http://lkml.kernel.org/r/20171102114048.1526955-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Reviewed-by: Mark Brown Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/iopoll.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iopoll.h b/include/linux/iopoll.h index d29e1e21bf3f..b1d861caca16 100644 --- a/include/linux/iopoll.h +++ b/include/linux/iopoll.h @@ -42,18 +42,21 @@ */ #define readx_poll_timeout(op, addr, val, cond, sleep_us, timeout_us) \ ({ \ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ - might_sleep_if(sleep_us); \ + u64 __timeout_us = (timeout_us); \ + unsigned long __sleep_us = (sleep_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ + might_sleep_if((__sleep_us) != 0); \ for (;;) { \ (val) = op(addr); \ if (cond) \ break; \ - if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = op(addr); \ break; \ } \ - if (sleep_us) \ - usleep_range((sleep_us >> 2) + 1, sleep_us); \ + if (__sleep_us) \ + usleep_range((__sleep_us >> 2) + 1, __sleep_us); \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) @@ -77,17 +80,20 @@ */ #define readx_poll_timeout_atomic(op, addr, val, cond, delay_us, timeout_us) \ ({ \ - ktime_t timeout = ktime_add_us(ktime_get(), timeout_us); \ + u64 __timeout_us = (timeout_us); \ + unsigned long __delay_us = (delay_us); \ + ktime_t __timeout = ktime_add_us(ktime_get(), __timeout_us); \ for (;;) { \ (val) = op(addr); \ if (cond) \ break; \ - if (timeout_us && ktime_compare(ktime_get(), timeout) > 0) { \ + if (__timeout_us && \ + ktime_compare(ktime_get(), __timeout) > 0) { \ (val) = op(addr); \ break; \ } \ - if (delay_us) \ - udelay(delay_us); \ + if (__delay_us) \ + udelay(__delay_us); \ } \ (cond) ? 0 : -ETIMEDOUT; \ }) -- cgit v1.2.3 From 4ca59b14e588f873795a11cdc77a25c686a29d23 Mon Sep 17 00:00:00 2001 From: Sandipan Das Date: Fri, 17 Nov 2017 15:27:28 -0800 Subject: include/linux/compiler-clang.h: handle randomizable anonymous structs The GCC randomize layout plugin can randomize the member offsets of sensitive kernel data structures. To use this feature, certain annotations and members are added to the structures which affect the member offsets even if this plugin is not used. All of these structures are completely randomized, except for task_struct which leaves out some of its members. All the other members are wrapped within an anonymous struct with the __randomize_layout attribute. This is done using the randomized_struct_fields_start and randomized_struct_fields_end defines. When the plugin is disabled, the behaviour of this attribute can vary based on the GCC version. For GCC 5.1+, this attribute maps to __designated_init otherwise it is just an empty define but the anonymous structure is still present. For other compilers, both randomized_struct_fields_start and randomized_struct_fields_end default to empty defines meaning the anonymous structure is not introduced at all. So, if a module compiled with Clang, such as a BPF program, needs to access task_struct fields such as pid and comm, the offsets of these members as recognized by Clang are different from those recognized by modules compiled with GCC. If GCC 4.6+ is used to build the kernel, this can be solved by introducing appropriate defines for Clang so that the anonymous structure is seen when determining the offsets for the members. Link: http://lkml.kernel.org/r/20171109064645.25581-1-sandipan@linux.vnet.ibm.com Signed-off-by: Sandipan Das Cc: David Rientjes Cc: Greg Kroah-Hartman Cc: Kate Stewart Cc: Thomas Gleixner Cc: Naveen N. Rao Cc: Alexei Starovoitov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-clang.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h index a06583e41f80..3b609edffa8f 100644 --- a/include/linux/compiler-clang.h +++ b/include/linux/compiler-clang.h @@ -16,3 +16,6 @@ * with any version that can compile the kernel */ #define __UNIQUE_ID(prefix) __PASTE(__PASTE(__UNIQUE_ID_, prefix), __COUNTER__) + +#define randomized_struct_fields_start struct { +#define randomized_struct_fields_end }; -- cgit v1.2.3 From 8001541cc333b1a3c2c38e3b34475fa446b053da Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 17 Nov 2017 15:27:49 -0800 Subject: include/linux/bitfield.h: include instead of Since commit bc6245e5efd7 ("bug: split BUILD_BUG stuff out into "), #include is better to pull minimal headers needed for BUILG_BUG() family. Link: http://lkml.kernel.org/r/1505700775-19826-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Acked-by: Jakub Kicinski Cc: Dinan Gunawardena Cc: Kalle Valo Cc: Ian Abbott Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitfield.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index f2deb71958b2..1030651f8309 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -15,7 +15,7 @@ #ifndef _LINUX_BITFIELD_H #define _LINUX_BITFIELD_H -#include +#include /* * Bitfield access macros -- cgit v1.2.3 From f5bba9d11a256ad2a1c2f8e7fc6aabe6416b7890 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Fri, 17 Nov 2017 15:27:53 -0800 Subject: include/linux/radix-tree.h: remove unneeded #include This include was added by commit 187f1882b5b0 ("BUG: headers with BUG/BUG_ON etc. need linux/bug.h") because BUG_ON() was used in this header at that time. Some time later, commit 6d75f366b924 ("lib: radix-tree: check accounting of existing slot replacement users") removed the use of BUG_ON() from this header. Since then, there is no reason to include . Link: http://lkml.kernel.org/r/1505660151-4383-1-git-send-email-yamada.masahiro@socionext.com Signed-off-by: Masahiro Yamada Cc: Matthew Wilcox Cc: Masahiro Yamada Cc: Jan Kara Cc: Johannes Weiner Cc: Chris Mi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/radix-tree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/radix-tree.h b/include/linux/radix-tree.h index 0ca448c1cb42..23a9c89c7ad9 100644 --- a/include/linux/radix-tree.h +++ b/include/linux/radix-tree.h @@ -22,7 +22,6 @@ #define _LINUX_RADIX_TREE_H #include -#include #include #include #include -- cgit v1.2.3 From 36a3d1dd4e16bcd0d2ddfb4a2ec7092f0ae0d931 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Fri, 17 Nov 2017 15:28:16 -0800 Subject: lib/genalloc.c: make the avail variable an atomic_long_t If the amount of resources allocated to a gen_pool exceeds 2^32 then the avail atomic overflows and this causes problems when clients try and borrow resources from the pool. This is only expected to be an issue on 64 bit systems. Add the header to pull in atomic_long* operations. So that 32 bit systems continue to use atomic32_t but 64 bit systems can use atomic64_t. Link: http://lkml.kernel.org/r/1509033843-25667-1-git-send-email-sbates@raithlin.com Signed-off-by: Stephen Bates Reviewed-by: Logan Gunthorpe Reviewed-by: Mathieu Desnoyers Reviewed-by: Daniel Mentz Cc: Jonathan Corbet Cc: Andrew Morton Cc: Will Deacon Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/genalloc.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genalloc.h b/include/linux/genalloc.h index 6dfec4d638df..872f930f1b06 100644 --- a/include/linux/genalloc.h +++ b/include/linux/genalloc.h @@ -32,6 +32,7 @@ #include #include +#include struct device; struct device_node; @@ -71,7 +72,7 @@ struct gen_pool { */ struct gen_pool_chunk { struct list_head next_chunk; /* next chunk in pool */ - atomic_t avail; + atomic_long_t avail; phys_addr_t phys_addr; /* physical starting address of memory chunk */ unsigned long start_addr; /* start address of memory chunk */ unsigned long end_addr; /* end address of memory chunk (inclusive) */ -- cgit v1.2.3 From 7a8d181949fb2c16be00f8cdb354794a30e46b39 Mon Sep 17 00:00:00 2001 From: Joe Lawrence Date: Fri, 17 Nov 2017 15:29:24 -0800 Subject: pipe: add proc_dopipe_max_size() to safely assign pipe_max_size pipe_max_size is assigned directly via procfs sysctl: static struct ctl_table fs_table[] = { ... { .procname = "pipe-max-size", .data = &pipe_max_size, .maxlen = sizeof(int), .mode = 0644, .proc_handler = &pipe_proc_fn, .extra1 = &pipe_min_size, }, ... int pipe_proc_fn(struct ctl_table *table, int write, void __user *buf, size_t *lenp, loff_t *ppos) { ... ret = proc_dointvec_minmax(table, write, buf, lenp, ppos) ... and then later rounded in-place a few statements later: ... pipe_max_size = round_pipe_size(pipe_max_size); ... This leaves a window of time between initial assignment and rounding that may be visible to other threads. (For example, one thread sets a non-rounded value to pipe_max_size while another reads its value.) Similar reads of pipe_max_size are potentially racy: pipe.c :: alloc_pipe_info() pipe.c :: pipe_set_size() Add a new proc_dopipe_max_size() that consolidates reading the new value from the user buffer, verifying bounds, and calling round_pipe_size() with a single assignment to pipe_max_size. Link: http://lkml.kernel.org/r/1507658689-11669-4-git-send-email-joe.lawrence@redhat.com Signed-off-by: Joe Lawrence Reported-by: Mikulas Patocka Reviewed-by: Mikulas Patocka Cc: Al Viro Cc: Jens Axboe Cc: Michael Kerrisk Cc: Randy Dunlap Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pipe_fs_i.h | 1 + include/linux/sysctl.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index 6a80cfc63e0c..2dc5e9870fcd 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -191,5 +191,6 @@ long pipe_fcntl(struct file *, unsigned int, unsigned long arg); struct pipe_inode_info *get_pipe_info(struct file *file); int create_pipe_files(struct file **, int); +unsigned int round_pipe_size(unsigned int size); #endif diff --git a/include/linux/sysctl.h b/include/linux/sysctl.h index b769ecfcc3bd..992bc9948232 100644 --- a/include/linux/sysctl.h +++ b/include/linux/sysctl.h @@ -51,6 +51,9 @@ extern int proc_dointvec_minmax(struct ctl_table *, int, extern int proc_douintvec_minmax(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); +extern int proc_dopipe_max_size(struct ctl_table *table, int write, + void __user *buffer, size_t *lenp, + loff_t *ppos); extern int proc_dointvec_jiffies(struct ctl_table *, int, void __user *, size_t *, loff_t *); extern int proc_dointvec_userhz_jiffies(struct ctl_table *, int, -- cgit v1.2.3 From 95846ecf9dac5089aed4b144d912225f8ef86ae4 Mon Sep 17 00:00:00 2001 From: Gargi Sharma Date: Fri, 17 Nov 2017 15:30:30 -0800 Subject: pid: replace pid bitmap implementation with IDR API Patch series "Replacing PID bitmap implementation with IDR API", v4. This series replaces kernel bitmap implementation of PID allocation with IDR API. These patches are written to simplify the kernel by replacing custom code with calls to generic code. The following are the stats for pid and pid_namespace object files before and after the replacement. There is a noteworthy change between the IDR and bitmap implementation. Before text data bss dec hex filename 8447 3894 64 12405 3075 kernel/pid.o After text data bss dec hex filename 3397 304 0 3701 e75 kernel/pid.o Before text data bss dec hex filename 5692 1842 192 7726 1e2e kernel/pid_namespace.o After text data bss dec hex filename 2854 216 16 3086 c0e kernel/pid_namespace.o The following are the stats for ps, pstree and calling readdir on /proc for 10,000 processes. ps: With IDR API With bitmap real 0m1.479s 0m2.319s user 0m0.070s 0m0.060s sys 0m0.289s 0m0.516s pstree: With IDR API With bitmap real 0m1.024s 0m1.794s user 0m0.348s 0m0.612s sys 0m0.184s 0m0.264s proc: With IDR API With bitmap real 0m0.059s 0m0.074s user 0m0.000s 0m0.004s sys 0m0.016s 0m0.016s This patch (of 2): Replace the current bitmap implementation for Process ID allocation. Functions that are no longer required, for example, free_pidmap(), alloc_pidmap(), etc. are removed. The rest of the functions are modified to use the IDR API. The change was made to make the PID allocation less complex by replacing custom code with calls to generic API. [gs051095@gmail.com: v6] Link: http://lkml.kernel.org/r/1507760379-21662-2-git-send-email-gs051095@gmail.com [avagin@openvz.org: restore the old behaviour of the ns_last_pid sysctl] Link: http://lkml.kernel.org/r/20171106183144.16368-1-avagin@openvz.org Link: http://lkml.kernel.org/r/1507583624-22146-2-git-send-email-gs051095@gmail.com Signed-off-by: Gargi Sharma Reviewed-by: Rik van Riel Acked-by: Oleg Nesterov Cc: Julia Lawall Cc: Ingo Molnar Cc: Pavel Tatashin Cc: Kirill Tkhai Cc: Eric W. Biederman Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pid_namespace.h | 14 +++----------- 1 file changed, 3 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index c78af6061644..92c6aa509d2e 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -10,15 +10,8 @@ #include #include #include +#include -struct pidmap { - atomic_t nr_free; - void *page; -}; - -#define BITS_PER_PAGE (PAGE_SIZE * 8) -#define BITS_PER_PAGE_MASK (BITS_PER_PAGE-1) -#define PIDMAP_ENTRIES ((PID_MAX_LIMIT+BITS_PER_PAGE-1)/BITS_PER_PAGE) struct fs_pin; @@ -30,9 +23,8 @@ enum { /* definitions for pid_namespace's hide_pid field */ struct pid_namespace { struct kref kref; - struct pidmap pidmap[PIDMAP_ENTRIES]; + struct idr idr; struct rcu_head rcu; - int last_pid; unsigned int nr_hashed; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; @@ -106,6 +98,6 @@ static inline int reboot_pid_ns(struct pid_namespace *pid_ns, int cmd) extern struct pid_namespace *task_active_pid_ns(struct task_struct *tsk); void pidhash_init(void); -void pidmap_init(void); +void pid_idr_init(void); #endif /* _LINUX_PID_NS_H */ -- cgit v1.2.3 From e8cfbc245e24887e3c30235f71e9e9405e0cfc39 Mon Sep 17 00:00:00 2001 From: Gargi Sharma Date: Fri, 17 Nov 2017 15:30:34 -0800 Subject: pid: remove pidhash pidhash is no longer required as all the information can be looked up from idr tree. nr_hashed represented the number of pids that had been hashed. Since, nr_hashed and PIDNS_HASH_ADDING are no longer relevant, it has been renamed to pid_allocated and PIDNS_ADDING respectively. [gs051095@gmail.com: v6] Link: http://lkml.kernel.org/r/1507760379-21662-3-git-send-email-gs051095@gmail.com Link: http://lkml.kernel.org/r/1507583624-22146-3-git-send-email-gs051095@gmail.com Signed-off-by: Gargi Sharma Reviewed-by: Rik van Riel Tested-by: Tony Luck [ia64] Cc: Julia Lawall Cc: Ingo Molnar Cc: Pavel Tatashin Cc: Kirill Tkhai Cc: Oleg Nesterov Cc: Eric W. Biederman Cc: Christoph Hellwig Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/init_task.h | 1 - include/linux/pid.h | 2 -- include/linux/pid_namespace.h | 4 ++-- 3 files changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index 8062e6cc607c..6a532629c983 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -105,7 +105,6 @@ extern struct group_info init_groups; .numbers = { { \ .nr = 0, \ .ns = &init_pid_ns, \ - .pid_chain = { .next = NULL, .pprev = NULL }, \ }, } \ } diff --git a/include/linux/pid.h b/include/linux/pid.h index dfd684ce0787..7633d55d9a24 100644 --- a/include/linux/pid.h +++ b/include/linux/pid.h @@ -51,10 +51,8 @@ enum pid_type */ struct upid { - /* Try to keep pid_chain in the same cacheline as nr for find_vpid */ int nr; struct pid_namespace *ns; - struct hlist_node pid_chain; }; struct pid diff --git a/include/linux/pid_namespace.h b/include/linux/pid_namespace.h index 92c6aa509d2e..49538b172483 100644 --- a/include/linux/pid_namespace.h +++ b/include/linux/pid_namespace.h @@ -25,7 +25,7 @@ struct pid_namespace { struct kref kref; struct idr idr; struct rcu_head rcu; - unsigned int nr_hashed; + unsigned int pid_allocated; struct task_struct *child_reaper; struct kmem_cache *pid_cachep; unsigned int level; @@ -49,7 +49,7 @@ struct pid_namespace { extern struct pid_namespace init_pid_ns; -#define PIDNS_HASH_ADDING (1U << 31) +#define PIDNS_ADDING (1U << 31) #ifdef CONFIG_PID_NS static inline struct pid_namespace *get_pid_ns(struct pid_namespace *ns) -- cgit v1.2.3 From 4efb442cc12eb66535b7c7ed06005fd7889c1d77 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 17 Nov 2017 15:30:38 -0800 Subject: kernel/panic.c: add TAINT_AUX This is the gist of a patch which we've been forward-porting in our kernels for a long time now and it probably would make a good sense to have such TAINT_AUX flag upstream which can be used by each distro etc, how they see fit. This way, we won't need to forward-port a distro-only version indefinitely. Add an auxiliary taint flag to be used by distros and others. This obviates the need to forward-port whatever internal solutions people have in favor of a single flag which they can map arbitrarily to a definition of their pleasing. The "X" mnemonic could also mean eXternal, which would be taint from a distro or something else but not the upstream kernel. We will use it to mark modules for which we don't provide support. I.e., a really eXternal module. Link: http://lkml.kernel.org/r/20170911134533.dp5mtyku5bongx4c@pd.tnic Signed-off-by: Borislav Petkov Cc: Kees Cook Cc: Jessica Yu Cc: Peter Zijlstra Cc: Jiri Slaby Cc: Jiri Olsa Cc: Michal Marek Cc: Jiri Kosina Cc: Takashi Iwai Cc: Petr Mladek Cc: Jeff Mahoney Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 4b484ab9e163..ce51455e2adf 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -549,7 +549,8 @@ extern enum system_states { #define TAINT_UNSIGNED_MODULE 13 #define TAINT_SOFTLOCKUP 14 #define TAINT_LIVEPATCH 15 -#define TAINT_FLAGS_COUNT 16 +#define TAINT_AUX 16 +#define TAINT_FLAGS_COUNT 17 struct taint_flag { char c_true; /* character printed when tainted */ -- cgit v1.2.3 From ded97d2c2b2c5f1dcced0bc57133f7753b037dfc Mon Sep 17 00:00:00 2001 From: Victor Chibotaru Date: Fri, 17 Nov 2017 15:30:46 -0800 Subject: kcov: support comparison operands collection Enables kcov to collect comparison operands from instrumented code. This is done by using Clang's -fsanitize=trace-cmp instrumentation (currently not available for GCC). The comparison operands help a lot in fuzz testing. E.g. they are used in Syzkaller to cover the interiors of conditional statements with way less attempts and thus make previously unreachable code reachable. To allow separate collection of coverage and comparison operands two different work modes are implemented. Mode selection is now done via a KCOV_ENABLE ioctl call with corresponding argument value. Link: http://lkml.kernel.org/r/20171011095459.70721-1-glider@google.com Signed-off-by: Victor Chibotaru Signed-off-by: Alexander Potapenko Cc: Dmitry Vyukov Cc: Andrey Konovalov Cc: Mark Rutland Cc: Alexander Popov Cc: Andrey Ryabinin Cc: Kees Cook Cc: Vegard Nossum Cc: Quentin Casasnovas Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kcov.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kcov.h b/include/linux/kcov.h index f5d8ce4f4f86..3ecf6f5e3a5f 100644 --- a/include/linux/kcov.h +++ b/include/linux/kcov.h @@ -8,19 +8,23 @@ struct task_struct; #ifdef CONFIG_KCOV -void kcov_task_init(struct task_struct *t); -void kcov_task_exit(struct task_struct *t); - enum kcov_mode { /* Coverage collection is not enabled yet. */ KCOV_MODE_DISABLED = 0, + /* KCOV was initialized, but tracing mode hasn't been chosen yet. */ + KCOV_MODE_INIT = 1, /* * Tracing coverage collection mode. * Covered PCs are collected in a per-task buffer. */ - KCOV_MODE_TRACE = 1, + KCOV_MODE_TRACE_PC = 2, + /* Collecting comparison operands mode. */ + KCOV_MODE_TRACE_CMP = 3, }; +void kcov_task_init(struct task_struct *t); +void kcov_task_exit(struct task_struct *t); + #else static inline void kcov_task_init(struct task_struct *t) {} -- cgit v1.2.3 From 2d8364bae4db144df75ba85e92d2b8619ba8eedc Mon Sep 17 00:00:00 2001 From: Andrey Smirnov Date: Fri, 17 Nov 2017 15:30:57 -0800 Subject: kernel/reboot.c: add devm_register_reboot_notifier() Add devm_* wrapper around register_reboot_notifier to simplify device specific reboot notifier registration/unregistration. [akpm@linux-foundation.org: move `struct device' forward decl to top-of-file] Link: http://lkml.kernel.org/r/20170320171753.1705-1-andrew.smirnov@gmail.com Signed-off-by: Andrey Smirnov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/reboot.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/reboot.h b/include/linux/reboot.h index d03da0eb95ca..e63799a6e895 100644 --- a/include/linux/reboot.h +++ b/include/linux/reboot.h @@ -6,6 +6,8 @@ #include #include +struct device; + #define SYS_DOWN 0x0001 /* Notify of system down */ #define SYS_RESTART SYS_DOWN #define SYS_HALT 0x0002 /* Notify of system halt */ @@ -39,6 +41,8 @@ extern int reboot_force; extern int register_reboot_notifier(struct notifier_block *); extern int unregister_reboot_notifier(struct notifier_block *); +extern int devm_register_reboot_notifier(struct device *, struct notifier_block *); + extern int register_restart_handler(struct notifier_block *); extern int unregister_restart_handler(struct notifier_block *); extern void do_kernel_restart(char *cmd); -- cgit v1.2.3 From b8fd99838435f9b420c3e848192bd43abc648b7f Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 17 Nov 2017 15:31:08 -0800 Subject: sysvipc: unteach ids->next_id for !CHECKPOINT_RESTORE Patch series "sysvipc: ipc-key management improvements". Here are a few improvements I spotted while eyeballing Guillaume's rhashtable implementation for ipc keys. The first and fourth patches are the interesting ones, the middle two are trivial. This patch (of 4): The next_id object-allocation functionality was introduced in commit 03f595668017 ("ipc: add sysctl to specify desired next object id"). Given that these new entries are _only_ exported under the CONFIG_CHECKPOINT_RESTORE option, there is no point for the common case to even know about ->next_id. As such rewrite ipc_buildid() such that it can do away with the field as well as unnecessary branches when adding a new identifier. The end result also better differentiates both cases, so the code ends up being cleaner; albeit the small duplications regarding the default case. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170831172049.14576-2-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index 474812abe773..d7cf3a850853 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -19,7 +19,9 @@ struct ipc_ids { bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; +#ifdef CONFIG_CHECKPOINT_RESTORE int next_id; +#endif struct rhashtable key_ht; }; -- cgit v1.2.3 From 15df03c87983660a4d1eedb4541778592bd97684 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Fri, 17 Nov 2017 15:31:18 -0800 Subject: sysvipc: make get_maxid O(1) again For a custom microbenchmark on a 3.30GHz Xeon SandyBridge, which calls IPC_STAT over and over, it was calculated that, on avg the cost of ipc_get_maxid() for increasing amounts of keys was: 10 keys: ~900 cycles 100 keys: ~15000 cycles 1000 keys: ~150000 cycles 10000 keys: ~2100000 cycles This is unsurprising as maxid is currently O(n). By having the max_id available in O(1) we save all those cycles for each semctl(_STAT) command, the idr_find can be expensive -- which some real (customer) workloads actually poll on. Note that this used to be the case, until commit 7ca7e564e04 ("ipc: store ipcs into IDRs"). The cost is the extra idr_find when doing RMIDs, but we simply go backwards, and should not take too many iterations to find the new value. [akpm@linux-foundation.org: coding-style fixes] Link: http://lkml.kernel.org/r/20170831172049.14576-5-dave@stgolabs.net Signed-off-by: Davidlohr Bueso Cc: Manfred Spraul Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/ipc_namespace.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipc_namespace.h b/include/linux/ipc_namespace.h index d7cf3a850853..b5630c8eb2f3 100644 --- a/include/linux/ipc_namespace.h +++ b/include/linux/ipc_namespace.h @@ -19,6 +19,7 @@ struct ipc_ids { bool tables_initialized; struct rw_semaphore rwsem; struct idr ipcs_idr; + int max_id; #ifdef CONFIG_CHECKPOINT_RESTORE int next_id; #endif -- cgit v1.2.3 From 5a1c269f15dc9f964f829d864ae8abebcaa3d3be Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:40 -0600 Subject: NTB: switchtec: Move structure definitions into a common header Create the switchtec.h header in include/linux with hardware defines and the switchtec_dev structure. Both moved directly from switchtec.c. This is a prep patch for creating an NTB driver for Switchtec. Signed-off-by: Logan Gunthorpe Reviewed-by: Stephen Bates Reviewed-by: Kurt Schwemmer Acked-by: Greg Kroah-Hartman Acked-by: Bjorn Helgaas Signed-off-by: Jon Mason --- include/linux/switchtec.h | 279 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 279 insertions(+) create mode 100644 include/linux/switchtec.h (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h new file mode 100644 index 000000000000..1cbd0e63b0ab --- /dev/null +++ b/include/linux/switchtec.h @@ -0,0 +1,279 @@ +/* + * Microsemi Switchtec PCIe Driver + * Copyright (c) 2017, Microsemi Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + */ + +#ifndef _SWITCHTEC_H +#define _SWITCHTEC_H + +#include +#include + +#define MICROSEMI_VENDOR_ID 0x11f8 +#define MICROSEMI_NTB_CLASSCODE 0x068000 +#define MICROSEMI_MGMT_CLASSCODE 0x058000 + +#define SWITCHTEC_MRPC_PAYLOAD_SIZE 1024 +#define SWITCHTEC_MAX_PFF_CSR 48 + +#define SWITCHTEC_EVENT_OCCURRED BIT(0) +#define SWITCHTEC_EVENT_CLEAR BIT(0) +#define SWITCHTEC_EVENT_EN_LOG BIT(1) +#define SWITCHTEC_EVENT_EN_CLI BIT(2) +#define SWITCHTEC_EVENT_EN_IRQ BIT(3) +#define SWITCHTEC_EVENT_FATAL BIT(4) + +enum { + SWITCHTEC_GAS_MRPC_OFFSET = 0x0000, + SWITCHTEC_GAS_TOP_CFG_OFFSET = 0x1000, + SWITCHTEC_GAS_SW_EVENT_OFFSET = 0x1800, + SWITCHTEC_GAS_SYS_INFO_OFFSET = 0x2000, + SWITCHTEC_GAS_FLASH_INFO_OFFSET = 0x2200, + SWITCHTEC_GAS_PART_CFG_OFFSET = 0x4000, + SWITCHTEC_GAS_NTB_OFFSET = 0x10000, + SWITCHTEC_GAS_PFF_CSR_OFFSET = 0x134000, +}; + +struct mrpc_regs { + u8 input_data[SWITCHTEC_MRPC_PAYLOAD_SIZE]; + u8 output_data[SWITCHTEC_MRPC_PAYLOAD_SIZE]; + u32 cmd; + u32 status; + u32 ret_value; +} __packed; + +enum mrpc_status { + SWITCHTEC_MRPC_STATUS_INPROGRESS = 1, + SWITCHTEC_MRPC_STATUS_DONE = 2, + SWITCHTEC_MRPC_STATUS_ERROR = 0xFF, + SWITCHTEC_MRPC_STATUS_INTERRUPTED = 0x100, +}; + +struct sw_event_regs { + u64 event_report_ctrl; + u64 reserved1; + u64 part_event_bitmap; + u64 reserved2; + u32 global_summary; + u32 reserved3[3]; + u32 stack_error_event_hdr; + u32 stack_error_event_data; + u32 reserved4[4]; + u32 ppu_error_event_hdr; + u32 ppu_error_event_data; + u32 reserved5[4]; + u32 isp_error_event_hdr; + u32 isp_error_event_data; + u32 reserved6[4]; + u32 sys_reset_event_hdr; + u32 reserved7[5]; + u32 fw_exception_hdr; + u32 reserved8[5]; + u32 fw_nmi_hdr; + u32 reserved9[5]; + u32 fw_non_fatal_hdr; + u32 reserved10[5]; + u32 fw_fatal_hdr; + u32 reserved11[5]; + u32 twi_mrpc_comp_hdr; + u32 twi_mrpc_comp_data; + u32 reserved12[4]; + u32 twi_mrpc_comp_async_hdr; + u32 twi_mrpc_comp_async_data; + u32 reserved13[4]; + u32 cli_mrpc_comp_hdr; + u32 cli_mrpc_comp_data; + u32 reserved14[4]; + u32 cli_mrpc_comp_async_hdr; + u32 cli_mrpc_comp_async_data; + u32 reserved15[4]; + u32 gpio_interrupt_hdr; + u32 gpio_interrupt_data; + u32 reserved16[4]; +} __packed; + +enum { + SWITCHTEC_CFG0_RUNNING = 0x04, + SWITCHTEC_CFG1_RUNNING = 0x05, + SWITCHTEC_IMG0_RUNNING = 0x03, + SWITCHTEC_IMG1_RUNNING = 0x07, +}; + +struct sys_info_regs { + u32 device_id; + u32 device_version; + u32 firmware_version; + u32 reserved1; + u32 vendor_table_revision; + u32 table_format_version; + u32 partition_id; + u32 cfg_file_fmt_version; + u16 cfg_running; + u16 img_running; + u32 reserved2[57]; + char vendor_id[8]; + char product_id[16]; + char product_revision[4]; + char component_vendor[8]; + u16 component_id; + u8 component_revision; +} __packed; + +struct flash_info_regs { + u32 flash_part_map_upd_idx; + + struct active_partition_info { + u32 address; + u32 build_version; + u32 build_string; + } active_img; + + struct active_partition_info active_cfg; + struct active_partition_info inactive_img; + struct active_partition_info inactive_cfg; + + u32 flash_length; + + struct partition_info { + u32 address; + u32 length; + } cfg0; + + struct partition_info cfg1; + struct partition_info img0; + struct partition_info img1; + struct partition_info nvlog; + struct partition_info vendor[8]; +}; + +struct ntb_info_regs { + u8 partition_count; + u8 partition_id; + u16 reserved1; + u64 ep_map; + u16 requester_id; +} __packed; + +struct part_cfg_regs { + u32 status; + u32 state; + u32 port_cnt; + u32 usp_port_mode; + u32 usp_pff_inst_id; + u32 vep_pff_inst_id; + u32 dsp_pff_inst_id[47]; + u32 reserved1[11]; + u16 vep_vector_number; + u16 usp_vector_number; + u32 port_event_bitmap; + u32 reserved2[3]; + u32 part_event_summary; + u32 reserved3[3]; + u32 part_reset_hdr; + u32 part_reset_data[5]; + u32 mrpc_comp_hdr; + u32 mrpc_comp_data[5]; + u32 mrpc_comp_async_hdr; + u32 mrpc_comp_async_data[5]; + u32 dyn_binding_hdr; + u32 dyn_binding_data[5]; + u32 reserved4[159]; +} __packed; + +enum { + SWITCHTEC_PART_CFG_EVENT_RESET = 1 << 0, + SWITCHTEC_PART_CFG_EVENT_MRPC_CMP = 1 << 1, + SWITCHTEC_PART_CFG_EVENT_MRPC_ASYNC_CMP = 1 << 2, + SWITCHTEC_PART_CFG_EVENT_DYN_PART_CMP = 1 << 3, +}; + +struct pff_csr_regs { + u16 vendor_id; + u16 device_id; + u32 pci_cfg_header[15]; + u32 pci_cap_region[48]; + u32 pcie_cap_region[448]; + u32 indirect_gas_window[128]; + u32 indirect_gas_window_off; + u32 reserved[127]; + u32 pff_event_summary; + u32 reserved2[3]; + u32 aer_in_p2p_hdr; + u32 aer_in_p2p_data[5]; + u32 aer_in_vep_hdr; + u32 aer_in_vep_data[5]; + u32 dpc_hdr; + u32 dpc_data[5]; + u32 cts_hdr; + u32 cts_data[5]; + u32 reserved3[6]; + u32 hotplug_hdr; + u32 hotplug_data[5]; + u32 ier_hdr; + u32 ier_data[5]; + u32 threshold_hdr; + u32 threshold_data[5]; + u32 power_mgmt_hdr; + u32 power_mgmt_data[5]; + u32 tlp_throttling_hdr; + u32 tlp_throttling_data[5]; + u32 force_speed_hdr; + u32 force_speed_data[5]; + u32 credit_timeout_hdr; + u32 credit_timeout_data[5]; + u32 link_state_hdr; + u32 link_state_data[5]; + u32 reserved4[174]; +} __packed; + +struct switchtec_dev { + struct pci_dev *pdev; + struct device dev; + struct cdev cdev; + + int partition; + int partition_count; + int pff_csr_count; + char pff_local[SWITCHTEC_MAX_PFF_CSR]; + + void __iomem *mmio; + struct mrpc_regs __iomem *mmio_mrpc; + struct sw_event_regs __iomem *mmio_sw_event; + struct sys_info_regs __iomem *mmio_sys_info; + struct flash_info_regs __iomem *mmio_flash_info; + struct ntb_info_regs __iomem *mmio_ntb; + struct part_cfg_regs __iomem *mmio_part_cfg; + struct part_cfg_regs __iomem *mmio_part_cfg_all; + struct pff_csr_regs __iomem *mmio_pff_csr; + + /* + * The mrpc mutex must be held when accessing the other + * mrpc_ fields, alive flag and stuser->state field + */ + struct mutex mrpc_mutex; + struct list_head mrpc_queue; + int mrpc_busy; + struct work_struct mrpc_work; + struct delayed_work mrpc_timeout; + bool alive; + + wait_queue_head_t event_wq; + atomic_t event_cnt; +}; + +static inline struct switchtec_dev *to_stdev(struct device *dev) +{ + return container_of(dev, struct switchtec_dev, dev); +} + +#endif -- cgit v1.2.3 From 302e994d3af7e4be8d0f789aa66422166ccd89c2 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:41 -0600 Subject: NTB: switchtec: Export class symbol for use in upper layer driver We export the class pointer symbol and add an extern define in the Switchtec header file. Signed-off-by: Logan Gunthorpe Reviewed-by: Stephen Bates Reviewed-by: Kurt Schwemmer Acked-by: Bjorn Helgaas Signed-off-by: Jon Mason --- include/linux/switchtec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 1cbd0e63b0ab..18e388101855 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -276,4 +276,6 @@ static inline struct switchtec_dev *to_stdev(struct device *dev) return container_of(dev, struct switchtec_dev, dev); } +extern struct class *switchtec_class; + #endif -- cgit v1.2.3 From c082b04c9d40f69dacd93a151db114299f5de6eb Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:42 -0600 Subject: NTB: switchtec: Add NTB hardware register definitions There are two additional regions: ctrl and dbmsg. The first is for generic NTB control and memory windows. The second is for doorbells and message registers. This patch also adds a number of related constants for using these registers. Signed-off-by: Logan Gunthorpe Reviewed-by: Stephen Bates Reviewed-by: Kurt Schwemmer Signed-off-by: Jon Mason --- include/linux/switchtec.h | 84 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 84 insertions(+) (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 18e388101855..9c2be7631257 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -156,6 +156,12 @@ struct flash_info_regs { struct partition_info vendor[8]; }; +enum { + SWITCHTEC_NTB_REG_INFO_OFFSET = 0x0000, + SWITCHTEC_NTB_REG_CTRL_OFFSET = 0x4000, + SWITCHTEC_NTB_REG_DBMSG_OFFSET = 0x64000, +}; + struct ntb_info_regs { u8 partition_count; u8 partition_id; @@ -190,6 +196,84 @@ struct part_cfg_regs { u32 reserved4[159]; } __packed; +enum { + NTB_CTRL_PART_OP_LOCK = 0x1, + NTB_CTRL_PART_OP_CFG = 0x2, + NTB_CTRL_PART_OP_RESET = 0x3, + + NTB_CTRL_PART_STATUS_NORMAL = 0x1, + NTB_CTRL_PART_STATUS_LOCKED = 0x2, + NTB_CTRL_PART_STATUS_LOCKING = 0x3, + NTB_CTRL_PART_STATUS_CONFIGURING = 0x4, + NTB_CTRL_PART_STATUS_RESETTING = 0x5, + + NTB_CTRL_BAR_VALID = 1 << 0, + NTB_CTRL_BAR_DIR_WIN_EN = 1 << 4, + NTB_CTRL_BAR_LUT_WIN_EN = 1 << 5, + + NTB_CTRL_REQ_ID_EN = 1 << 0, + + NTB_CTRL_LUT_EN = 1 << 0, + + NTB_PART_CTRL_ID_PROT_DIS = 1 << 0, +}; + +struct ntb_ctrl_regs { + u32 partition_status; + u32 partition_op; + u32 partition_ctrl; + u32 bar_setup; + u32 bar_error; + u16 lut_table_entries; + u16 lut_table_offset; + u32 lut_error; + u16 req_id_table_size; + u16 req_id_table_offset; + u32 req_id_error; + u32 reserved1[7]; + struct { + u32 ctl; + u32 win_size; + u64 xlate_addr; + } bar_entry[6]; + u32 reserved2[216]; + u32 req_id_table[256]; + u32 reserved3[512]; + u64 lut_entry[512]; +} __packed; + +#define NTB_DBMSG_IMSG_STATUS BIT_ULL(32) +#define NTB_DBMSG_IMSG_MASK BIT_ULL(40) + +struct ntb_dbmsg_regs { + u32 reserved1[1024]; + u64 odb; + u64 odb_mask; + u64 idb; + u64 idb_mask; + u8 idb_vec_map[64]; + u32 msg_map; + u32 reserved2; + struct { + u32 msg; + u32 status; + } omsg[4]; + + struct { + u32 msg; + u8 status; + u8 mask; + u8 src; + u8 reserved; + } imsg[4]; + + u8 reserved3[3928]; + u8 msix_table[1024]; + u8 reserved4[3072]; + u8 pba[24]; + u8 reserved5[4072]; +} __packed; + enum { SWITCHTEC_PART_CFG_EVENT_RESET = 1 << 0, SWITCHTEC_PART_CFG_EVENT_MRPC_CMP = 1 << 1, -- cgit v1.2.3 From 48c302dc8f3acc1b0ef56a28569bc6a35b9b0e60 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:43 -0600 Subject: NTB: switchtec: Add link event notifier callback In order for the Switchtec NTB code to handle link change events we create a notifier callback in the switchtec code which gets called whenever an appropriate event interrupt occurs. In order to preserve userspace's ability to follow these events, we compare the event count with a stored copy from last time we checked. Signed-off-by: Logan Gunthorpe Reviewed-by: Stephen Bates Reviewed-by: Kurt Schwemmer Acked-by: Bjorn Helgaas Signed-off-by: Jon Mason --- include/linux/switchtec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index 9c2be7631257..d8159944f013 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -353,6 +353,10 @@ struct switchtec_dev { wait_queue_head_t event_wq; atomic_t event_cnt; + + struct work_struct link_event_work; + void (*link_notifier)(struct switchtec_dev *stdev); + u8 link_event_count[SWITCHTEC_MAX_PFF_CSR]; }; static inline struct switchtec_dev *to_stdev(struct device *dev) -- cgit v1.2.3 From fa5ab66e36de56f7e40dad57780d53e059dced86 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:45 -0600 Subject: NTB: Add check and comment for link up to mw_count() and mw_get_align() Adds a comment and a check to ntb_mw_get_align() so that it always fails if the function is called before the link is up. Also adds a comment to ntb_mw_count() to note that it may return 0 if it is called before the link is up. This is to prevent accidental mis-use in clients that are testing on hardware that this doesn't matter for. Signed-off-by: Logan Gunthorpe Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 609e232c00da..47f2966cfd7f 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -730,7 +730,8 @@ static inline int ntb_link_disable(struct ntb_dev *ntb) * Hardware and topology may support a different number of memory windows. * Moreover different peer devices can support different number of memory * windows. Simply speaking this method returns the number of possible inbound - * memory windows to share with specified peer device. + * memory windows to share with specified peer device. Note: this may return + * zero if the link is not up yet. * * Return: the number of memory windows. */ @@ -751,7 +752,7 @@ static inline int ntb_mw_count(struct ntb_dev *ntb, int pidx) * Get the alignments of an inbound memory window with specified index. * NULL may be given for any output parameter if the value is not needed. * The alignment and size parameters may be used for allocation of proper - * shared memory. + * shared memory. Note: this must only be called when the link is up. * * Return: Zero on success, otherwise a negative error number. */ @@ -760,6 +761,9 @@ static inline int ntb_mw_get_align(struct ntb_dev *ntb, int pidx, int widx, resource_size_t *size_align, resource_size_t *size_max) { + if (!(ntb_link_is_up(ntb, NULL, NULL) & (1 << pidx))) + return -ENOTCONN; + return ntb->ops->mw_get_align(ntb, pidx, widx, addr_align, size_align, size_max); } -- cgit v1.2.3 From 33dea5aae0320345af26ae9aba0894a930e0d4ec Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:46 -0600 Subject: NTB: switchtec_ntb: Introduce initial NTB driver Seeing the Switchtec NTB hardware shares the same endpoint as the management endpoint we utilize the class_interface API to register an NTB driver for every Switchtec device in the system that has the NTB class code. Signed-off-by: Logan Gunthorpe Reviewed-by: Stephen Bates Reviewed-by: Kurt Schwemmer Acked-by: Allen Hubbe Acked-by: Bjorn Helgaas Signed-off-by: Jon Mason --- include/linux/switchtec.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/switchtec.h b/include/linux/switchtec.h index d8159944f013..09d73d0d1aa8 100644 --- a/include/linux/switchtec.h +++ b/include/linux/switchtec.h @@ -320,6 +320,8 @@ struct pff_csr_regs { u32 reserved4[174]; } __packed; +struct switchtec_ntb; + struct switchtec_dev { struct pci_dev *pdev; struct device dev; @@ -357,6 +359,8 @@ struct switchtec_dev { struct work_struct link_event_work; void (*link_notifier)(struct switchtec_dev *stdev); u8 link_event_count[SWITCHTEC_MAX_PFF_CSR]; + + struct switchtec_ntb *sndev; }; static inline struct switchtec_dev *to_stdev(struct device *dev) -- cgit v1.2.3 From e099b45b7c27b4fc6510918ea8c7d18980787283 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Thu, 3 Aug 2017 12:19:49 -0600 Subject: NTB: switchtec_ntb: Add skeleton NTB driver Add a skeleton NTB driver which will be filled out in subsequent patches. Signed-off-by: Logan Gunthorpe Reviewed-by: Stephen Bates Reviewed-by: Kurt Schwemmer Acked-by: Allen Hubbe Signed-off-by: Jon Mason --- include/linux/ntb.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index 47f2966cfd7f..c308964777eb 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -70,6 +70,7 @@ struct pci_dev; * @NTB_TOPO_SEC: On secondary side of remote ntb. * @NTB_TOPO_B2B_USD: On primary side of local ntb upstream of remote ntb. * @NTB_TOPO_B2B_DSD: On primary side of local ntb downstream of remote ntb. + * @NTB_TOPO_SWITCH: Connected via a switch which supports ntb. */ enum ntb_topo { NTB_TOPO_NONE = -1, @@ -77,6 +78,7 @@ enum ntb_topo { NTB_TOPO_SEC, NTB_TOPO_B2B_USD, NTB_TOPO_B2B_DSD, + NTB_TOPO_SWITCH, }; static inline int ntb_topo_is_b2b(enum ntb_topo topo) @@ -97,6 +99,7 @@ static inline char *ntb_topo_string(enum ntb_topo topo) case NTB_TOPO_SEC: return "NTB_TOPO_SEC"; case NTB_TOPO_B2B_USD: return "NTB_TOPO_B2B_USD"; case NTB_TOPO_B2B_DSD: return "NTB_TOPO_B2B_DSD"; + case NTB_TOPO_SWITCH: return "NTB_TOPO_SWITCH"; } return "NTB_TOPO_INVALID"; } -- cgit v1.2.3 From 288b3de55aace830f13280985ec9e6bcbff33b1b Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:54 -0800 Subject: bpf: offload: move offload device validation out to the drivers With TC shared block changes we can't depend on correct netdev pointer being available in cls_bpf. Move the device validation to the driver. Core will only make sure that offloaded programs are always attached in the driver (or in HW by the driver). We trust that drivers which implement offload callbacks will perform necessary checks. Moving the checks to the driver is generally a useful thing, in practice the check should be against a switchdev instance, not a netdev, given that most ASICs will probably allow using the same program on many ports. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Acked-by: Jiri Pirko Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c397934f91dd..f82be640731e 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -336,7 +336,7 @@ extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - struct net_device *netdev); + bool attach_drv); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); void bpf_prog_sub(struct bpf_prog *prog, int i); struct bpf_prog * __must_check bpf_prog_inc(struct bpf_prog *prog); @@ -433,7 +433,7 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, - struct net_device *netdev) + bool attach_drv) { return ERR_PTR(-EOPNOTSUPP); } -- cgit v1.2.3 From 479321e9c31a6c05426790b11888427400f75ac8 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:56 -0800 Subject: bpf: turn bpf_prog_get_type() into a wrapper bpf_prog_get_type() is identical to bpf_prog_get_type_dev(), with false passed as attach_drv. Instead of keeping it as an exported symbol turn it into static inline wrapper. Signed-off-by: Jakub Kicinski Reviewed-by: Quentin Monnet Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f82be640731e..37bbab8c0f56 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -334,7 +334,6 @@ extern const struct bpf_verifier_ops tc_cls_act_analyzer_ops; extern const struct bpf_verifier_ops xdp_analyzer_ops; struct bpf_prog *bpf_prog_get(u32 ufd); -struct bpf_prog *bpf_prog_get_type(u32 ufd, enum bpf_prog_type type); struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv); struct bpf_prog * __must_check bpf_prog_add(struct bpf_prog *prog, int i); @@ -425,12 +424,6 @@ static inline struct bpf_prog *bpf_prog_get(u32 ufd) return ERR_PTR(-EOPNOTSUPP); } -static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, - enum bpf_prog_type type) -{ - return ERR_PTR(-EOPNOTSUPP); -} - static inline struct bpf_prog *bpf_prog_get_type_dev(u32 ufd, enum bpf_prog_type type, bool attach_drv) @@ -514,6 +507,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, } #endif /* CONFIG_BPF_SYSCALL */ +static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, + enum bpf_prog_type type) +{ + return bpf_prog_get_type_dev(ufd, type, false); +} + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); -- cgit v1.2.3 From 1ee640095f049e7ac4ec36b985abada497b98cc2 Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:21:59 -0800 Subject: bpf: revert report offload info to user space This reverts commit bd601b6ada11 ("bpf: report offload info to user space"). The ifindex by itself is not sufficient, we should provide information on which network namespace this ifindex belongs to. After considering some options we concluded that it's best to just remove this API for now, and rework it in -next. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 37bbab8c0f56..76c577281d78 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -515,7 +515,6 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); -u32 bpf_prog_offload_ifindex(struct bpf_prog *prog); #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_init(struct bpf_prog *prog, union bpf_attr *attr); -- cgit v1.2.3 From 1438019479349d262b76f8767ace3273d11b6dcb Mon Sep 17 00:00:00 2001 From: Jakub Kicinski Date: Mon, 20 Nov 2017 15:22:00 -0800 Subject: bpf: make bpf_prog_offload_verifier_prep() static inline Header implementation of bpf_prog_offload_verifier_prep() which is used if CONFIG_NET=n should be a static inline. Signed-off-by: Jakub Kicinski Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 07b96aaca256..b61482d354a2 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -171,7 +171,7 @@ static inline struct bpf_reg_state *cur_regs(struct bpf_verifier_env *env) #if defined(CONFIG_NET) && defined(CONFIG_BPF_SYSCALL) int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env); #else -int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) +static inline int bpf_prog_offload_verifier_prep(struct bpf_verifier_env *env) { return -EOPNOTSUPP; } -- cgit v1.2.3 From aa5222e92f8000ed3c1c38dddf11c83222aadfb3 Mon Sep 17 00:00:00 2001 From: Dan Carpenter Date: Fri, 13 Oct 2017 10:01:22 +0300 Subject: sched/deadline: Don't use dubious signed bitfields It doesn't cause a run-time bug, but these bitfields should be unsigned. When it's signed ->dl_throttled is set to either 0 or -1, instead of 0 and 1 as expected. The sched.h file is included into tons of places so Sparse generates a flood of warnings like this: ./include/linux/sched.h:477:54: error: dubious one-bit signed bitfield Reported-by: Matthew Wilcox Reported-by: Xin Long Signed-off-by: Dan Carpenter Reviewed-by: Luca Abeni Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: kernel-janitors@vger.kernel.org Cc: luca abeni Link: http://lkml.kernel.org/r/20171013070121.dzcncojuj2f4utij@mwanda Signed-off-by: Ingo Molnar --- include/linux/sched.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a5dc7c98b0a2..21991d668d35 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -473,10 +473,10 @@ struct sched_dl_entity { * conditions between the inactive timer handler and the wakeup * code. */ - int dl_throttled : 1; - int dl_boosted : 1; - int dl_yielded : 1; - int dl_non_contending : 1; + unsigned int dl_throttled : 1; + unsigned int dl_boosted : 1; + unsigned int dl_yielded : 1; + unsigned int dl_non_contending : 1; /* * Bandwidth enforcement timer. Each -deadline task has its -- cgit v1.2.3 From bca237a52ca0035b0a0380003283d8bf590188d5 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 28 Aug 2017 15:03:41 -0700 Subject: block/laptop_mode: Convert timers to use timer_setup() In preparation for unconditionally passing the struct timer_list pointer to all timer callbacks, switch to using the new timer_setup() and from_timer() to pass the timer pointer explicitly. Cc: Jens Axboe Cc: Michal Hocko Cc: Andrew Morton Cc: Jan Kara Cc: Johannes Weiner Cc: Nicholas Piggin Cc: Vladimir Davydov Cc: Matthew Wilcox Cc: Jeff Layton Cc: linux-block@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Kees Cook --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index f42d85631d17..fdfd04e348f6 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -308,7 +308,7 @@ static inline void cgroup_writeback_umount(void) void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); void laptop_mode_sync(struct work_struct *work); -void laptop_mode_timer_fn(unsigned long data); +void laptop_mode_timer_fn(struct timer_list *t); #else static inline void laptop_sync_completion(void) { } #endif -- cgit v1.2.3 From 7eeb6b893bd28c68b6d664de1d3120e49b855cdb Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Wed, 11 Oct 2017 23:13:49 -0700 Subject: timer: Remove init_timer() interface All users of init_timer() have been updated. Remove the ancient interface. Cc: Thomas Gleixner Cc: Jonathan Corbet Signed-off-by: Kees Cook --- include/linux/timer.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index bf781acfc6d8..a58097ea7cfc 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -117,9 +117,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, init_timer_on_stack_key((_timer), (_flags), NULL, NULL) #endif -#define init_timer(timer) \ - __init_timer((timer), 0) - #define __setup_timer(_timer, _fn, _data, _flags) \ do { \ __init_timer((_timer), (_flags)); \ -- cgit v1.2.3 From 513ae785c63c30741e46f43960213d4ae5382ec0 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:02:27 -0700 Subject: timer: Remove setup_*timer() interface With all callers converted to timer_setup(), the old setup_*timer() interface can be removed. Cc: Thomas Gleixner Signed-off-by: Kees Cook --- include/linux/timer.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index a58097ea7cfc..47615dca4c5c 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -131,23 +131,6 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, (_timer)->data = (_data); \ } while (0) -#define setup_timer(timer, fn, data) \ - __setup_timer((timer), (fn), (data), 0) -#define setup_pinned_timer(timer, fn, data) \ - __setup_timer((timer), (fn), (data), TIMER_PINNED) -#define setup_deferrable_timer(timer, fn, data) \ - __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE) -#define setup_pinned_deferrable_timer(timer, fn, data) \ - __setup_timer((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) -#define setup_timer_on_stack(timer, fn, data) \ - __setup_timer_on_stack((timer), (fn), (data), 0) -#define setup_pinned_timer_on_stack(timer, fn, data) \ - __setup_timer_on_stack((timer), (fn), (data), TIMER_PINNED) -#define setup_deferrable_timer_on_stack(timer, fn, data) \ - __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE) -#define setup_pinned_deferrable_timer_on_stack(timer, fn, data) \ - __setup_timer_on_stack((timer), (fn), (data), TIMER_DEFERRABLE | TIMER_PINNED) - #ifndef CONFIG_LOCKDEP static inline void timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *), -- cgit v1.2.3 From c1eba5bcb6430868427e0b9d1cd1205a07302f06 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:18:19 -0700 Subject: timer: Pass timer_list pointer to callbacks unconditionally Now that all timer callbacks are already taking their struct timer_list pointer as the callback argument, just do this unconditionally and remove the .data field. Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Signed-off-by: Kees Cook --- include/linux/timer.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 47615dca4c5c..20a6e7af5fd6 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -18,7 +18,6 @@ struct timer_list { struct hlist_node entry; unsigned long expires; void (*function)(unsigned long); - unsigned long data; u32 flags; #ifdef CONFIG_LOCKDEP @@ -70,7 +69,6 @@ struct timer_list { #define __TIMER_INITIALIZER(_function, _data, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ - .data = (_data), \ .flags = (_flags), \ __TIMER_LOCKDEP_MAP_INITIALIZER( \ __FILE__ ":" __stringify(__LINE__)) \ @@ -121,14 +119,12 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, do { \ __init_timer((_timer), (_flags)); \ (_timer)->function = (_fn); \ - (_timer)->data = (_data); \ } while (0) #define __setup_timer_on_stack(_timer, _fn, _data, _flags) \ do { \ __init_timer_on_stack((_timer), (_flags)); \ (_timer)->function = (_fn); \ - (_timer)->data = (_data); \ } while (0) #ifndef CONFIG_LOCKDEP -- cgit v1.2.3 From 354b46b1a0adda1dd5b7f0bc2a5604cca091be5f Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 19:15:40 -0700 Subject: timer: Switch callback prototype to take struct timer_list * argument Since all callbacks have been converted, we can switch the core prototype to "struct timer_list *" now too. Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Signed-off-by: Kees Cook --- include/linux/timer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index 20a6e7af5fd6..a6d04fb72c9e 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -17,7 +17,7 @@ struct timer_list { */ struct hlist_node entry; unsigned long expires; - void (*function)(unsigned long); + void (*function)(struct timer_list *); u32 flags; #ifdef CONFIG_LOCKDEP @@ -63,7 +63,7 @@ struct timer_list { #define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) -#define TIMER_DATA_TYPE unsigned long +#define TIMER_DATA_TYPE struct timer_list * #define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) #define __TIMER_INITIALIZER(_function, _data, _flags) { \ -- cgit v1.2.3 From 1fe66ba572b455270dc35a2c099dd7328cec9e4c Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:22:50 -0700 Subject: timer: Remove unused data arguments from macros With the .data field removed, the ignored data arguments in timer macros can be removed. Cc: Thomas Gleixner Cc: Tejun Heo Cc: Lai Jiangshan Cc: Jens Axboe Cc: Andrew Morton Cc: Shaohua Li Signed-off-by: Kees Cook --- include/linux/kthread.h | 2 -- include/linux/timer.h | 18 ++++++++---------- include/linux/workqueue.h | 3 --- 3 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index 3203e36b2ee8..b855c5b72b26 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -119,7 +119,6 @@ struct kthread_delayed_work { #define KTHREAD_DELAYED_WORK_INIT(dwork, fn) { \ .work = KTHREAD_WORK_INIT((dwork).work, (fn)), \ .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\ - (TIMER_DATA_TYPE)&(dwork.timer), \ TIMER_IRQSAFE), \ } @@ -167,7 +166,6 @@ extern void __kthread_init_worker(struct kthread_worker *worker, kthread_init_work(&(dwork)->work, (fn)); \ __setup_timer(&(dwork)->timer, \ (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\ - (TIMER_DATA_TYPE)&(dwork)->timer, \ TIMER_IRQSAFE); \ } while (0) diff --git a/include/linux/timer.h b/include/linux/timer.h index a6d04fb72c9e..e6bab51db13d 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -66,7 +66,7 @@ struct timer_list { #define TIMER_DATA_TYPE struct timer_list * #define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) -#define __TIMER_INITIALIZER(_function, _data, _flags) { \ +#define __TIMER_INITIALIZER(_function, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ .flags = (_flags), \ @@ -76,7 +76,7 @@ struct timer_list { #define DEFINE_TIMER(_name, _function) \ struct timer_list _name = \ - __TIMER_INITIALIZER((TIMER_FUNC_TYPE)_function, 0, 0) + __TIMER_INITIALIZER((TIMER_FUNC_TYPE)_function, 0) void init_timer_key(struct timer_list *timer, unsigned int flags, const char *name, struct lock_class_key *key); @@ -115,13 +115,13 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, init_timer_on_stack_key((_timer), (_flags), NULL, NULL) #endif -#define __setup_timer(_timer, _fn, _data, _flags) \ +#define __setup_timer(_timer, _fn, _flags) \ do { \ __init_timer((_timer), (_flags)); \ (_timer)->function = (_fn); \ } while (0) -#define __setup_timer_on_stack(_timer, _fn, _data, _flags) \ +#define __setup_timer_on_stack(_timer, _fn, _flags) \ do { \ __init_timer_on_stack((_timer), (_flags)); \ (_timer)->function = (_fn); \ @@ -132,16 +132,14 @@ static inline void timer_setup(struct timer_list *timer, void (*callback)(struct timer_list *), unsigned int flags) { - __setup_timer(timer, (TIMER_FUNC_TYPE)callback, - (TIMER_DATA_TYPE)timer, flags); + __setup_timer(timer, (TIMER_FUNC_TYPE)callback, flags); } static inline void timer_setup_on_stack(struct timer_list *timer, void (*callback)(struct timer_list *), unsigned int flags) { - __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback, - (TIMER_DATA_TYPE)timer, flags); + __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback, flags); } #else /* @@ -151,11 +149,11 @@ static inline void timer_setup_on_stack(struct timer_list *timer, */ # define timer_setup(timer, callback, flags) \ __setup_timer((timer), (TIMER_FUNC_TYPE)(callback), \ - (TIMER_DATA_TYPE)(timer), (flags)) + (flags)) # define timer_setup_on_stack(timer, callback, flags) \ __setup_timer_on_stack((timer), \ (TIMER_FUNC_TYPE)(callback), \ - (TIMER_DATA_TYPE)(timer), (flags)) + (flags)) #endif #define from_timer(var, callback_timer, timer_fieldname) \ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 01a050fc6650..8d11580237f5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -177,7 +177,6 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)delayed_work_timer_fn,\ - (TIMER_DATA_TYPE)&(n.timer), \ (tflags) | TIMER_IRQSAFE), \ } @@ -244,7 +243,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } INIT_WORK(&(_work)->work, (_func)); \ __setup_timer(&(_work)->timer, \ (TIMER_FUNC_TYPE)delayed_work_timer_fn, \ - (TIMER_DATA_TYPE)&(_work)->timer, \ (_tflags) | TIMER_IRQSAFE); \ } while (0) @@ -253,7 +251,6 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ __setup_timer_on_stack(&(_work)->timer, \ (TIMER_FUNC_TYPE)delayed_work_timer_fn,\ - (TIMER_DATA_TYPE)&(_work)->timer,\ (_tflags) | TIMER_IRQSAFE); \ } while (0) -- cgit v1.2.3 From 188665b2d67db8953899551d1a9d4481b2a0ac60 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:14:46 -0700 Subject: timer: Pass function down to initialization routines In preparation for removing more macros, pass the function down to the initialization routines instead of doing it in macros. Cc: Thomas Gleixner Cc: John Stultz Cc: Stephen Boyd Signed-off-by: Kees Cook --- include/linux/timer.h | 33 ++++++++++++++++++--------------- 1 file changed, 18 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/timer.h b/include/linux/timer.h index e6bab51db13d..aff73b1c8f7b 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -78,53 +78,56 @@ struct timer_list { struct timer_list _name = \ __TIMER_INITIALIZER((TIMER_FUNC_TYPE)_function, 0) -void init_timer_key(struct timer_list *timer, unsigned int flags, +void init_timer_key(struct timer_list *timer, + void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); #ifdef CONFIG_DEBUG_OBJECTS_TIMERS extern void init_timer_on_stack_key(struct timer_list *timer, + void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); extern void destroy_timer_on_stack(struct timer_list *timer); #else static inline void destroy_timer_on_stack(struct timer_list *timer) { } static inline void init_timer_on_stack_key(struct timer_list *timer, - unsigned int flags, const char *name, + void (*func)(struct timer_list *), + unsigned int flags, + const char *name, struct lock_class_key *key) { - init_timer_key(timer, flags, name, key); + init_timer_key(timer, func, flags, name, key); } #endif #ifdef CONFIG_LOCKDEP -#define __init_timer(_timer, _flags) \ +#define __init_timer(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_key((_timer), (_flags), #_timer, &__key); \ + init_timer_key((_timer), (_fn), (_flags), #_timer, &__key);\ } while (0) -#define __init_timer_on_stack(_timer, _flags) \ +#define __init_timer_on_stack(_timer, _fn, _flags) \ do { \ static struct lock_class_key __key; \ - init_timer_on_stack_key((_timer), (_flags), #_timer, &__key); \ + init_timer_on_stack_key((_timer), (_fn), (_flags), \ + #_timer, &__key); \ } while (0) #else -#define __init_timer(_timer, _flags) \ - init_timer_key((_timer), (_flags), NULL, NULL) -#define __init_timer_on_stack(_timer, _flags) \ - init_timer_on_stack_key((_timer), (_flags), NULL, NULL) +#define __init_timer(_timer, _fn, _flags) \ + init_timer_key((_timer), (_fn), (_flags), NULL, NULL) +#define __init_timer_on_stack(_timer, _fn, _flags) \ + init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) #endif #define __setup_timer(_timer, _fn, _flags) \ do { \ - __init_timer((_timer), (_flags)); \ - (_timer)->function = (_fn); \ + __init_timer((_timer), (_fn), (_flags)); \ } while (0) #define __setup_timer_on_stack(_timer, _fn, _flags) \ do { \ - __init_timer_on_stack((_timer), (_flags)); \ - (_timer)->function = (_fn); \ + __init_timer_on_stack((_timer), (_fn), (_flags)); \ } while (0) #ifndef CONFIG_LOCKDEP -- cgit v1.2.3 From 919b250f8570618e84af544c3e18dad5210eb9b6 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Sun, 22 Oct 2017 18:48:43 -0700 Subject: timer: Remove redundant __setup_timer*() macros With __init_timer*() now matching __setup_timer*(), remove the redundant internal interface, clean up the resulting definitions and add more documentation. Cc: Thomas Gleixner Cc: Tejun Heo Cc: Lai Jiangshan Cc: Shaohua Li Cc: Jens Axboe Cc: Andrew Morton Signed-off-by: Kees Cook --- include/linux/kthread.h | 6 ++--- include/linux/timer.h | 56 +++++++++++++++++------------------------------ include/linux/workqueue.h | 12 +++++----- 3 files changed, 29 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index b855c5b72b26..dc850d257ea2 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -164,9 +164,9 @@ extern void __kthread_init_worker(struct kthread_worker *worker, #define kthread_init_delayed_work(dwork, fn) \ do { \ kthread_init_work(&(dwork)->work, (fn)); \ - __setup_timer(&(dwork)->timer, \ - (TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\ - TIMER_IRQSAFE); \ + __init_timer(&(dwork)->timer, \ + kthread_delayed_work_timer_fn, \ + TIMER_IRQSAFE); \ } while (0) int kthread_worker_fn(void *worker_ptr); diff --git a/include/linux/timer.h b/include/linux/timer.h index aff73b1c8f7b..b1ae64b112c2 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -78,6 +78,9 @@ struct timer_list { struct timer_list _name = \ __TIMER_INITIALIZER((TIMER_FUNC_TYPE)_function, 0) +/* + * LOCKDEP and DEBUG timer interfaces. + */ void init_timer_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); @@ -87,9 +90,7 @@ extern void init_timer_on_stack_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, const char *name, struct lock_class_key *key); -extern void destroy_timer_on_stack(struct timer_list *timer); #else -static inline void destroy_timer_on_stack(struct timer_list *timer) { } static inline void init_timer_on_stack_key(struct timer_list *timer, void (*func)(struct timer_list *), unsigned int flags, @@ -120,43 +121,26 @@ static inline void init_timer_on_stack_key(struct timer_list *timer, init_timer_on_stack_key((_timer), (_fn), (_flags), NULL, NULL) #endif -#define __setup_timer(_timer, _fn, _flags) \ - do { \ - __init_timer((_timer), (_fn), (_flags)); \ - } while (0) - -#define __setup_timer_on_stack(_timer, _fn, _flags) \ - do { \ - __init_timer_on_stack((_timer), (_fn), (_flags)); \ - } while (0) +/** + * timer_setup - prepare a timer for first use + * @timer: the timer in question + * @callback: the function to call when timer expires + * @flags: any TIMER_* flags + * + * Regular timer initialization should use either DEFINE_TIMER() above, + * or timer_setup(). For timers on the stack, timer_setup_on_stack() must + * be used and must be balanced with a call to destroy_timer_on_stack(). + */ +#define timer_setup(timer, callback, flags) \ + __init_timer((timer), (callback), (flags)) -#ifndef CONFIG_LOCKDEP -static inline void timer_setup(struct timer_list *timer, - void (*callback)(struct timer_list *), - unsigned int flags) -{ - __setup_timer(timer, (TIMER_FUNC_TYPE)callback, flags); -} +#define timer_setup_on_stack(timer, callback, flags) \ + __init_timer_on_stack((timer), (callback), (flags)) -static inline void timer_setup_on_stack(struct timer_list *timer, - void (*callback)(struct timer_list *), - unsigned int flags) -{ - __setup_timer_on_stack(timer, (TIMER_FUNC_TYPE)callback, flags); -} +#ifdef CONFIG_DEBUG_OBJECTS_TIMERS +extern void destroy_timer_on_stack(struct timer_list *timer); #else -/* - * Under LOCKDEP, the timer lock_class_key (set up in __init_timer) needs - * to be tied to the caller's context, so an inline (above) won't work. We - * do want to keep the inline for argument type checking, though. - */ -# define timer_setup(timer, callback, flags) \ - __setup_timer((timer), (TIMER_FUNC_TYPE)(callback), \ - (flags)) -# define timer_setup_on_stack(timer, callback, flags) \ - __setup_timer_on_stack((timer), \ - (TIMER_FUNC_TYPE)(callback), \ - (flags)) +static inline void destroy_timer_on_stack(struct timer_list *timer) { } #endif #define from_timer(var, callback_timer, timer_fieldname) \ diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 8d11580237f5..bff39faba793 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -241,17 +241,17 @@ static inline unsigned int work_static(struct work_struct *work) { return 0; } #define __INIT_DELAYED_WORK(_work, _func, _tflags) \ do { \ INIT_WORK(&(_work)->work, (_func)); \ - __setup_timer(&(_work)->timer, \ - (TIMER_FUNC_TYPE)delayed_work_timer_fn, \ - (_tflags) | TIMER_IRQSAFE); \ + __init_timer(&(_work)->timer, \ + delayed_work_timer_fn, \ + (_tflags) | TIMER_IRQSAFE); \ } while (0) #define __INIT_DELAYED_WORK_ONSTACK(_work, _func, _tflags) \ do { \ INIT_WORK_ONSTACK(&(_work)->work, (_func)); \ - __setup_timer_on_stack(&(_work)->timer, \ - (TIMER_FUNC_TYPE)delayed_work_timer_fn,\ - (_tflags) | TIMER_IRQSAFE); \ + __init_timer_on_stack(&(_work)->timer, \ + delayed_work_timer_fn, \ + (_tflags) | TIMER_IRQSAFE); \ } while (0) #define INIT_DELAYED_WORK(_work, _func) \ -- cgit v1.2.3 From 841b86f3289dbe858daeceec36423d4ea286fac2 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Mon, 23 Oct 2017 09:40:42 +0200 Subject: treewide: Remove TIMER_FUNC_TYPE and TIMER_DATA_TYPE casts With all callbacks converted, and the timer callback prototype switched over, the TIMER_FUNC_TYPE cast is no longer needed, so remove it. Conversion was done with the following scripts: perl -pi -e 's|\(TIMER_FUNC_TYPE\)||g' \ $(git grep TIMER_FUNC_TYPE | cut -d: -f1 | sort -u) perl -pi -e 's|\(TIMER_DATA_TYPE\)||g' \ $(git grep TIMER_DATA_TYPE | cut -d: -f1 | sort -u) The now unused macros are also dropped from include/linux/timer.h. Signed-off-by: Kees Cook --- include/linux/kthread.h | 2 +- include/linux/timer.h | 5 +---- include/linux/workqueue.h | 2 +- 3 files changed, 3 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kthread.h b/include/linux/kthread.h index dc850d257ea2..c1961761311d 100644 --- a/include/linux/kthread.h +++ b/include/linux/kthread.h @@ -118,7 +118,7 @@ struct kthread_delayed_work { #define KTHREAD_DELAYED_WORK_INIT(dwork, fn) { \ .work = KTHREAD_WORK_INIT((dwork).work, (fn)), \ - .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)kthread_delayed_work_timer_fn,\ + .timer = __TIMER_INITIALIZER(kthread_delayed_work_timer_fn,\ TIMER_IRQSAFE), \ } diff --git a/include/linux/timer.h b/include/linux/timer.h index b1ae64b112c2..04af640ea95b 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -63,9 +63,6 @@ struct timer_list { #define TIMER_TRACE_FLAGMASK (TIMER_MIGRATING | TIMER_DEFERRABLE | TIMER_PINNED | TIMER_IRQSAFE) -#define TIMER_DATA_TYPE struct timer_list * -#define TIMER_FUNC_TYPE void (*)(TIMER_DATA_TYPE) - #define __TIMER_INITIALIZER(_function, _flags) { \ .entry = { .next = TIMER_ENTRY_STATIC }, \ .function = (_function), \ @@ -76,7 +73,7 @@ struct timer_list { #define DEFINE_TIMER(_name, _function) \ struct timer_list _name = \ - __TIMER_INITIALIZER((TIMER_FUNC_TYPE)_function, 0) + __TIMER_INITIALIZER(_function, 0) /* * LOCKDEP and DEBUG timer interfaces. diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index bff39faba793..4a54ef96aff5 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -176,7 +176,7 @@ struct execute_work { #define __DELAYED_WORK_INITIALIZER(n, f, tflags) { \ .work = __WORK_INITIALIZER((n).work, (f)), \ - .timer = __TIMER_INITIALIZER((TIMER_FUNC_TYPE)delayed_work_timer_fn,\ + .timer = __TIMER_INITIALIZER(delayed_work_timer_fn,\ (tflags) | TIMER_IRQSAFE), \ } -- cgit v1.2.3 From 860dd4424f344400b491b212ee4acb3a358ba9d9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 21 Nov 2017 14:23:37 +0100 Subject: scsi: dma-mapping: always provide dma_get_cache_alignment Provide the dummy version of dma_get_cache_alignment that always returns 1 even if CONFIG_HAS_DMA is not set, so that drivers and subsystems can use it without ifdefs. Cc: stable@vger.kernel.org Signed-off-by: Christoph Hellwig Signed-off-by: Martin K. Petersen --- include/linux/dma-mapping.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index eee1499db396..29cfd18360be 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -710,7 +710,6 @@ static inline void *dma_zalloc_coherent(struct device *dev, size_t size, return ret; } -#ifdef CONFIG_HAS_DMA static inline int dma_get_cache_alignment(void) { #ifdef ARCH_DMA_MINALIGN @@ -718,7 +717,6 @@ static inline int dma_get_cache_alignment(void) #endif return 1; } -#endif /* flags for the coherent memory api */ #define DMA_MEMORY_EXCLUSIVE 0x01 -- cgit v1.2.3 From db1ac4964fa172803a0fea83033cd35d380a8a77 Mon Sep 17 00:00:00 2001 From: Gianluca Borello Date: Wed, 22 Nov 2017 18:32:53 +0000 Subject: bpf: introduce ARG_PTR_TO_MEM_OR_NULL With the current ARG_PTR_TO_MEM/ARG_PTR_TO_UNINIT_MEM semantics, an helper argument can be NULL when the next argument type is ARG_CONST_SIZE_OR_ZERO and the verifier can prove the value of this next argument is 0. However, most helpers are just interested in handling , so forcing them to deal with makes the implementation of those helpers more complicated for no apparent benefits, requiring them to explicitly handle those corner cases with checks that bpf programs could start relying upon, preventing the possibility of removing them later. Solve this by making ARG_PTR_TO_MEM/ARG_PTR_TO_UNINIT_MEM never accept NULL even when ARG_CONST_SIZE_OR_ZERO is set, and introduce a new argument type ARG_PTR_TO_MEM_OR_NULL to explicitly deal with the NULL case. Currently, the only helper that needs this is bpf_csum_diff_proto(), so change arg1 and arg3 to this new type as well. Also add a new battery of tests that explicitly test the !ARG_PTR_TO_MEM_OR_NULL combination: all the current ones testing the various variations are focused on bpf_csum_diff, so cover also other helpers. Signed-off-by: Gianluca Borello Acked-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 76c577281d78..e55e4255a210 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -78,6 +78,7 @@ enum bpf_arg_type { * functions that access data on eBPF program stack */ ARG_PTR_TO_MEM, /* pointer to valid memory (stack, packet, map value) */ + ARG_PTR_TO_MEM_OR_NULL, /* pointer to valid memory or NULL */ ARG_PTR_TO_UNINIT_MEM, /* pointer to memory does not need to be initialized, * helper function must fill all bytes or clear * them in error case. -- cgit v1.2.3 From c131187db2d3fa2f8bf32fdf4e9a4ef805168467 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 22 Nov 2017 16:42:05 -0800 Subject: bpf: fix branch pruning logic when the verifier detects that register contains a runtime constant and it's compared with another constant it will prune exploration of the branch that is guaranteed not to be taken at runtime. This is all correct, but malicious program may be constructed in such a way that it always has a constant comparison and the other branch is never taken under any conditions. In this case such path through the program will not be explored by the verifier. It won't be taken at run-time either, but since all instructions are JITed the malicious program may cause JITs to complain about using reserved fields, etc. To fix the issue we have to track the instructions explored by the verifier and sanitize instructions that are dead at run time with NOPs. We cannot reject such dead code, since llvm generates it for valid C code, since it doesn't do as much data flow analysis as the verifier does. Fixes: 17a5267067f3 ("bpf: verifier (add verifier core)") Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index b61482d354a2..c561b986bab0 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -115,7 +115,7 @@ struct bpf_insn_aux_data { struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ - int converted_op_size; /* the valid value width after perceived conversion */ + bool seen; /* this insn was processed by the verifier */ }; #define MAX_USED_MAPS 64 /* max number of maps accessed by one eBPF program */ -- cgit v1.2.3 From 0c19f846d582af919db66a5914a0189f9f92c936 Mon Sep 17 00:00:00 2001 From: Willem de Bruijn Date: Tue, 21 Nov 2017 10:22:25 -0500 Subject: net: accept UFO datagrams from tuntap and packet Tuntap and similar devices can inject GSO packets. Accept type VIRTIO_NET_HDR_GSO_UDP, even though not generating UFO natively. Processes are expected to use feature negotiation such as TUNSETOFFLOAD to detect supported offload types and refrain from injecting other packets. This process breaks down with live migration: guest kernels do not renegotiate flags, so destination hosts need to expose all features that the source host does. Partially revert the UFO removal from 182e0b6b5846~1..d9d30adf5677. This patch introduces nearly(*) no new code to simplify verification. It brings back verbatim tuntap UFO negotiation, VIRTIO_NET_HDR_GSO_UDP insertion and software UFO segmentation. It does not reinstate protocol stack support, hardware offload (NETIF_F_UFO), SKB_GSO_UDP tunneling in SKB_GSO_SOFTWARE or reception of VIRTIO_NET_HDR_GSO_UDP packets in tuntap. To support SKB_GSO_UDP reappearing in the stack, also reinstate logic in act_csum and openvswitch. Achieve equivalence with v4.13 HEAD by squashing in commit 939912216fa8 ("net: skb_needs_check() removes CHECKSUM_UNNECESSARY check for tx.") and reverting commit 8d63bee643f1 ("net: avoid skb_warn_bad_offload false positives on UFO"). (*) To avoid having to bring back skb_shinfo(skb)->ip6_frag_id, ipv6_proxy_select_ident is changed to return a __be32 and this is assigned directly to the frag_hdr. Also, SKB_GSO_UDP is inserted at the end of the enum to minimize code churn. Tested Booted a v4.13 guest kernel with QEMU. On a host kernel before this patch `ethtool -k eth0` shows UFO disabled. After the patch, it is enabled, same as on a v4.13 host kernel. A UFO packet sent from the guest appears on the tap device: host: nc -l -p -u 8000 & tcpdump -n -i tap0 guest: dd if=/dev/zero of=payload.txt bs=1 count=2000 nc -u 192.16.1.1 8000 < payload.txt Direct tap to tap transmission of VIRTIO_NET_HDR_GSO_UDP succeeds, packets arriving fragmented: ./with_tap_pair.sh ./tap_send_ufo tap0 tap1 (from https://github.com/wdebruij/kerneltools/tree/master/tests) Changes v1 -> v2 - simplified set_offload change (review comment) - documented test procedure Link: http://lkml.kernel.org/r/ Fixes: fb652fdfe837 ("macvlan/macvtap: Remove NETIF_F_UFO advertisement.") Reported-by: Michal Kubecek Signed-off-by: Willem de Bruijn Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 4 +++- include/linux/netdevice.h | 1 + include/linux/skbuff.h | 2 ++ include/linux/virtio_net.h | 5 ++++- 4 files changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index dc8b4896b77b..b1b0ca7ccb2b 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -54,8 +54,9 @@ enum { NETIF_F_GSO_TUNNEL_REMCSUM_BIT, /* ... TUNNEL with TSO & REMCSUM */ NETIF_F_GSO_SCTP_BIT, /* ... SCTP fragmentation */ NETIF_F_GSO_ESP_BIT, /* ... ESP with TSO */ + NETIF_F_GSO_UDP_BIT, /* ... UFO, deprecated except tuntap */ /**/NETIF_F_GSO_LAST = /* last bit, see GSO_MASK */ - NETIF_F_GSO_ESP_BIT, + NETIF_F_GSO_UDP_BIT, NETIF_F_FCOE_CRC_BIT, /* FCoE CRC32 */ NETIF_F_SCTP_CRC_BIT, /* SCTP checksum offload */ @@ -132,6 +133,7 @@ enum { #define NETIF_F_GSO_TUNNEL_REMCSUM __NETIF_F(GSO_TUNNEL_REMCSUM) #define NETIF_F_GSO_SCTP __NETIF_F(GSO_SCTP) #define NETIF_F_GSO_ESP __NETIF_F(GSO_ESP) +#define NETIF_F_GSO_UDP __NETIF_F(GSO_UDP) #define NETIF_F_HW_VLAN_STAG_FILTER __NETIF_F(HW_VLAN_STAG_FILTER) #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 6b274bfe489f..ef789e1d679e 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4140,6 +4140,7 @@ static inline bool net_gso_ok(netdev_features_t features, int gso_type) BUILD_BUG_ON(SKB_GSO_TUNNEL_REMCSUM != (NETIF_F_GSO_TUNNEL_REMCSUM >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_SCTP != (NETIF_F_GSO_SCTP >> NETIF_F_GSO_SHIFT)); BUILD_BUG_ON(SKB_GSO_ESP != (NETIF_F_GSO_ESP >> NETIF_F_GSO_SHIFT)); + BUILD_BUG_ON(SKB_GSO_UDP != (NETIF_F_GSO_UDP >> NETIF_F_GSO_SHIFT)); return (features & feature) == feature; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index ed06e1c28fc7..bc486ef23f20 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -568,6 +568,8 @@ enum { SKB_GSO_SCTP = 1 << 14, SKB_GSO_ESP = 1 << 15, + + SKB_GSO_UDP = 1 << 16, }; #if BITS_PER_LONG > 32 diff --git a/include/linux/virtio_net.h b/include/linux/virtio_net.h index 210034c896e3..f144216febc6 100644 --- a/include/linux/virtio_net.h +++ b/include/linux/virtio_net.h @@ -9,7 +9,7 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, const struct virtio_net_hdr *hdr, bool little_endian) { - unsigned short gso_type = 0; + unsigned int gso_type = 0; if (hdr->gso_type != VIRTIO_NET_HDR_GSO_NONE) { switch (hdr->gso_type & ~VIRTIO_NET_HDR_GSO_ECN) { @@ -19,6 +19,9 @@ static inline int virtio_net_hdr_to_skb(struct sk_buff *skb, case VIRTIO_NET_HDR_GSO_TCPV6: gso_type = SKB_GSO_TCPV6; break; + case VIRTIO_NET_HDR_GSO_UDP: + gso_type = SKB_GSO_UDP; + break; default: return -EINVAL; } -- cgit v1.2.3 From fd2fa6c18b729e19c51240453a521f76c766247e Mon Sep 17 00:00:00 2001 From: Bjorn Helgaas Date: Wed, 22 Nov 2017 16:13:37 -0600 Subject: x86/PCI: Remove unused HyperTransport interrupt support There are no in-tree callers of ht_create_irq(), the driver interface for HyperTransport interrupts, left. Remove the unused entry point and all the supporting code. See 8b955b0dddb3 ("[PATCH] Initial generic hypertransport interrupt support"). Signed-off-by: Bjorn Helgaas Signed-off-by: Thomas Gleixner Acked-by: "Eric W. Biederman" Cc: Andi Kleen Cc: Greg Kroah-Hartman Cc: linux-pci@vger.kernel.org Cc: Benjamin Herrenschmidt Link: https://lkml.kernel.org/r/20171122221337.3877.23362.stgit@bhelgaas-glaptop.roam.corp.google.com --- include/linux/htirq.h | 39 --------------------------------------- include/linux/pci.h | 6 ------ 2 files changed, 45 deletions(-) delete mode 100644 include/linux/htirq.h (limited to 'include/linux') diff --git a/include/linux/htirq.h b/include/linux/htirq.h deleted file mode 100644 index 127c39d815ba..000000000000 --- a/include/linux/htirq.h +++ /dev/null @@ -1,39 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -#ifndef LINUX_HTIRQ_H -#define LINUX_HTIRQ_H - -struct pci_dev; -struct irq_data; - -struct ht_irq_msg { - u32 address_lo; /* low 32 bits of the ht irq message */ - u32 address_hi; /* high 32 bits of the it irq message */ -}; - -typedef void (ht_irq_update_t)(struct pci_dev *dev, int irq, - struct ht_irq_msg *msg); - -struct ht_irq_cfg { - struct pci_dev *dev; - /* Update callback used to cope with buggy hardware */ - ht_irq_update_t *update; - unsigned pos; - unsigned idx; - struct ht_irq_msg msg; -}; - -/* Helper functions.. */ -void fetch_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -void write_ht_irq_msg(unsigned int irq, struct ht_irq_msg *msg); -void mask_ht_irq(struct irq_data *data); -void unmask_ht_irq(struct irq_data *data); - -/* The arch hook for getting things started */ -int arch_setup_ht_irq(int idx, int pos, struct pci_dev *dev, - ht_irq_update_t *update); -void arch_teardown_ht_irq(unsigned int irq); - -/* For drivers of buggy hardware */ -int __ht_create_irq(struct pci_dev *dev, int idx, ht_irq_update_t *update); - -#endif /* LINUX_HTIRQ_H */ diff --git a/include/linux/pci.h b/include/linux/pci.h index d16a7c037ec0..16287684dfe8 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1484,12 +1484,6 @@ static inline void pcie_set_ecrc_checking(struct pci_dev *dev) { } static inline void pcie_ecrc_get_policy(char *str) { } #endif -#ifdef CONFIG_HT_IRQ -/* The functions a driver should call */ -int ht_create_irq(struct pci_dev *dev, int idx); -void ht_destroy_irq(unsigned int irq); -#endif /* CONFIG_HT_IRQ */ - #ifdef CONFIG_PCI_ATS /* Address Translation Service */ void pci_ats_init(struct pci_dev *dev); -- cgit v1.2.3 From 20b7035c66bacc909ae3ffe92c1a1ea7db99fe4f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jan=20H=2E=20Sch=C3=B6nherr?= Date: Fri, 24 Nov 2017 22:39:01 +0100 Subject: KVM: Let KVM_SET_SIGNAL_MASK work as advertised MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit KVM API says for the signal mask you set via KVM_SET_SIGNAL_MASK, that "any unblocked signal received [...] will cause KVM_RUN to return with -EINTR" and that "the signal will only be delivered if not blocked by the original signal mask". This, however, is only true, when the calling task has a signal handler registered for a signal. If not, signal evaluation is short-circuited for SIG_IGN and SIG_DFL, and the signal is either ignored without KVM_RUN returning or the whole process is terminated. Make KVM_SET_SIGNAL_MASK behave as advertised by utilizing logic similar to that in do_sigtimedwait() to avoid short-circuiting of signals. Signed-off-by: Jan H. Schönherr Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 2e754b7c282c..893d6d606cd0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -715,6 +715,9 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, unsigned long len); void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); +void kvm_sigset_activate(struct kvm_vcpu *vcpu); +void kvm_sigset_deactivate(struct kvm_vcpu *vcpu); + void kvm_vcpu_block(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From 1751e8a6cb935e555fcdbcb9ab4f0446e322ca3e Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 27 Nov 2017 13:05:09 -0800 Subject: Rename superblock flags (MS_xyz -> SB_xyz) This is a pure automated search-and-replace of the internal kernel superblock flags. The s_flags are now called SB_*, with the names and the values for the moment mirroring the MS_* flags that they're equivalent to. Note how the MS_xyz flags are the ones passed to the mount system call, while the SB_xyz flags are what we then use in sb->s_flags. The script to do this was: # places to look in; re security/*: it generally should *not* be # touched (that stuff parses mount(2) arguments directly), but # there are two places where we really deal with superblock flags. FILES="drivers/mtd drivers/staging/lustre fs ipc mm \ include/linux/fs.h include/uapi/linux/bfs_fs.h \ security/apparmor/apparmorfs.c security/apparmor/include/lib.h" # the list of MS_... constants SYMS="RDONLY NOSUID NODEV NOEXEC SYNCHRONOUS REMOUNT MANDLOCK \ DIRSYNC NOATIME NODIRATIME BIND MOVE REC VERBOSE SILENT \ POSIXACL UNBINDABLE PRIVATE SLAVE SHARED RELATIME KERNMOUNT \ I_VERSION STRICTATIME LAZYTIME SUBMOUNT NOREMOTELOCK NOSEC BORN \ ACTIVE NOUSER" SED_PROG= for i in $SYMS; do SED_PROG="$SED_PROG -e s/MS_$i/SB_$i/g"; done # we want files that contain at least one of MS_..., # with fs/namespace.c and fs/pnode.c excluded. L=$(for i in $SYMS; do git grep -w -l MS_$i $FILES; done| sort|uniq|grep -v '^fs/namespace.c'|grep -v '^fs/pnode.c') for f in $L; do sed -i $f $SED_PROG; done Requested-by: Al Viro Signed-off-by: Linus Torvalds --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2995a271ec46..bbd92da0946e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1872,7 +1872,7 @@ struct super_operations { */ #define __IS_FLG(inode, flg) ((inode)->i_sb->s_flags & (flg)) -static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & MS_RDONLY; } +static inline bool sb_rdonly(const struct super_block *sb) { return sb->s_flags & SB_RDONLY; } #define IS_RDONLY(inode) sb_rdonly((inode)->i_sb) #define IS_SYNC(inode) (__IS_FLG(inode, SB_SYNCHRONOUS) || \ ((inode)->i_flags & S_SYNC)) -- cgit v1.2.3 From d34971a65b72619508f49cd237283e92f1c329d5 Mon Sep 17 00:00:00 2001 From: Bhumika Goyal Date: Tue, 17 Oct 2017 18:14:23 +0200 Subject: sunrpc: make the function arg as const Make the struct cache_detail *tmpl argument of the function cache_create_net as const as it is only getting passed to kmemup having the argument as const void *. Add const to the prototype too. Signed-off-by: Bhumika Goyal Reviewed-by: Jeff Layton Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 270bad0e1bed..40d2822f0e2f 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -213,7 +213,7 @@ extern void __init cache_initialize(void); extern int cache_register_net(struct cache_detail *cd, struct net *net); extern void cache_unregister_net(struct cache_detail *cd, struct net *net); -extern struct cache_detail *cache_create_net(struct cache_detail *tmpl, struct net *net); +extern struct cache_detail *cache_create_net(const struct cache_detail *tmpl, struct net *net); extern void cache_destroy_net(struct cache_detail *cd, struct net *net); extern void sunrpc_init_cache_detail(struct cache_detail *cd); -- cgit v1.2.3 From f50caa9b517a2542ae9769fc17ad84110ae07c8b Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 14 Nov 2017 12:40:31 +0100 Subject: debugfs: fix debugfs_real_fops() build error Some drivers use debugfs_real_fops() even when CONFIG_DEBUG_FS is disabled, which now leads to a build error: In file included from include/linux/list.h:9:0, from include/linux/wait.h:7, from include/linux/wait_bit.h:8, from include/linux/fs.h:6, from drivers/net/wireless/broadcom/b43legacy/debugfs.c:26: drivers/net/wireless/broadcom/b43legacy/debugfs.c: In function 'b43legacy_debugfs_read': drivers/net/wireless/broadcom/b43legacy/debugfs.c:224:23: error: implicit declaration of function 'debugfs_real_fops'; did you mean 'debugfs_create_bool'? [-Werror=implicit-function-declaration] My first impulse was to add another 'static inline' dummy function returning NULL for it, which would work fine. However, most callers feed the pointer into container_of(), so it seems a little dangerous here. Since all the callers are inside of a read/write file operation that gets eliminated in this configuration, so having an 'extern' declaration seems better here. If it ever gets used in a dangerous way, that will now result in a link error. Fixes: 7c8d469877b1 ("debugfs: add support for more elaborate ->d_fsdata") Cc: Jakub Kicinski Cc: Simon Horman Signed-off-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index f36ecc2a5712..3b0ba54cc4d5 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -216,6 +216,8 @@ static inline void debugfs_remove(struct dentry *dentry) static inline void debugfs_remove_recursive(struct dentry *dentry) { } +const struct file_operations *debugfs_real_fops(const struct file *filp); + static inline int debugfs_file_get(struct dentry *dentry) { return 0; -- cgit v1.2.3 From fd00cf81a9a84776ba58e56bd042c726dcf75cf3 Mon Sep 17 00:00:00 2001 From: Johan Hovold Date: Fri, 3 Nov 2017 15:30:53 +0100 Subject: serdev: fix receive_buf return value when no callback The receive_buf callback is supposed to return the number of bytes processed and should specifically not return a negative errno. Due to missing sanity checks in the serdev tty-port controller, a driver not providing a receive_buf callback could cause the flush_to_ldisc() worker to spin in a tight loop when the tty buffer pointers are incremented with -EINVAL (-22). The missing sanity checks have now been added to the tty-port controller, but let's fix up the serdev-controller helper as well. Signed-off-by: Johan Hovold Signed-off-by: Greg Kroah-Hartman --- include/linux/serdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/serdev.h b/include/linux/serdev.h index e69402d4a8ae..d609e6dc5bad 100644 --- a/include/linux/serdev.h +++ b/include/linux/serdev.h @@ -184,7 +184,7 @@ static inline int serdev_controller_receive_buf(struct serdev_controller *ctrl, struct serdev_device *serdev = ctrl->serdev; if (!serdev || !serdev->ops->receive_buf) - return -EINVAL; + return 0; return serdev->ops->receive_buf(serdev, data, count); } -- cgit v1.2.3 From 7fa32e5ec28b1609abc0b797b58267f725fc3964 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Tue, 14 Nov 2017 06:53:33 -0700 Subject: Drivers: hv: vmbus: Fix a rescind issue The current rescind processing code will not correctly handle the case where the host immediately rescinds a channel that has been offerred. In this case, we could be blocked in the open call and since the channel is rescinded, the host will not respond and we could be blocked forever in the vmbus open call.i Fix this problem. Signed-off-by: K. Y. Srinivasan Cc: stable@vger.kernel.org Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index f3e97c5f94c9..6c9336626592 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -708,6 +708,7 @@ struct vmbus_channel { u8 monitor_bit; bool rescind; /* got rescind msg */ + struct completion rescind_event; u32 ringbuffer_gpadlhandle; -- cgit v1.2.3 From 668533dc0764b30c9dd2baf3ca800156f688326b Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 29 Nov 2017 10:30:13 -0800 Subject: kallsyms: take advantage of the new '%px' format The conditional kallsym hex printing used a special fixed-width '%lx' output (KALLSYM_FMT) in preparation for the hashing of %p, but that series ended up adding a %px specifier to help with the conversions. Use it, and avoid the "print pointer as an unsigned long" code. Signed-off-by: Linus Torvalds --- include/linux/kallsyms.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kallsyms.h b/include/linux/kallsyms.h index 708f337d780b..bd118a6c60cb 100644 --- a/include/linux/kallsyms.h +++ b/include/linux/kallsyms.h @@ -14,12 +14,6 @@ #define KSYM_SYMBOL_LEN (sizeof("%s+%#lx/%#lx [%s]") + (KSYM_NAME_LEN - 1) + \ 2*(BITS_PER_LONG*3/10) + (MODULE_NAME_LEN - 1) + 1) -#ifndef CONFIG_64BIT -# define KALLSYM_FMT "%08lx" -#else -# define KALLSYM_FMT "%016lx" -#endif - struct module; #ifdef CONFIG_KALLSYMS -- cgit v1.2.3 From 1501899a898dfb5477c55534bdfd734c046da06d Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:06 -0800 Subject: mm: fix device-dax pud write-faults triggered by get_user_pages() Currently only get_user_pages_fast() can safely handle the writable gup case due to its use of pud_access_permitted() to check whether the pud entry is writable. In the gup slow path pud_write() is used instead of pud_access_permitted() and to date it has been unimplemented, just calls BUG_ON(). kernel BUG at ./include/linux/hugetlb.h:244! [..] RIP: 0010:follow_devmap_pud+0x482/0x490 [..] Call Trace: follow_page_mask+0x28c/0x6e0 __get_user_pages+0xe4/0x6c0 get_user_pages_unlocked+0x130/0x1b0 get_user_pages_fast+0x89/0xb0 iov_iter_get_pages_alloc+0x114/0x4a0 nfs_direct_read_schedule_iovec+0xd2/0x350 ? nfs_start_io_direct+0x63/0x70 nfs_file_direct_read+0x1e0/0x250 nfs_file_read+0x90/0xc0 For now this just implements a simple check for the _PAGE_RW bit similar to pmd_write. However, this implies that the gup-slow-path check is missing the extra checks that the gup-fast-path performs with pud_access_permitted. Later patches will align all checks to use the 'access_permitted' helper if the architecture provides it. Note that the generic 'access_permitted' helper fallback is the simple _PAGE_RW check on architectures that do not define the 'access_permitted' helper(s). [dan.j.williams@intel.com: fix powerpc compile error] Link: http://lkml.kernel.org/r/151129126165.37405.16031785266675461397.stgit@dwillia2-desk3.amr.corp.intel.com Link: http://lkml.kernel.org/r/151043109938.2842.14834662818213616199.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: a00cc7d9dd93 ("mm, x86: add support for PUD-sized transparent hugepages") Signed-off-by: Dan Williams Reported-by: Stephen Rothwell Acked-by: Thomas Gleixner [x86] Cc: Kirill A. Shutemov Cc: Catalin Marinas Cc: "David S. Miller" Cc: Dave Hansen Cc: Will Deacon Cc: "H. Peter Anvin" Cc: Ingo Molnar Cc: Arnd Bergmann Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index fbf5b31d47ee..82a25880714a 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -239,14 +239,6 @@ static inline int pgd_write(pgd_t pgd) } #endif -#ifndef pud_write -static inline int pud_write(pud_t pud) -{ - BUG(); - return 0; -} -#endif - #define HUGETLB_ANON_FILE "anon_hugepage" enum { -- cgit v1.2.3 From 31383c6865a578834dd953d9dbc88e6b19fe3997 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:28 -0800 Subject: mm, hugetlbfs: introduce ->split() to vm_operations_struct Patch series "device-dax: fix unaligned munmap handling" When device-dax is operating in huge-page mode we want it to behave like hugetlbfs and fail attempts to split vmas into unaligned ranges. It would be messy to teach the munmap path about device-dax alignment constraints in the same (hstate) way that hugetlbfs communicates this constraint. Instead, these patches introduce a new ->split() vm operation. This patch (of 2): The device-dax interface has similar constraints as hugetlbfs in that it requires the munmap path to unmap in huge page aligned units. Rather than add more custom vma handling code in __split_vma() introduce a new vm operation to perform this vma specific check. Link: http://lkml.kernel.org/r/151130418135.4029.6783191281930729710.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: dee410792419 ("/dev/dax, core: file operations and dax-mmap") Signed-off-by: Dan Williams Cc: Jeff Moyer Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ee073146aaa7..b3b6a7e313e9 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -377,6 +377,7 @@ enum page_entry_size { struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); + int (*split)(struct vm_area_struct * area, unsigned long addr); int (*mremap)(struct vm_area_struct * area); int (*fault)(struct vm_fault *vmf); int (*huge_fault)(struct vm_fault *vmf, enum page_entry_size pe_size); -- cgit v1.2.3 From 2bb6d2837083de722bfdc369cb0d76ce188dd9b4 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 29 Nov 2017 16:10:35 -0800 Subject: mm: introduce get_user_pages_longterm Patch series "introduce get_user_pages_longterm()", v2. Here is a new get_user_pages api for cases where a driver intends to keep an elevated page count indefinitely. This is distinct from usages like iov_iter_get_pages where the elevated page counts are transient. The iov_iter_get_pages cases immediately turn around and submit the pages to a device driver which will put_page when the i/o operation completes (under kernel control). In the longterm case userspace is responsible for dropping the page reference at some undefined point in the future. This is untenable for filesystem-dax case where the filesystem is in control of the lifetime of the block / page and needs reasonable limits on how long it can wait for pages in a mapping to become idle. Fixing filesystems to actually wait for dax pages to be idle before blocks from a truncate/hole-punch operation are repurposed is saved for a later patch series. Also, allowing longterm registration of dax mappings is a future patch series that introduces a "map with lease" semantic where the kernel can revoke a lease and force userspace to drop its page references. I have also tagged these for -stable to purposely break cases that might assume that longterm memory registrations for filesystem-dax mappings were supported by the kernel. The behavior regression this policy change implies is one of the reasons we maintain the "dax enabled. Warning: EXPERIMENTAL, use at your own risk" notification when mounting a filesystem in dax mode. It is worth noting the device-dax interface does not suffer the same constraints since it does not support file space management operations like hole-punch. This patch (of 4): Until there is a solution to the dma-to-dax vs truncate problem it is not safe to allow long standing memory registrations against filesytem-dax vmas. Device-dax vmas do not have this problem and are explicitly allowed. This is temporary until a "memory registration with layout-lease" mechanism can be implemented for the affected sub-systems (RDMA and V4L2). [akpm@linux-foundation.org: use kcalloc()] Link: http://lkml.kernel.org/r/151068939435.7446.13560129395419350737.stgit@dwillia2-desk3.amr.corp.intel.com Fixes: 3565fce3a659 ("mm, x86: get_user_pages() for dax mappings") Signed-off-by: Dan Williams Suggested-by: Christoph Hellwig Cc: Doug Ledford Cc: Hal Rosenstock Cc: Inki Dae Cc: Jan Kara Cc: Jason Gunthorpe Cc: Jeff Moyer Cc: Joonyoung Shim Cc: Kyungmin Park Cc: Mauro Carvalho Chehab Cc: Mel Gorman Cc: Ross Zwisler Cc: Sean Hefty Cc: Seung-Woo Kim Cc: Vlastimil Babka Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 14 ++++++++++++++ include/linux/mm.h | 13 +++++++++++++ 2 files changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index bbd92da0946e..9dc498d16cc1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3194,6 +3194,20 @@ static inline bool vma_is_dax(struct vm_area_struct *vma) return vma->vm_file && IS_DAX(vma->vm_file->f_mapping->host); } +static inline bool vma_is_fsdax(struct vm_area_struct *vma) +{ + struct inode *inode; + + if (!vma->vm_file) + return false; + if (!vma_is_dax(vma)) + return false; + inode = file_inode(vma->vm_file); + if (inode->i_mode == S_IFCHR) + return false; /* device-dax */ + return true; +} + static inline int iocb_flags(struct file *file) { int res = 0; diff --git a/include/linux/mm.h b/include/linux/mm.h index b3b6a7e313e9..ea818ff739cd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1380,6 +1380,19 @@ long get_user_pages_locked(unsigned long start, unsigned long nr_pages, unsigned int gup_flags, struct page **pages, int *locked); long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, struct page **pages, unsigned int gup_flags); +#ifdef CONFIG_FS_DAX +long get_user_pages_longterm(unsigned long start, unsigned long nr_pages, + unsigned int gup_flags, struct page **pages, + struct vm_area_struct **vmas); +#else +static inline long get_user_pages_longterm(unsigned long start, + unsigned long nr_pages, unsigned int gup_flags, + struct page **pages, struct vm_area_struct **vmas) +{ + return get_user_pages(start, nr_pages, gup_flags, pages, vmas); +} +#endif /* CONFIG_FS_DAX */ + int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -- cgit v1.2.3 From 40a899ed16486455f964e46d1af31fd4fded21c1 Mon Sep 17 00:00:00 2001 From: Zi Yan Date: Wed, 29 Nov 2017 16:11:12 -0800 Subject: mm: migrate: fix an incorrect call of prep_transhuge_page() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit In https://lkml.org/lkml/2017/11/20/411, Andrea reported that during memory hotplug/hot remove prep_transhuge_page() is called incorrectly on non-THP pages for migration, when THP is on but THP migration is not enabled. This leads to a bad state of target pages for migration. By inspecting the code, if called on a non-THP, prep_transhuge_page() will 1) change the value of the mapping of (page + 2), since it is used for THP deferred list; 2) change the lru value of (page + 1), since it is used for THP's dtor. Both can lead to data corruption of these two pages. Andrea said: "Pragmatically and from the point of view of the memory_hotplug subsys, the effect is a kernel crash when pages are being migrated during a memory hot remove offline and migration target pages are found in a bad state" This patch fixes it by only calling prep_transhuge_page() when we are certain that the target page is THP. Link: http://lkml.kernel.org/r/20171121021855.50525-1-zi.yan@sent.com Fixes: 8135d8926c08 ("mm: memory_hotplug: memory hotremove supports thp migration") Signed-off-by: Zi Yan Reported-by: Andrea Reale Cc: Naoya Horiguchi Cc: Michal Hocko Cc: "Jérôme Glisse" Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/migrate.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/migrate.h b/include/linux/migrate.h index 895ec0c4942e..a2246cf670ba 100644 --- a/include/linux/migrate.h +++ b/include/linux/migrate.h @@ -54,7 +54,7 @@ static inline struct page *new_page_nodemask(struct page *page, new_page = __alloc_pages_nodemask(gfp_mask, order, preferred_nid, nodemask); - if (new_page && PageTransHuge(page)) + if (new_page && PageTransHuge(new_page)) prep_transhuge_page(new_page); return new_page; -- cgit v1.2.3 From 5d38f049cee1e1c4a7ac55aa79d37d01ddcc3860 Mon Sep 17 00:00:00 2001 From: Ian Kent Date: Wed, 29 Nov 2017 16:11:26 -0800 Subject: autofs: revert "autofs: fix AT_NO_AUTOMOUNT not being honored" Commit 42f461482178 ("autofs: fix AT_NO_AUTOMOUNT not being honored") allowed the fstatat(2) system call to properly honor the AT_NO_AUTOMOUNT flag but introduced a semantic change. In order to honor AT_NO_AUTOMOUNT a semantic change was made to the negative dentry case for stat family system calls in follow_automount(). This changed the unconditional triggering of an automount in this case to no longer be done and an error returned instead. This has caused more problems than I expected so reverting the change is needed. In a discussion with Neil Brown it was concluded that the automount(8) daemon can implement this change without kernel modifications. So that will be done instead and the autofs module documentation updated with a description of the problem and what needs to be done by module users for this specific case. Link: http://lkml.kernel.org/r/151174730120.6162.3848002191530283984.stgit@pluto.themaw.net Fixes: 42f4614821 ("autofs: fix AT_NO_AUTOMOUNT not being honored") Signed-off-by: Ian Kent Cc: Neil Brown Cc: Al Viro Cc: David Howells Cc: Colin Walters Cc: Ondrej Holy Cc: [4.11+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dc498d16cc1..511fbaabf624 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3088,7 +3088,8 @@ static inline int vfs_lstat(const char __user *name, struct kstat *stat) static inline int vfs_fstatat(int dfd, const char __user *filename, struct kstat *stat, int flags) { - return vfs_statx(dfd, filename, flags, stat, STATX_BASIC_STATS); + return vfs_statx(dfd, filename, flags | AT_NO_AUTOMOUNT, + stat, STATX_BASIC_STATS); } static inline int vfs_fstat(int fd, struct kstat *stat) { -- cgit v1.2.3 From f8821f96ae97260d68228fe53f81848b2ede44d7 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 14:33:56 +0100 Subject: skbuff: Grammar s/are can/can/, s/change/changes/ Signed-off-by: Geert Uytterhoeven Signed-off-by: David S. Miller --- include/linux/skbuff.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index bc486ef23f20..a38c80e9f91e 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1406,8 +1406,7 @@ static inline struct sk_buff *skb_get(struct sk_buff *skb) } /* - * If users == 1, we are the only owner and are can avoid redundant - * atomic change. + * If users == 1, we are the only owner and can avoid redundant atomic changes. */ /** -- cgit v1.2.3 From 8d26fdfcb45dc420115b267ac9d6b3ac13457f1b Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Thu, 30 Nov 2017 14:35:08 +0100 Subject: spi: Fix double "when" Signed-off-by: Geert Uytterhoeven Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 7b2170bfd6e7..bc6bb325d1bf 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -126,7 +126,7 @@ void spi_statistics_add_transfer_stats(struct spi_statistics *stats, * for that name. This appears in the sysfs "modalias" attribute * for driver coldplugging, and in uevents used for hotplugging * @cs_gpio: gpio number of the chipselect line (optional, -ENOENT when - * when not using a GPIO line) + * not using a GPIO line) * * @statistics: statistics for the spi_device * -- cgit v1.2.3 From 4db2b604c05afc3d2678fe01d3136c015df313ec Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 22 Nov 2017 11:47:28 +0100 Subject: move libgcc.h to include/linux Introducing a new include/lib directory just for this file totally messes up tab completion for include/linux, which is highly annoying. Move it to include/linux where we have headers for all kinds of other lib/ code as well. Signed-off-by: Christoph Hellwig Signed-off-by: Palmer Dabbelt --- include/linux/libgcc.h | 43 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 43 insertions(+) create mode 100644 include/linux/libgcc.h (limited to 'include/linux') diff --git a/include/linux/libgcc.h b/include/linux/libgcc.h new file mode 100644 index 000000000000..32e1e0f4b2d0 --- /dev/null +++ b/include/linux/libgcc.h @@ -0,0 +1,43 @@ +/* + * include/lib/libgcc.h + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, see the file COPYING, or write + * to the Free Software Foundation, Inc. + */ + +#ifndef __LIB_LIBGCC_H +#define __LIB_LIBGCC_H + +#include + +typedef int word_type __attribute__ ((mode (__word__))); + +#ifdef __BIG_ENDIAN +struct DWstruct { + int high, low; +}; +#elif defined(__LITTLE_ENDIAN) +struct DWstruct { + int low, high; +}; +#else +#error I feel sick. +#endif + +typedef union { + struct DWstruct s; + long long ll; +} DWunion; + +#endif /* __ASM_LIBGCC_H */ -- cgit v1.2.3 From 6d745ee8b5e81f3a33791e3c854fbbfd6f3e585e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 6 Sep 2017 14:56:50 +0200 Subject: iio: stm32: fix adc/trigger link error The ADC driver can trigger on either the timer or the lptim trigger, but it only uses a Kconfig 'select' statement to ensure that the first of the two is present. When the lptim trigger is enabled as a loadable module, and the adc driver is built-in, we now get a link error: drivers/iio/adc/stm32-adc.o: In function `stm32_adc_get_trig_extsel': stm32-adc.c:(.text+0x4e0): undefined reference to `is_stm32_lptim_trigger' We could use a second 'select' statement and always have both trigger drivers enabled when the adc driver is, but it seems that the lptimer trigger was intentionally left optional, so it seems better to keep it that way. This adds a hack to use 'IS_REACHABLE()' rather than 'IS_ENABLED()', which avoids the link error, but instead leads to the lptimer trigger not being used in the broken configuration. I've added a runtime warning for this case to help users figure out what they did wrong if this should ever be done by accident. Fixes: f0b638a7f6db ("iio: adc: stm32: add support for lptimer triggers") Signed-off-by: Arnd Bergmann Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/timer/stm32-lptim-trigger.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iio/timer/stm32-lptim-trigger.h b/include/linux/iio/timer/stm32-lptim-trigger.h index 34d59bfdce2d..464458d20b16 100644 --- a/include/linux/iio/timer/stm32-lptim-trigger.h +++ b/include/linux/iio/timer/stm32-lptim-trigger.h @@ -16,11 +16,14 @@ #define LPTIM2_OUT "lptim2_out" #define LPTIM3_OUT "lptim3_out" -#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER) +#if IS_REACHABLE(CONFIG_IIO_STM32_LPTIMER_TRIGGER) bool is_stm32_lptim_trigger(struct iio_trigger *trig); #else static inline bool is_stm32_lptim_trigger(struct iio_trigger *trig) { +#if IS_ENABLED(CONFIG_IIO_STM32_LPTIMER_TRIGGER) + pr_warn_once("stm32 lptim_trigger not linked in\n"); +#endif return false; } #endif -- cgit v1.2.3 From a773d419275bf54854ca6cfda8f2594ed2790faa Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Fri, 2 Jun 2017 13:20:25 +0300 Subject: tracing: Pass export pointer as argument to ->write() By passing an export descriptor to the write function, users don't need to keep a global static pointer and can rely on container_of() to fetch their own structure. Link: http://lkml.kernel.org/r/20170602102025.5140-1-felipe.balbi@linux.intel.com Acked-by: Steven Rostedt (VMware) Reviewed-by: Chunyan Zhang Signed-off-by: Felipe Balbi Signed-off-by: Steven Rostedt (VMware) --- include/linux/trace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/trace.h b/include/linux/trace.h index d24991c1fef3..b95ffb2188ab 100644 --- a/include/linux/trace.h +++ b/include/linux/trace.h @@ -18,7 +18,7 @@ */ struct trace_export { struct trace_export __rcu *next; - void (*write)(const void *, unsigned int); + void (*write)(struct trace_export *, const void *, unsigned int); }; int register_ftrace_export(struct trace_export *export); -- cgit v1.2.3 From 4ce413d1840b25b101be3c0559161db8891f3360 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Fri, 1 Dec 2017 15:29:39 +0000 Subject: irqdesc: Use bool return type instead of int The irq_balancing_disabled and irq_is_percpu{,_devid} functions are clearly intended to return bool like the functions in kernel/irq/settings.h, but actually return an int containing a masked value of desc->status_use_accessors. This can lead to subtle breakage if, for example, the return value is subsequently truncated when assigned to a narrower type. As Linus points out: | In particular, what can (and _has_ happened) is that people end up | using these functions that return true or false, and they assign the | result to something like a bitfield (or a char) or whatever. | | And the code looks *obviously* correct, when you have things like | | dev->percpu = irq_is_percpu_devid(dev->irq); | | and that "percpu" thing is just one status bit among many. It may even | *work*, because maybe that "percpu" flag ends up not being all that | important, or it just happens to never be set on the particular | hardware that people end up testing. | | But while it looks obviously correct, and might even work, it's really | fundamentally broken. Because that "true or false" function didn't | actually return 0/1, it returned 0 or 0x20000. | | And 0x20000 may not fit in a bitmask or a "char" or whatever. Fix the problem by consistently using bool as the return type for these functions. Reported-by: Linus Torvalds Signed-off-by: Will Deacon Signed-off-by: Thomas Gleixner Cc: marc.zyngier@arm.com Link: https://lkml.kernel.org/r/1512142179-24616-1-git-send-email-will.deacon@arm.com --- include/linux/irqdesc.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index dd418955962b..39fb3700f7a9 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -230,7 +230,7 @@ irq_set_chip_handler_name_locked(struct irq_data *data, struct irq_chip *chip, data->chip = chip; } -static inline int irq_balancing_disabled(unsigned int irq) +static inline bool irq_balancing_disabled(unsigned int irq) { struct irq_desc *desc; @@ -238,7 +238,7 @@ static inline int irq_balancing_disabled(unsigned int irq) return desc->status_use_accessors & IRQ_NO_BALANCING_MASK; } -static inline int irq_is_percpu(unsigned int irq) +static inline bool irq_is_percpu(unsigned int irq) { struct irq_desc *desc; @@ -246,7 +246,7 @@ static inline int irq_is_percpu(unsigned int irq) return desc->status_use_accessors & IRQ_PER_CPU; } -static inline int irq_is_percpu_devid(unsigned int irq) +static inline bool irq_is_percpu_devid(unsigned int irq) { struct irq_desc *desc; -- cgit v1.2.3 From c895f6f703ad7dd2f99e751d9884b0aa5d0eea25 Mon Sep 17 00:00:00 2001 From: Hendrik Brueckner Date: Mon, 4 Dec 2017 10:56:44 +0100 Subject: bpf: correct broken uapi for BPF_PROG_TYPE_PERF_EVENT program type Commit 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") introduced the bpf_perf_event_data structure which exports the pt_regs structure. This is OK for multiple architectures but fail for s390 and arm64 which do not export pt_regs. Programs using them, for example, the bpf selftest fail to compile on these architectures. For s390, exporting the pt_regs is not an option because s390 wants to allow changes to it. For arm64, there is a user_pt_regs structure that covers parts of the pt_regs structure for use by user space. To solve the broken uapi for s390 and arm64, introduce an abstract type for pt_regs and add an asm/bpf_perf_event.h file that concretes the type. An asm-generic header file covers the architectures that export pt_regs today. The arch-specific enablement for s390 and arm64 follows in separate commits. Reported-by: Thomas Richter Fixes: 0515e5999a466dfe ("bpf: introduce BPF_PROG_TYPE_PERF_EVENT program type") Signed-off-by: Hendrik Brueckner Reviewed-and-tested-by: Thomas Richter Acked-by: Alexei Starovoitov Cc: Arnaldo Carvalho de Melo Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Alexander Shishkin Cc: Jiri Olsa Cc: Namhyung Kim Cc: Arnd Bergmann Cc: Daniel Borkmann Signed-off-by: Daniel Borkmann --- include/linux/perf_event.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 2c9c87d8a0c1..7546822a1d74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -15,6 +15,7 @@ #define _LINUX_PERF_EVENT_H #include +#include /* * Kernel-internal data types and definitions: @@ -787,7 +788,7 @@ struct perf_output_handle { }; struct bpf_perf_event_data_kern { - struct pt_regs *regs; + bpf_user_pt_regs_t *regs; struct perf_sample_data *data; struct perf_event *event; }; @@ -1177,6 +1178,9 @@ extern void perf_bp_event(struct perf_event *event, void *data); (user_mode(regs) ? PERF_RECORD_MISC_USER : PERF_RECORD_MISC_KERNEL) # define perf_instruction_pointer(regs) instruction_pointer(regs) #endif +#ifndef perf_arch_bpf_user_pt_regs +# define perf_arch_bpf_user_pt_regs(regs) regs +#endif static inline bool has_branch_stack(struct perf_event *event) { -- cgit v1.2.3 From f775b13eedee2f7f3c6fdd4e90fb79090ce5d339 Mon Sep 17 00:00:00 2001 From: Rik van Riel Date: Tue, 14 Nov 2017 16:54:23 -0500 Subject: x86,kvm: move qemu/guest FPU switching out to vcpu_run MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently, every time a VCPU is scheduled out, the host kernel will first save the guest FPU/xstate context, then load the qemu userspace FPU context, only to then immediately save the qemu userspace FPU context back to memory. When scheduling in a VCPU, the same extraneous FPU loads and saves are done. This could be avoided by moving from a model where the guest FPU is loaded and stored with preemption disabled, to a model where the qemu userspace FPU is swapped out for the guest FPU context for the duration of the KVM_RUN ioctl. This is done under the VCPU mutex, which is also taken when other tasks inspect the VCPU FPU context, so the code should already be safe for this change. That should come as no surprise, given that s390 already has this optimization. This can fix a bug where KVM calls get_user_pages while owning the FPU, and the file system ends up requesting the FPU again: [258270.527947] __warn+0xcb/0xf0 [258270.527948] warn_slowpath_null+0x1d/0x20 [258270.527951] kernel_fpu_disable+0x3f/0x50 [258270.527953] __kernel_fpu_begin+0x49/0x100 [258270.527955] kernel_fpu_begin+0xe/0x10 [258270.527958] crc32c_pcl_intel_update+0x84/0xb0 [258270.527961] crypto_shash_update+0x3f/0x110 [258270.527968] crc32c+0x63/0x8a [libcrc32c] [258270.527975] dm_bm_checksum+0x1b/0x20 [dm_persistent_data] [258270.527978] node_prepare_for_write+0x44/0x70 [dm_persistent_data] [258270.527985] dm_block_manager_write_callback+0x41/0x50 [dm_persistent_data] [258270.527988] submit_io+0x170/0x1b0 [dm_bufio] [258270.527992] __write_dirty_buffer+0x89/0x90 [dm_bufio] [258270.527994] __make_buffer_clean+0x4f/0x80 [dm_bufio] [258270.527996] __try_evict_buffer+0x42/0x60 [dm_bufio] [258270.527998] dm_bufio_shrink_scan+0xc0/0x130 [dm_bufio] [258270.528002] shrink_slab.part.40+0x1f5/0x420 [258270.528004] shrink_node+0x22c/0x320 [258270.528006] do_try_to_free_pages+0xf5/0x330 [258270.528008] try_to_free_pages+0xe9/0x190 [258270.528009] __alloc_pages_slowpath+0x40f/0xba0 [258270.528011] __alloc_pages_nodemask+0x209/0x260 [258270.528014] alloc_pages_vma+0x1f1/0x250 [258270.528017] do_huge_pmd_anonymous_page+0x123/0x660 [258270.528021] handle_mm_fault+0xfd3/0x1330 [258270.528025] __get_user_pages+0x113/0x640 [258270.528027] get_user_pages+0x4f/0x60 [258270.528063] __gfn_to_pfn_memslot+0x120/0x3f0 [kvm] [258270.528108] try_async_pf+0x66/0x230 [kvm] [258270.528135] tdp_page_fault+0x130/0x280 [kvm] [258270.528149] kvm_mmu_page_fault+0x60/0x120 [kvm] [258270.528158] handle_ept_violation+0x91/0x170 [kvm_intel] [258270.528162] vmx_handle_exit+0x1ca/0x1400 [kvm_intel] No performance changes were detected in quick ping-pong tests on my 4 socket system, which is expected since an FPU+xstate load is on the order of 0.1us, while ping-ponging between CPUs is on the order of 20us, and somewhat noisy. Cc: stable@vger.kernel.org Signed-off-by: Rik van Riel Suggested-by: Christian Borntraeger Signed-off-by: Paolo Bonzini [Fixed a bug where reset_vcpu called put_fpu without preceding load_fpu, which happened inside from KVM_CREATE_VCPU ioctl. - Radim] Signed-off-by: Radim Krčmář --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 893d6d606cd0..6bdd4b9f6611 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -232,7 +232,7 @@ struct kvm_vcpu { struct mutex mutex; struct kvm_run *run; - int guest_fpu_loaded, guest_xcr0_loaded; + int guest_xcr0_loaded; struct swait_queue_head wq; struct pid __rcu *pid; int sigset_active; -- cgit v1.2.3 From d7efc6c11b277d9d80b99b1334a78bfe7d7edf10 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 5 Dec 2017 12:45:56 -0800 Subject: net: remove hlist_nulls_add_tail_rcu() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Alexander Potapenko reported use of uninitialized memory [1] This happens when inserting a request socket into TCP ehash, in __sk_nulls_add_node_rcu(), since sk_reuseport is not initialized. Bug was added by commit d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Note that d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") missed the opportunity to get rid of hlist_nulls_add_tail_rcu() : Both UDP sockets and TCP/DCCP listeners no longer use __sk_nulls_add_node_rcu() for their hash insertion. Since all other sockets have unique 4-tuple, the reuseport status has no special meaning, so we can always use hlist_nulls_add_head_rcu() for them and save few cycles/instructions. [1] ================================================================== BUG: KMSAN: use of uninitialized memory in inet_ehash_insert+0xd40/0x1050 CPU: 0 PID: 0 Comm: swapper/0 Not tainted 4.13.0+ #3288 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS Bochs 01/01/2011 Call Trace:    __dump_stack lib/dump_stack.c:16  dump_stack+0x185/0x1d0 lib/dump_stack.c:52  kmsan_report+0x13f/0x1c0 mm/kmsan/kmsan.c:1016  __msan_warning_32+0x69/0xb0 mm/kmsan/kmsan_instr.c:766  __sk_nulls_add_node_rcu ./include/net/sock.h:684  inet_ehash_insert+0xd40/0x1050 net/ipv4/inet_hashtables.c:413  reqsk_queue_hash_req net/ipv4/inet_connection_sock.c:754  inet_csk_reqsk_queue_hash_add+0x1cc/0x300 net/ipv4/inet_connection_sock.c:765  tcp_conn_request+0x31e7/0x36f0 net/ipv4/tcp_input.c:6414  tcp_v4_conn_request+0x16d/0x220 net/ipv4/tcp_ipv4.c:1314  tcp_rcv_state_process+0x42a/0x7210 net/ipv4/tcp_input.c:5917  tcp_v4_do_rcv+0xa6a/0xcd0 net/ipv4/tcp_ipv4.c:1483  tcp_v4_rcv+0x3de0/0x4ab0 net/ipv4/tcp_ipv4.c:1763  ip_local_deliver_finish+0x6bb/0xcb0 net/ipv4/ip_input.c:216  NF_HOOK ./include/linux/netfilter.h:248  ip_local_deliver+0x3fa/0x480 net/ipv4/ip_input.c:257  dst_input ./include/net/dst.h:477  ip_rcv_finish+0x6fb/0x1540 net/ipv4/ip_input.c:397  NF_HOOK ./include/linux/netfilter.h:248  ip_rcv+0x10f6/0x15c0 net/ipv4/ip_input.c:488  __netif_receive_skb_core+0x36f6/0x3f60 net/core/dev.c:4298  __netif_receive_skb net/core/dev.c:4336  netif_receive_skb_internal+0x63c/0x19c0 net/core/dev.c:4497  napi_skb_finish net/core/dev.c:4858  napi_gro_receive+0x629/0xa50 net/core/dev.c:4889  e1000_receive_skb drivers/net/ethernet/intel/e1000/e1000_main.c:4018  e1000_clean_rx_irq+0x1492/0x1d30 drivers/net/ethernet/intel/e1000/e1000_main.c:4474  e1000_clean+0x43aa/0x5970 drivers/net/ethernet/intel/e1000/e1000_main.c:3819  napi_poll net/core/dev.c:5500  net_rx_action+0x73c/0x1820 net/core/dev.c:5566  __do_softirq+0x4b4/0x8dd kernel/softirq.c:284  invoke_softirq kernel/softirq.c:364  irq_exit+0x203/0x240 kernel/softirq.c:405  exiting_irq+0xe/0x10 ./arch/x86/include/asm/apic.h:638  do_IRQ+0x15e/0x1a0 arch/x86/kernel/irq.c:263  common_interrupt+0x86/0x86 Fixes: d894ba18d4e4 ("soreuseport: fix ordering for mixed v4/v6 sockets") Fixes: d296ba60d8e2 ("soreuseport: Resolve merge conflict for v4/v6 ordering fix") Signed-off-by: Eric Dumazet Reported-by: Alexander Potapenko Acked-by: Craig Gallek Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index a328e8181e49..e4b257ff881b 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -100,44 +100,6 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, first->pprev = &n->next; } -/** - * hlist_nulls_add_tail_rcu - * @n: the element to add to the hash list. - * @h: the list to add to. - * - * Description: - * Adds the specified element to the end of the specified hlist_nulls, - * while permitting racing traversals. NOTE: tail insertion requires - * list traversal. - * - * The caller must take whatever precautions are necessary - * (such as holding appropriate locks) to avoid racing - * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() - * or hlist_nulls_del_rcu(), running on this same list. - * However, it is perfectly legal to run concurrently with - * the _rcu list-traversal primitives, such as - * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency - * problems on Alpha CPUs. Regardless of the type of CPU, the - * list-traversal primitive must be guarded by rcu_read_lock(). - */ -static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, - struct hlist_nulls_head *h) -{ - struct hlist_nulls_node *i, *last = NULL; - - for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); - i = hlist_nulls_next_rcu(i)) - last = i; - - if (last) { - n->next = last->next; - n->pprev = &last->next; - rcu_assign_pointer(hlist_nulls_next_rcu(last), n); - } else { - hlist_nulls_add_head_rcu(n, h); - } -} - /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. -- cgit v1.2.3 From af97a77bc01ce49a466f9d4c0125479e2e2230b6 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 6 Dec 2017 09:50:08 +0000 Subject: efi: Move some sysfs files to be read-only by root Thanks to the scripts/leaking_addresses.pl script, it was found that some EFI values should not be readable by non-root users. So make them root-only, and to do that, add a __ATTR_RO_MODE() macro to make this easier, and use it in other places at the same time. Reported-by: Linus Torvalds Tested-by: Dave Young Signed-off-by: Greg Kroah-Hartman Signed-off-by: Ard Biesheuvel Cc: H. Peter Anvin Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Cc: stable Link: http://lkml.kernel.org/r/20171206095010.24170-2-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/sysfs.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index e32dfe098e82..40839c02d28c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -117,6 +117,12 @@ struct attribute_group { .show = _name##_show, \ } +#define __ATTR_RO_MODE(_name, _mode) { \ + .attr = { .name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ + .show = _name##_show, \ +} + #define __ATTR_WO(_name) { \ .attr = { .name = __stringify(_name), .mode = S_IWUSR }, \ .store = _name##_store, \ -- cgit v1.2.3 From 7912af5c835bd86f2b0347a480e0f40e2fab30d0 Mon Sep 17 00:00:00 2001 From: Randy Dunlap Date: Wed, 6 Dec 2017 14:55:05 -0600 Subject: PCI: Add pci_get_domain_bus_and_slot() stub The coretemp driver build fails when CONFIG_PCI is not enabled because it uses a function that does not have a stub for that config case, so add the function stub. ../drivers/hwmon/coretemp.c: In function 'adjust_tjmax': ../drivers/hwmon/coretemp.c:250:9: error: implicit declaration of function 'pci_get_domain_bus_and_slot' [-Werror=implicit-function-declaration] struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); ../drivers/hwmon/coretemp.c:250:32: warning: initialization makes pointer from integer without a cast [enabled by default] struct pci_dev *host_bridge = pci_get_domain_bus_and_slot(0, 0, devfn); Signed-off-by: Randy Dunlap [bhelgaas: identical patch also by Arnd Bergmann ] Signed-off-by: Bjorn Helgaas Acked-by: Guenter Roeck --- include/linux/pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 0403894147a3..c170c9250c8b 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1674,6 +1674,9 @@ static inline struct pci_dev *pci_get_slot(struct pci_bus *bus, static inline struct pci_dev *pci_get_bus_and_slot(unsigned int bus, unsigned int devfn) { return NULL; } +static inline struct pci_dev *pci_get_domain_bus_and_slot(int domain, + unsigned int bus, unsigned int devfn) +{ return NULL; } static inline int pci_domain_nr(struct pci_bus *bus) { return 0; } static inline struct pci_dev *pci_dev_get(struct pci_dev *dev) { return NULL; } -- cgit v1.2.3 From b860b419d970f286294fbfb2b21a4028fd8ee442 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Wed, 6 Dec 2017 12:21:35 +0100 Subject: mfd: Fix RTS5227 (and others) powermanagement Commit 8275b77a1513 ("mfd: rts5249: Add support for RTS5250S power saving") adds powersaving support for device-ids 5249 524a and 525a. But as a side effect it breaks ASPM support for all the other device-ids, causing e.g. the Haswell CPU on a Lenovo T440s to not go into a higher c-state then PC3, while previously it would go to PC7, causing the machine to idle at 7.4W instead of 6.6W! The problem here is the new option.dev_aspm_mode field, which only gets explicitly initialized in the new code for the device-ids 5249 524a and 525a. Leaving the dev_aspm_mode 0 for the other device-ids. The default dev_aspm_mode 0 is mapped to DEV_ASPM_DISABLE, but the old behavior of calling rtsx_pci_enable_aspm() when idle and rtsx_pci_disable_aspm() when busy happens when dev_aspm_mode == DEV_ASPM_DYNAMIC. This commit changes the enum so that 0 = DEV_ASPM_DYNAMIC matching the old default behavior, fixing the pm regression with the other device-ids. Fixes: 8275b77a1513 ("mfd: rts5249: Add support for RTS5250S power saving") Signed-off-by: Hans de Goede Acked-by: Rui Feng Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a2a1318a3d0c..c3d3f04d8cc6 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -915,10 +915,10 @@ enum PDEV_STAT {PDEV_STAT_IDLE, PDEV_STAT_RUN}; #define LTR_L1SS_PWR_GATE_CHECK_CARD_EN BIT(6) enum dev_aspm_mode { - DEV_ASPM_DISABLE = 0, DEV_ASPM_DYNAMIC, DEV_ASPM_BACKDOOR, DEV_ASPM_STATIC, + DEV_ASPM_DISABLE, }; /* -- cgit v1.2.3 From a4abd7a80addb4a9547f7dfc7812566b60ec505c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Bj=C3=B8rn=20Mork?= Date: Wed, 6 Dec 2017 20:21:24 +0100 Subject: usbnet: fix alignment for frames with no ethernet header MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The qmi_wwan minidriver support a 'raw-ip' mode where frames are received without any ethernet header. This causes alignment issues because the skbs allocated by usbnet are "IP aligned". Fix by allowing minidrivers to disable the additional alignment offset. This is implemented using a per-device flag, since the same minidriver also supports 'ethernet' mode. Fixes: 32f7adf633b9 ("net: qmi_wwan: support "raw IP" mode") Reported-and-tested-by: Jay Foster Signed-off-by: Bjørn Mork Signed-off-by: David S. Miller --- include/linux/usb/usbnet.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/usbnet.h b/include/linux/usb/usbnet.h index a69877734c4e..e2ec3582e549 100644 --- a/include/linux/usb/usbnet.h +++ b/include/linux/usb/usbnet.h @@ -82,6 +82,7 @@ struct usbnet { # define EVENT_RX_KILL 10 # define EVENT_LINK_CHANGE 11 # define EVENT_SET_RX_MODE 12 +# define EVENT_NO_IP_ALIGN 13 }; static inline struct usb_driver *driver_of(struct usb_interface *intf) -- cgit v1.2.3 From d4761754b4fb2ef8d9a1e9d121c4bec84e1fe292 Mon Sep 17 00:00:00 2001 From: Yousuk Seung Date: Thu, 7 Dec 2017 13:41:34 -0800 Subject: tcp: invalidate rate samples during SACK reneging Mark tcp_sock during a SACK reneging event and invalidate rate samples while marked. Such rate samples may overestimate bw by including packets that were SACKed before reneging. < ack 6001 win 10000 sack 7001:38001 < ack 7001 win 0 sack 8001:38001 // Reneg detected > seq 7001:8001 // RTO, SACK cleared. < ack 38001 win 10000 In above example the rate sample taken after the last ack will count 7001-38001 as delivered while the actual delivery rate likely could be much lower i.e. 7001-8001. This patch adds a new field tcp_sock.sack_reneg and marks it when we declare SACK reneging and entering TCP_CA_Loss, and unmarks it after the last rate sample was taken before moving back to TCP_CA_Open. This patch also invalidates rate samples taken while tcp_sock.is_sack_reneg is set. Fixes: b9f64820fb22 ("tcp: track data delivery rate for a TCP connection") Signed-off-by: Yousuk Seung Signed-off-by: Neal Cardwell Signed-off-by: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Acked-by: Eric Dumazet Acked-by: Priyaranjan Jha Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index df5d97a85e1a..ca4a6361389b 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -224,7 +224,8 @@ struct tcp_sock { rate_app_limited:1, /* rate_{delivered,interval_us} limited? */ fastopen_connect:1, /* FASTOPEN_CONNECT sockopt */ fastopen_no_cookie:1, /* Allow send/recv SYN+data without a cookie */ - unused:3; + is_sack_reneg:1, /* in recovery from loss with SACK reneg? */ + unused:2; u8 nonagle : 4,/* Disable Nagle algorithm? */ thin_lto : 1,/* Use linear timeouts for thin streams */ unused1 : 1, -- cgit v1.2.3 From f335195adf043168ee69d78ea72ac3e30f0c57ce Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Wed, 6 Dec 2017 11:27:57 +0100 Subject: kmemcheck: rip it out for real Commit 4675ff05de2d ("kmemcheck: rip it out") has removed the code but for some reason SPDX header stayed in place. This looks like a rebase mistake in the mmotm tree or the merge mistake. Let's drop those leftovers as well. Signed-off-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/kmemcheck.h | 1 - 1 file changed, 1 deletion(-) delete mode 100644 include/linux/kmemcheck.h (limited to 'include/linux') diff --git a/include/linux/kmemcheck.h b/include/linux/kmemcheck.h deleted file mode 100644 index ea32a7d3cf1b..000000000000 --- a/include/linux/kmemcheck.h +++ /dev/null @@ -1 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -- cgit v1.2.3 From 3487972d7fa6c5143951436ada5933dcf0ec659d Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 7 Dec 2017 02:41:18 +0100 Subject: PM / sleep: Avoid excess pm_runtime_enable() calls in device_resume() Middle-layer code doing suspend-time optimizations for devices with the DPM_FLAG_SMART_SUSPEND flag set (currently, the PCI bus type and the ACPI PM domain) needs to make the core skip ->thaw_early and ->thaw callbacks for those devices in some cases and it sets the power.direct_complete flag for them for this purpose. However, it turns out that setting power.direct_complete outside of the PM core is a bad idea as it triggers an excess invocation of pm_runtime_enable() in device_resume(). For this reason, provide a helper to clear power.is_late_suspended and power.is_suspended to be invoked by the middle-layer code in question instead of setting power.direct_complete and make that code call the new helper. Fixes: c4b65157aeef (PCI / PM: Take SMART_SUSPEND driver flag into account) Fixes: 05087360fd7a (ACPI / PM: Take SMART_SUSPEND driver flag into account) Signed-off-by: Rafael J. Wysocki Reviewed-by: Ulf Hansson Acked-by: Bjorn Helgaas --- include/linux/pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 65d39115f06d..492ed473ba7e 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -765,6 +765,7 @@ extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); +extern void dev_pm_skip_next_resume_phases(struct device *dev); extern bool dev_pm_smart_suspend_and_suspended(struct device *dev); #else /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From a8ceb5dbfde1092b466936bca0ff3be127ecf38e Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Tue, 5 Dec 2017 21:29:37 +0200 Subject: ptr_ring: add barriers Users of ptr_ring expect that it's safe to give the data structure a pointer and have it be available to consumers, but that actually requires an smb_wmb or a stronger barrier. In absence of such barriers and on architectures that reorder writes, consumer might read an un=initialized value from an skb pointer stored in the skb array. This was observed causing crashes. To fix, add memory barriers. The barrier we use is a wmb, the assumption being that producers do not need to read the value so we do not need to order these reads. Reported-by: George Cherian Suggested-by: Jason Wang Signed-off-by: Michael S. Tsirkin Acked-by: Jason Wang Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 37b4bb2545b3..6866df4f31b5 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -101,12 +101,18 @@ static inline bool ptr_ring_full_bh(struct ptr_ring *r) /* Note: callers invoking this in a loop must use a compiler barrier, * for example cpu_relax(). Callers must hold producer_lock. + * Callers are responsible for making sure pointer that is being queued + * points to a valid data. */ static inline int __ptr_ring_produce(struct ptr_ring *r, void *ptr) { if (unlikely(!r->size) || r->queue[r->producer]) return -ENOSPC; + /* Make sure the pointer we are storing points to a valid data. */ + /* Pairs with smp_read_barrier_depends in __ptr_ring_consume. */ + smp_wmb(); + r->queue[r->producer++] = ptr; if (unlikely(r->producer >= r->size)) r->producer = 0; @@ -275,6 +281,9 @@ static inline void *__ptr_ring_consume(struct ptr_ring *r) if (ptr) __ptr_ring_discard_one(r); + /* Make sure anyone accessing data through the pointer is up to date. */ + /* Pairs with smp_wmb in __ptr_ring_produce. */ + smp_read_barrier_depends(); return ptr; } -- cgit v1.2.3 From d89c70356acf11b7cf47ca5cfcafae5062a85451 Mon Sep 17 00:00:00 2001 From: Will Deacon Date: Tue, 28 Nov 2017 18:42:19 +0000 Subject: locking/core: Remove break_lock field when CONFIG_GENERIC_LOCKBREAK=y When CONFIG_GENERIC_LOCKBEAK=y, locking structures grow an extra int ->break_lock field which is used to implement raw_spin_is_contended() by setting the field to 1 when waiting on a lock and clearing it to zero when holding a lock. However, there are a few problems with this approach: - There is a write-write race between a CPU successfully taking the lock (and subsequently writing break_lock = 0) and a waiter waiting on the lock (and subsequently writing break_lock = 1). This could result in a contended lock being reported as uncontended and vice-versa. - On machines with store buffers, nothing guarantees that the writes to break_lock are visible to other CPUs at any particular time. - READ_ONCE/WRITE_ONCE are not used, so the field is potentially susceptible to harmful compiler optimisations, Consequently, the usefulness of this field is unclear and we'd be better off removing it and allowing architectures to implement raw_spin_is_contended() by providing a definition of arch_spin_is_contended(), as they can when CONFIG_GENERIC_LOCKBREAK=n. Signed-off-by: Will Deacon Acked-by: Peter Zijlstra Cc: Heiko Carstens Cc: Linus Torvalds Cc: Martin Schwidefsky Cc: Sebastian Ott Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1511894539-7988-3-git-send-email-will.deacon@arm.com Signed-off-by: Ingo Molnar --- include/linux/rwlock_types.h | 3 --- include/linux/spinlock.h | 5 ----- include/linux/spinlock_types.h | 3 --- 3 files changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rwlock_types.h b/include/linux/rwlock_types.h index cc0072e93e36..857a72ceb794 100644 --- a/include/linux/rwlock_types.h +++ b/include/linux/rwlock_types.h @@ -10,9 +10,6 @@ */ typedef struct { arch_rwlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif #ifdef CONFIG_DEBUG_SPINLOCK unsigned int magic, owner_cpu; void *owner; diff --git a/include/linux/spinlock.h b/include/linux/spinlock.h index a39186194cd6..3bf273538840 100644 --- a/include/linux/spinlock.h +++ b/include/linux/spinlock.h @@ -107,16 +107,11 @@ do { \ #define raw_spin_is_locked(lock) arch_spin_is_locked(&(lock)->raw_lock) -#ifdef CONFIG_GENERIC_LOCKBREAK -#define raw_spin_is_contended(lock) ((lock)->break_lock) -#else - #ifdef arch_spin_is_contended #define raw_spin_is_contended(lock) arch_spin_is_contended(&(lock)->raw_lock) #else #define raw_spin_is_contended(lock) (((void)(lock), 0)) #endif /*arch_spin_is_contended*/ -#endif /* * This barrier must provide two things: diff --git a/include/linux/spinlock_types.h b/include/linux/spinlock_types.h index 73548eb13a5d..24b4e6f2c1a2 100644 --- a/include/linux/spinlock_types.h +++ b/include/linux/spinlock_types.h @@ -19,9 +19,6 @@ typedef struct raw_spinlock { arch_spinlock_t raw_lock; -#ifdef CONFIG_GENERIC_LOCKBREAK - unsigned int break_lock; -#endif #ifdef CONFIG_DEBUG_SPINLOCK unsigned int magic, owner_cpu; void *owner; -- cgit v1.2.3 From e966eaeeb623f09975ef362c2866fae6f86844f9 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Tue, 12 Dec 2017 12:31:16 +0100 Subject: locking/lockdep: Remove the cross-release locking checks This code (CONFIG_LOCKDEP_CROSSRELEASE=y and CONFIG_LOCKDEP_COMPLETIONS=y), while it found a number of old bugs initially, was also causing too many false positives that caused people to disable lockdep - which is arguably a worse overall outcome. If we disable cross-release by default but keep the code upstream then in practice the most likely outcome is that we'll allow the situation to degrade gradually, by allowing entropy to introduce more and more false positives, until it overwhelms maintenance capacity. Another bad side effect was that people were trying to work around the false positives by uglifying/complicating unrelated code. There's a marked difference between annotating locking operations and uglifying good code just due to bad lock debugging code ... This gradual decrease in quality happened to a number of debugging facilities in the kernel, and lockdep is pretty complex already, so we cannot risk this outcome. Either cross-release checking can be done right with no false positives, or it should not be included in the upstream kernel. ( Note that it might make sense to maintain it out of tree and go through the false positives every now and then and see whether new bugs were introduced. ) Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/completion.h | 45 ---------------- include/linux/lockdep.h | 125 --------------------------------------------- include/linux/sched.h | 11 ---- 3 files changed, 181 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 0662a417febe..94a59ba7d422 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -10,9 +10,6 @@ */ #include -#ifdef CONFIG_LOCKDEP_COMPLETIONS -#include -#endif /* * struct completion - structure used to maintain state for a "completion" @@ -29,58 +26,16 @@ struct completion { unsigned int done; wait_queue_head_t wait; -#ifdef CONFIG_LOCKDEP_COMPLETIONS - struct lockdep_map_cross map; -#endif }; -#ifdef CONFIG_LOCKDEP_COMPLETIONS -static inline void complete_acquire(struct completion *x) -{ - lock_acquire_exclusive((struct lockdep_map *)&x->map, 0, 0, NULL, _RET_IP_); -} - -static inline void complete_release(struct completion *x) -{ - lock_release((struct lockdep_map *)&x->map, 0, _RET_IP_); -} - -static inline void complete_release_commit(struct completion *x) -{ - lock_commit_crosslock((struct lockdep_map *)&x->map); -} - -#define init_completion_map(x, m) \ -do { \ - lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ - (m)->name, (m)->key, 0); \ - __init_completion(x); \ -} while (0) - -#define init_completion(x) \ -do { \ - static struct lock_class_key __key; \ - lockdep_init_map_crosslock((struct lockdep_map *)&(x)->map, \ - "(completion)" #x, \ - &__key, 0); \ - __init_completion(x); \ -} while (0) -#else #define init_completion_map(x, m) __init_completion(x) #define init_completion(x) __init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} static inline void complete_release_commit(struct completion *x) {} -#endif -#ifdef CONFIG_LOCKDEP_COMPLETIONS -#define COMPLETION_INITIALIZER(work) \ - { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait), \ - STATIC_CROSS_LOCKDEP_MAP_INIT("(completion)" #work, &(work)) } -#else #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } -#endif #define COMPLETION_INITIALIZER_ONSTACK_MAP(work, map) \ (*({ init_completion_map(&(work), &(map)); &(work); })) diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index a842551fe044..2e75dc34bff5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -158,12 +158,6 @@ struct lockdep_map { int cpu; unsigned long ip; #endif -#ifdef CONFIG_LOCKDEP_CROSSRELEASE - /* - * Whether it's a crosslock. - */ - int cross; -#endif }; static inline void lockdep_copy_map(struct lockdep_map *to, @@ -267,95 +261,8 @@ struct held_lock { unsigned int hardirqs_off:1; unsigned int references:12; /* 32 bits */ unsigned int pin_count; -#ifdef CONFIG_LOCKDEP_CROSSRELEASE - /* - * Generation id. - * - * A value of cross_gen_id will be stored when holding this, - * which is globally increased whenever each crosslock is held. - */ - unsigned int gen_id; -#endif -}; - -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -#define MAX_XHLOCK_TRACE_ENTRIES 5 - -/* - * This is for keeping locks waiting for commit so that true dependencies - * can be added at commit step. - */ -struct hist_lock { - /* - * Id for each entry in the ring buffer. This is used to - * decide whether the ring buffer was overwritten or not. - * - * For example, - * - * |<----------- hist_lock ring buffer size ------->| - * pppppppppppppppppppppiiiiiiiiiiiiiiiiiiiiiiiiiiiii - * wrapped > iiiiiiiiiiiiiiiiiiiiiiiiiii....................... - * - * where 'p' represents an acquisition in process - * context, 'i' represents an acquisition in irq - * context. - * - * In this example, the ring buffer was overwritten by - * acquisitions in irq context, that should be detected on - * rollback or commit. - */ - unsigned int hist_id; - - /* - * Seperate stack_trace data. This will be used at commit step. - */ - struct stack_trace trace; - unsigned long trace_entries[MAX_XHLOCK_TRACE_ENTRIES]; - - /* - * Seperate hlock instance. This will be used at commit step. - * - * TODO: Use a smaller data structure containing only necessary - * data. However, we should make lockdep code able to handle the - * smaller one first. - */ - struct held_lock hlock; }; -/* - * To initialize a lock as crosslock, lockdep_init_map_crosslock() should - * be called instead of lockdep_init_map(). - */ -struct cross_lock { - /* - * When more than one acquisition of crosslocks are overlapped, - * we have to perform commit for them based on cross_gen_id of - * the first acquisition, which allows us to add more true - * dependencies. - * - * Moreover, when no acquisition of a crosslock is in progress, - * we should not perform commit because the lock might not exist - * any more, which might cause incorrect memory access. So we - * have to track the number of acquisitions of a crosslock. - */ - int nr_acquire; - - /* - * Seperate hlock instance. This will be used at commit step. - * - * TODO: Use a smaller data structure containing only necessary - * data. However, we should make lockdep code able to handle the - * smaller one first. - */ - struct held_lock hlock; -}; - -struct lockdep_map_cross { - struct lockdep_map map; - struct cross_lock xlock; -}; -#endif - /* * Initialization, self-test and debugging-output methods: */ @@ -560,37 +467,6 @@ enum xhlock_context_t { XHLOCK_CTX_NR, }; -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -extern void lockdep_init_map_crosslock(struct lockdep_map *lock, - const char *name, - struct lock_class_key *key, - int subclass); -extern void lock_commit_crosslock(struct lockdep_map *lock); - -/* - * What we essencially have to initialize is 'nr_acquire'. Other members - * will be initialized in add_xlock(). - */ -#define STATIC_CROSS_LOCK_INIT() \ - { .nr_acquire = 0,} - -#define STATIC_CROSS_LOCKDEP_MAP_INIT(_name, _key) \ - { .map.name = (_name), .map.key = (void *)(_key), \ - .map.cross = 1, .xlock = STATIC_CROSS_LOCK_INIT(), } - -/* - * To initialize a lockdep_map statically use this macro. - * Note that _name must not be NULL. - */ -#define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ - { .name = (_name), .key = (void *)(_key), .cross = 0, } - -extern void crossrelease_hist_start(enum xhlock_context_t c); -extern void crossrelease_hist_end(enum xhlock_context_t c); -extern void lockdep_invariant_state(bool force); -extern void lockdep_init_task(struct task_struct *task); -extern void lockdep_free_task(struct task_struct *task); -#else /* !CROSSRELEASE */ #define lockdep_init_map_crosslock(m, n, k, s) do {} while (0) /* * To initialize a lockdep_map statically use this macro. @@ -604,7 +480,6 @@ static inline void crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} -#endif /* CROSSRELEASE */ #ifdef CONFIG_LOCK_STAT diff --git a/include/linux/sched.h b/include/linux/sched.h index 21991d668d35..9ce6c3001e9f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -849,17 +849,6 @@ struct task_struct { struct held_lock held_locks[MAX_LOCK_DEPTH]; #endif -#ifdef CONFIG_LOCKDEP_CROSSRELEASE -#define MAX_XHLOCKS_NR 64UL - struct hist_lock *xhlocks; /* Crossrelease history locks */ - unsigned int xhlock_idx; - /* For restoring at history boundaries */ - unsigned int xhlock_idx_hist[XHLOCK_CTX_NR]; - unsigned int hist_id; - /* For overwrite check at each context exit */ - unsigned int hist_id_save[XHLOCK_CTX_NR]; -#endif - #ifdef CONFIG_UBSAN unsigned int in_ubsan; #endif -- cgit v1.2.3 From b899a850431e2dd0943205a63a68573f3e312d0d Mon Sep 17 00:00:00 2001 From: Mark Rutland Date: Mon, 27 Nov 2017 10:38:23 +0000 Subject: compiler.h: Remove ACCESS_ONCE() There are no longer any kernelspace uses of ACCESS_ONCE(), so we can remove the definition from . This patch removes the ACCESS_ONCE() definition, and updates comments which referred to it. At the same time, some inconsistent and redundant whitespace is removed from comments. Tested-by: Paul E. McKenney Signed-off-by: Mark Rutland Cc: Arnaldo Carvalho de Melo Cc: Joe Perches Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: apw@canonical.com Link: http://lkml.kernel.org/r/20171127103824.36526-4-mark.rutland@arm.com Signed-off-by: Ingo Molnar --- include/linux/compiler.h | 47 +++++++++++------------------------------------ 1 file changed, 11 insertions(+), 36 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 188ed9f65517..52e611ab9a6c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -220,21 +220,21 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s /* * Prevent the compiler from merging or refetching reads or writes. The * compiler is also forbidden from reordering successive instances of - * READ_ONCE, WRITE_ONCE and ACCESS_ONCE (see below), but only when the - * compiler is aware of some particular ordering. One way to make the - * compiler aware of ordering is to put the two invocations of READ_ONCE, - * WRITE_ONCE or ACCESS_ONCE() in different C statements. + * READ_ONCE and WRITE_ONCE, but only when the compiler is aware of some + * particular ordering. One way to make the compiler aware of ordering is to + * put the two invocations of READ_ONCE or WRITE_ONCE in different C + * statements. * - * In contrast to ACCESS_ONCE these two macros will also work on aggregate - * data types like structs or unions. If the size of the accessed data - * type exceeds the word size of the machine (e.g., 32 bits or 64 bits) - * READ_ONCE() and WRITE_ONCE() will fall back to memcpy(). There's at - * least two memcpy()s: one for the __builtin_memcpy() and then one for - * the macro doing the copy of variable - '__u' allocated on the stack. + * These two macros will also work on aggregate data types like structs or + * unions. If the size of the accessed data type exceeds the word size of + * the machine (e.g., 32 bits or 64 bits) READ_ONCE() and WRITE_ONCE() will + * fall back to memcpy(). There's at least two memcpy()s: one for the + * __builtin_memcpy() and then one for the macro doing the copy of variable + * - '__u' allocated on the stack. * * Their two major use cases are: (1) Mediating communication between * process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise + * and (2) Ensuring that the compiler does not fold, spindle, or otherwise * mutilate accesses that either do not require ordering or that interact * with an explicit memory barrier or atomic instruction that provides the * required ordering. @@ -327,29 +327,4 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s compiletime_assert(__native_word(t), \ "Need native word sized stores/loads for atomicity.") -/* - * Prevent the compiler from merging or refetching accesses. The compiler - * is also forbidden from reordering successive instances of ACCESS_ONCE(), - * but only when the compiler is aware of some particular ordering. One way - * to make the compiler aware of ordering is to put the two invocations of - * ACCESS_ONCE() in different C statements. - * - * ACCESS_ONCE will only work on scalar types. For union types, ACCESS_ONCE - * on a union member will work as long as the size of the member matches the - * size of the union and the size is smaller than word size. - * - * The major use cases of ACCESS_ONCE used to be (1) Mediating communication - * between process-level code and irq/NMI handlers, all running on the same CPU, - * and (2) Ensuring that the compiler does not fold, spindle, or otherwise - * mutilate accesses that either do not require ordering or that interact - * with an explicit memory barrier or atomic instruction that provides the - * required ordering. - * - * If possible use READ_ONCE()/WRITE_ONCE() instead. - */ -#define __ACCESS_ONCE(x) ({ \ - __maybe_unused typeof(x) __var = (__force typeof(x)) 0; \ - (volatile typeof(x) *)&(x); }) -#define ACCESS_ONCE(x) (*__ACCESS_ONCE(x)) - #endif /* __LINUX_COMPILER_H */ -- cgit v1.2.3 From c47d7f56e914900410f65835933f9fc4374d0a2b Mon Sep 17 00:00:00 2001 From: Wei Wang Date: Thu, 14 Dec 2017 15:32:24 -0800 Subject: include/linux/idr.h: add #include The was removed from radix-tree.h by commit f5bba9d11a25 ("include/linux/radix-tree.h: remove unneeded #include "). Since that commit, tools/testing/radix-tree/ couldn't pass compilation due to tools/testing/radix-tree/idr.c:17: undefined reference to WARN_ON_ONCE. This patch adds the bug.h header to idr.h to solve the issue. Link: http://lkml.kernel.org/r/1511963726-34070-2-git-send-email-wei.w.wang@intel.com Fixes: f5bba9d11a2 ("include/linux/radix-tree.h: remove unneeded #include ") Signed-off-by: Wei Wang Cc: Matthew Wilcox Cc: Jan Kara Cc: Eric Biggers Cc: Tejun Heo Cc: Masahiro Yamada Cc: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index 7c3a365f7e12..fa14f834e4ed 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -15,6 +15,7 @@ #include #include #include +#include struct idr { struct radix_tree_root idr_rt; -- cgit v1.2.3 From 338f1d9d1b829fec494d053f62820a2ee625b1ec Mon Sep 17 00:00:00 2001 From: Chris Wilson Date: Thu, 14 Dec 2017 15:32:28 -0800 Subject: lib/rbtree,drm/mm: add rbtree_replace_node_cached() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a variant of rbtree_replace_node() that maintains the leftmost cache of struct rbtree_root_cached when replacing nodes within the rbtree. As drm_mm is the only rb_replace_node() being used on an interval tree, the mistake looks fairly self-contained. Furthermore the only user of drm_mm_replace_node() is its testsuite... Testcase: igt/drm_mm/replace Link: http://lkml.kernel.org/r/20171122100729.3742-1-chris@chris-wilson.co.uk Link: https://patchwork.freedesktop.org/patch/msgid/20171109212435.9265-1-chris@chris-wilson.co.uk Fixes: f808c13fd373 ("lib/interval_tree: fast overlap detection") Signed-off-by: Chris Wilson Reviewed-by: Joonas Lahtinen Acked-by: Davidlohr Bueso Cc: Jérôme Glisse Cc: Joonas Lahtinen Cc: Daniel Vetter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index d574361943ea..fcbeed4053ef 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -99,6 +99,8 @@ extern void rb_replace_node(struct rb_node *victim, struct rb_node *new, struct rb_root *root); extern void rb_replace_node_rcu(struct rb_node *victim, struct rb_node *new, struct rb_root *root); +extern void rb_replace_node_cached(struct rb_node *victim, struct rb_node *new, + struct rb_root_cached *root); static inline void rb_link_node(struct rb_node *node, struct rb_node *parent, struct rb_node **rb_link) -- cgit v1.2.3 From 146734b091430c80d80bb96b1139a96fb4bc830e Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2017 15:32:34 -0800 Subject: string.h: workaround for increased stack usage The hardened strlen() function causes rather large stack usage in at least one file in the kernel, in particular when CONFIG_KASAN is enabled: drivers/media/usb/em28xx/em28xx-dvb.c: In function 'em28xx_dvb_init': drivers/media/usb/em28xx/em28xx-dvb.c:2062:1: error: the frame size of 3256 bytes is larger than 204 bytes [-Werror=frame-larger-than=] Analyzing this problem led to the discovery that gcc fails to merge the stack slots for the i2c_board_info[] structures after we strlcpy() into them, due to the 'noreturn' attribute on the source string length check. I reported this as a gcc bug, but it is unlikely to get fixed for gcc-8, since it is relatively easy to work around, and it gets triggered rarely. An earlier workaround I did added an empty inline assembly statement before the call to fortify_panic(), which works surprisingly well, but is really ugly and unintuitive. This is a new approach to the same problem, this time addressing it by not calling the 'extern __real_strnlen()' function for string constants where __builtin_strlen() is a compile-time constant and therefore known to be safe. We do this by checking if the last character in the string is a compile-time constant '\0'. If it is, we can assume that strlen() of the string is also constant. As a side-effect, this should also improve the object code output for any other call of strlen() on a string constant. [akpm@linux-foundation.org: add comment] Link: http://lkml.kernel.org/r/20171205215143.3085755-1-arnd@arndb.de Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=82365 Link: https://patchwork.kernel.org/patch/9980413/ Link: https://patchwork.kernel.org/patch/9974047/ Fixes: 6974f0c4555 ("include/linux/string.h: add the option of fortified string.h functions") Signed-off-by: Arnd Bergmann Cc: Kees Cook Cc: Mauro Carvalho Chehab Cc: Dmitry Vyukov Cc: Alexander Potapenko Cc: Andrey Ryabinin Cc: Daniel Micay Cc: Greg Kroah-Hartman Cc: Martin Wilck Cc: Dan Williams Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/string.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index 410ecf17de3c..cfd83eb2f926 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -259,7 +259,10 @@ __FORTIFY_INLINE __kernel_size_t strlen(const char *p) { __kernel_size_t ret; size_t p_size = __builtin_object_size(p, 0); - if (p_size == (size_t)-1) + + /* Work around gcc excess stack consumption issue */ + if (p_size == (size_t)-1 || + (__builtin_constant_p(p[p_size - 1]) && p[p_size - 1] == '\0')) return __builtin_strlen(p); ret = strnlen(p, p_size); if (p_size <= ret) -- cgit v1.2.3 From 3756f6401c302617c5e091081ca4d26ab604bec5 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 14 Dec 2017 15:32:41 -0800 Subject: exec: avoid gcc-8 warning for get_task_comm gcc-8 warns about using strncpy() with the source size as the limit: fs/exec.c:1223:32: error: argument to 'sizeof' in 'strncpy' call is the same expression as the source; did you mean to use the size of the destination? [-Werror=sizeof-pointer-memaccess] This is indeed slightly suspicious, as it protects us from source arguments without NUL-termination, but does not guarantee that the destination is terminated. This keeps the strncpy() to ensure we have properly padded target buffer, but ensures that we use the correct length, by passing the actual length of the destination buffer as well as adding a build-time check to ensure it is exactly TASK_COMM_LEN. There are only 23 callsites which I all reviewed to ensure this is currently the case. We could get away with doing only the check or passing the right length, but it doesn't hurt to do both. Link: http://lkml.kernel.org/r/20171205151724.1764896-1-arnd@arndb.de Signed-off-by: Arnd Bergmann Suggested-by: Kees Cook Acked-by: Kees Cook Acked-by: Ingo Molnar Cc: Alexander Viro Cc: Peter Zijlstra Cc: Serge Hallyn Cc: James Morris Cc: Aleksa Sarai Cc: "Eric W. Biederman" Cc: Frederic Weisbecker Cc: Thomas Gleixner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 21991d668d35..5124ba709830 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1503,7 +1503,11 @@ static inline void set_task_comm(struct task_struct *tsk, const char *from) __set_task_comm(tsk, from, false); } -extern char *get_task_comm(char *to, struct task_struct *tsk); +extern char *__get_task_comm(char *to, size_t len, struct task_struct *tsk); +#define get_task_comm(buf, tsk) ({ \ + BUILD_BUG_ON(sizeof(buf) != TASK_COMM_LEN); \ + __get_task_comm(buf, sizeof(buf), tsk); \ +}) #ifdef CONFIG_SMP void scheduler_ipi(void); -- cgit v1.2.3 From bdcf0a423ea1c40bbb40e7ee483b50fc8aa3d758 Mon Sep 17 00:00:00 2001 From: Thiago Rafael Becker Date: Thu, 14 Dec 2017 15:33:12 -0800 Subject: kernel: make groups_sort calling a responsibility group_info allocators In testing, we found that nfsd threads may call set_groups in parallel for the same entry cached in auth.unix.gid, racing in the call of groups_sort, corrupting the groups for that entry and leading to permission denials for the client. This patch: - Make groups_sort globally visible. - Move the call to groups_sort to the modifiers of group_info - Remove the call to groups_sort from set_groups Link: http://lkml.kernel.org/r/20171211151420.18655-1-thiago.becker@gmail.com Signed-off-by: Thiago Rafael Becker Reviewed-by: Matthew Wilcox Reviewed-by: NeilBrown Acked-by: "J. Bruce Fields" Cc: Al Viro Cc: Martin Schwidefsky Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cred.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cred.h b/include/linux/cred.h index 099058e1178b..631286535d0f 100644 --- a/include/linux/cred.h +++ b/include/linux/cred.h @@ -83,6 +83,7 @@ extern int set_current_groups(struct group_info *); extern void set_groups(struct cred *, struct group_info *); extern int groups_search(const struct group_info *, kgid_t); extern bool may_setgroups(void); +extern void groups_sort(struct group_info *); /* * The security context of a task -- cgit v1.2.3 From 4837fe37adff1d159904f0c013471b1ecbcb455e Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Thu, 14 Dec 2017 15:33:15 -0800 Subject: mm, oom_reaper: fix memory corruption David Rientjes has reported the following memory corruption while the oom reaper tries to unmap the victims address space BUG: Bad page map in process oom_reaper pte:6353826300000000 pmd:00000000 addr:00007f50cab1d000 vm_flags:08100073 anon_vma:ffff9eea335603f0 mapping: (null) index:7f50cab1d file: (null) fault: (null) mmap: (null) readpage: (null) CPU: 2 PID: 1001 Comm: oom_reaper Call Trace: unmap_page_range+0x1068/0x1130 __oom_reap_task_mm+0xd5/0x16b oom_reaper+0xff/0x14c kthread+0xc1/0xe0 Tetsuo Handa has noticed that the synchronization inside exit_mmap is insufficient. We only synchronize with the oom reaper if tsk_is_oom_victim which is not true if the final __mmput is called from a different context than the oom victim exit path. This can trivially happen from context of any task which has grabbed mm reference (e.g. to read /proc// file which requires mm etc.). The race would look like this oom_reaper oom_victim task mmget_not_zero do_exit mmput __oom_reap_task_mm mmput __mmput exit_mmap remove_vma unmap_page_range Fix this issue by providing a new mm_is_oom_victim() helper which operates on the mm struct rather than a task. Any context which operates on a remote mm struct should use this helper in place of tsk_is_oom_victim. The flag is set in mark_oom_victim and never cleared so it is stable in the exit_mmap path. Debugged by Tetsuo Handa. Link: http://lkml.kernel.org/r/20171210095130.17110-1-mhocko@kernel.org Fixes: 212925802454 ("mm: oom: let oom_reap_task and exit_mmap run concurrently") Signed-off-by: Michal Hocko Reported-by: David Rientjes Acked-by: David Rientjes Cc: Tetsuo Handa Cc: Andrea Argangeli Cc: [4.14] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/oom.h | 9 +++++++++ include/linux/sched/coredump.h | 1 + 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/oom.h b/include/linux/oom.h index 01c91d874a57..5bad038ac012 100644 --- a/include/linux/oom.h +++ b/include/linux/oom.h @@ -66,6 +66,15 @@ static inline bool tsk_is_oom_victim(struct task_struct * tsk) return tsk->signal->oom_mm; } +/* + * Use this helper if tsk->mm != mm and the victim mm needs a special + * handling. This is guaranteed to stay true after once set. + */ +static inline bool mm_is_oom_victim(struct mm_struct *mm) +{ + return test_bit(MMF_OOM_VICTIM, &mm->flags); +} + /* * Checks whether a page fault on the given mm is still reliable. * This is no longer true if the oom reaper started to reap the diff --git a/include/linux/sched/coredump.h b/include/linux/sched/coredump.h index 9c8847395b5e..ec912d01126f 100644 --- a/include/linux/sched/coredump.h +++ b/include/linux/sched/coredump.h @@ -70,6 +70,7 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_UNSTABLE 22 /* mm is unstable for copy_from_user */ #define MMF_HUGE_ZERO_PAGE 23 /* mm has ever used the global huge zero page */ #define MMF_DISABLE_THP 24 /* disable THP for all VMAs */ +#define MMF_OOM_VICTIM 25 /* mm is the oom victim */ #define MMF_DISABLE_THP_MASK (1 << MMF_DISABLE_THP) #define MMF_INIT_MASK (MMF_DUMPABLE_MASK | MMF_DUMP_FILTER_MASK |\ -- cgit v1.2.3 From 14cb0dc6479dc5ebc63b3a459a5d89a2f1b39fed Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 18 Dec 2017 15:40:43 +0800 Subject: block: don't let passthrough IO go into .make_request_fn() Commit a8821f3f3("block: Improvements to bounce-buffer handling") tries to make sure that the bio to .make_request_fn won't exceed BIO_MAX_PAGES, but ignores that passthrough I/O can use blk_queue_bounce() too. Especially, passthrough IO may not be sector-aligned, and the check of 'sectors < bio_sectors(*bio_orig)' inside __blk_queue_bounce() may become true even though the max bvec number doesn't exceed BIO_MAX_PAGES, then cause the bio splitted, and the original passthrough bio is submited to generic_make_request(). This patch fixes this issue by checking if the bio is passthrough IO, and use bio_kmalloc() to allocate the cloned passthrough bio. Cc: NeilBrown Fixes: a8821f3f3("block: Improvements to bounce-buffer handling") Tested-by: Michele Ballabio Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8089ca17db9a..abd06f540863 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -241,14 +241,24 @@ struct request { struct request *next_rq; }; +static inline bool blk_op_is_scsi(unsigned int op) +{ + return op == REQ_OP_SCSI_IN || op == REQ_OP_SCSI_OUT; +} + +static inline bool blk_op_is_private(unsigned int op) +{ + return op == REQ_OP_DRV_IN || op == REQ_OP_DRV_OUT; +} + static inline bool blk_rq_is_scsi(struct request *rq) { - return req_op(rq) == REQ_OP_SCSI_IN || req_op(rq) == REQ_OP_SCSI_OUT; + return blk_op_is_scsi(req_op(rq)); } static inline bool blk_rq_is_private(struct request *rq) { - return req_op(rq) == REQ_OP_DRV_IN || req_op(rq) == REQ_OP_DRV_OUT; + return blk_op_is_private(req_op(rq)); } static inline bool blk_rq_is_passthrough(struct request *rq) @@ -256,6 +266,13 @@ static inline bool blk_rq_is_passthrough(struct request *rq) return blk_rq_is_scsi(rq) || blk_rq_is_private(rq); } +static inline bool bio_is_passthrough(struct bio *bio) +{ + unsigned op = bio_op(bio); + + return blk_op_is_scsi(op) || blk_op_is_private(op); +} + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; -- cgit v1.2.3 From 0abc2a10389f0c9070f76ca906c7382788036b93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 18 Dec 2017 15:40:44 +0800 Subject: block: fix blk_rq_append_bio Commit caa4b02476e3(blk-map: call blk_queue_bounce from blk_rq_append_bio) moves blk_queue_bounce() into blk_rq_append_bio(), but don't consider the fact that the bounced bio becomes invisible to caller since the parameter type is 'struct bio *'. Make it a pointer to a pointer to a bio, so the caller sees the right bio also after a bounce. Fixes: caa4b02476e3 ("blk-map: call blk_queue_bounce from blk_rq_append_bio") Cc: Christoph Hellwig Reported-by: Michele Ballabio (handling failure of blk_rq_append_bio(), only call bio_get() after blk_rq_append_bio() returns OK) Tested-by: Michele Ballabio Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index abd06f540863..100d0df38026 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -965,7 +965,7 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern int blk_rq_append_bio(struct request *rq, struct bio *bio); +extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); -- cgit v1.2.3 From 231243c82793428467524227ae02ca451e6a98e7 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 10 Nov 2017 15:59:52 +0900 Subject: Revert "mlx5: move affinity hints assignments to generic code" Before the offending commit, mlx5 core did the IRQ affinity itself, and it seems that the new generic code have some drawbacks and one of them is the lack for user ability to modify irq affinity after the initial affinity values got assigned. The issue is still being discussed and a solution in the new generic code is required, until then we need to revert this patch. This fixes the following issue: echo > /proc/irq//smp_affinity fails with -EIO This reverts commit a435393acafbf0ecff4deb3e3cb554b34f0d0664. Note: kept mlx5_get_vector_affinity in include/linux/mlx5/driver.h since it is used in mlx5_ib driver. Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code") Cc: Sagi Grimberg Cc: Thomas Gleixner Cc: Jes Sorensen Reported-by: Jes Sorensen Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a886b51511ab..40a6f33c4cde 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -556,6 +556,7 @@ struct mlx5_core_sriov { }; struct mlx5_irq_info { + cpumask_var_t mask; char name[MLX5_MAX_IRQ_NAME]; }; -- cgit v1.2.3 From 37e92a9d4fe38dc3e7308913575983a6a088c8d4 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Mon, 13 Nov 2017 10:11:27 +0200 Subject: net/mlx5: Fix rate limit packet pacing naming and struct In mlx5_ifc, struct size was not complete, and thus driver was sending garbage after the last defined field. Fixed it by adding reserved field to complete the struct size. In addition, rename all set_rate_limit to set_pp_rate_limit to be compliant with the Firmware <-> Driver definition. Fixes: 7486216b3a0b ("{net,IB}/mlx5: mlx5_ifc updates") Fixes: 1466cc5b23d1 ("net/mlx5: Rate limit tables support") Signed-off-by: Eran Ben Elisha Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index 38a7577a9ce7..d44ec5f41d4a 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -147,7 +147,7 @@ enum { MLX5_CMD_OP_ALLOC_Q_COUNTER = 0x771, MLX5_CMD_OP_DEALLOC_Q_COUNTER = 0x772, MLX5_CMD_OP_QUERY_Q_COUNTER = 0x773, - MLX5_CMD_OP_SET_RATE_LIMIT = 0x780, + MLX5_CMD_OP_SET_PP_RATE_LIMIT = 0x780, MLX5_CMD_OP_QUERY_RATE_LIMIT = 0x781, MLX5_CMD_OP_CREATE_SCHEDULING_ELEMENT = 0x782, MLX5_CMD_OP_DESTROY_SCHEDULING_ELEMENT = 0x783, @@ -7239,7 +7239,7 @@ struct mlx5_ifc_add_vxlan_udp_dport_in_bits { u8 vxlan_udp_port[0x10]; }; -struct mlx5_ifc_set_rate_limit_out_bits { +struct mlx5_ifc_set_pp_rate_limit_out_bits { u8 status[0x8]; u8 reserved_at_8[0x18]; @@ -7248,7 +7248,7 @@ struct mlx5_ifc_set_rate_limit_out_bits { u8 reserved_at_40[0x40]; }; -struct mlx5_ifc_set_rate_limit_in_bits { +struct mlx5_ifc_set_pp_rate_limit_in_bits { u8 opcode[0x10]; u8 reserved_at_10[0x10]; @@ -7261,6 +7261,8 @@ struct mlx5_ifc_set_rate_limit_in_bits { u8 reserved_at_60[0x20]; u8 rate_limit[0x20]; + + u8 reserved_at_a0[0x160]; }; struct mlx5_ifc_access_register_out_bits { -- cgit v1.2.3 From d6b2785cd55ee72e9608762650b3ef299f801b1b Mon Sep 17 00:00:00 2001 From: Moshe Shemesh Date: Tue, 21 Nov 2017 15:15:51 +0200 Subject: net/mlx5: Cleanup IRQs in case of unload failure When mlx5_stop_eqs fails to destroy any of the eqs it returns with an error. In such failure flow the function will return without releasing all EQs irqs and then pci_free_irq_vectors will fail. Fix by only warn on destroy EQ failure and continue to release other EQs and their irqs. It fixes the following kernel trace: kernel: kernel BUG at drivers/pci/msi.c:352! ... ... kernel: Call Trace: kernel: pci_disable_msix+0xd3/0x100 kernel: pci_free_irq_vectors+0xe/0x20 kernel: mlx5_load_one.isra.17+0x9f5/0xec0 [mlx5_core] Fixes: e126ba97dba9 ("mlx5: Add driver for Mellanox Connect-IB adapters") Signed-off-by: Moshe Shemesh Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 40a6f33c4cde..57b109c6e422 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1049,7 +1049,7 @@ int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, enum mlx5_eq_type type); int mlx5_destroy_unmap_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq); int mlx5_start_eqs(struct mlx5_core_dev *dev); -int mlx5_stop_eqs(struct mlx5_core_dev *dev); +void mlx5_stop_eqs(struct mlx5_core_dev *dev); int mlx5_vector2eqn(struct mlx5_core_dev *dev, int vector, int *eqn, unsigned int *irqn); int mlx5_core_attach_mcg(struct mlx5_core_dev *dev, union ib_gid *mgid, u32 qpn); -- cgit v1.2.3 From 111be883981748acc9a56e855c8336404a8e787c Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 11:10:17 -0700 Subject: block-throttle: avoid double charge If a bio is throttled and split after throttling, the bio could be resubmited and enters the throttling again. This will cause part of the bio to be charged multiple times. If the cgroup has an IO limit, the double charge will significantly harm the performance. The bio split becomes quite common after arbitrary bio size change. To fix this, we always set the BIO_THROTTLED flag if a bio is throttled. If the bio is cloned/split, we copy the flag to new bio too to avoid a double charge. However, cloned bio could be directed to a new disk, keeping the flag be a problem. The observation is we always set new disk for the bio in this case, so we can clear the flag in bio_set_dev(). This issue exists for a long time, arbitrary bio size change just makes it worse, so this should go into stable at least since v4.2. V1-> V2: Not add extra field in bio based on discussion with Tejun Cc: Vivek Goyal Cc: stable@vger.kernel.org Acked-by: Tejun Heo Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/blk_types.h | 9 ++++----- 2 files changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 82f0c8fd7be8..23d29b39f71e 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -492,6 +492,8 @@ extern unsigned int bvec_nr_vecs(unsigned short idx); #define bio_set_dev(bio, bdev) \ do { \ + if ((bio)->bi_disk != (bdev)->bd_disk) \ + bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ } while (0) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index a1e628e032da..9e7d8bd776d2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -50,8 +50,6 @@ struct blk_issue_stat { struct bio { struct bio *bi_next; /* request queue link */ struct gendisk *bi_disk; - u8 bi_partno; - blk_status_t bi_status; unsigned int bi_opf; /* bottom bits req flags, * top bits REQ_OP. Use * accessors. @@ -59,8 +57,8 @@ struct bio { unsigned short bi_flags; /* status, etc and bvec pool number */ unsigned short bi_ioprio; unsigned short bi_write_hint; - - struct bvec_iter bi_iter; + blk_status_t bi_status; + u8 bi_partno; /* Number of segments in this BIO after * physical address coalescing is performed. @@ -74,8 +72,9 @@ struct bio { unsigned int bi_seg_front_size; unsigned int bi_seg_back_size; - atomic_t __bi_remaining; + struct bvec_iter bi_iter; + atomic_t __bi_remaining; bio_end_io_t *bi_end_io; void *bi_private; -- cgit v1.2.3 From 4ccafe032005e9b96acbef2e389a4de5b1254add Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 20 Dec 2017 13:13:58 -0700 Subject: block: unalign call_single_data in struct request A previous change blindly added massive alignment to the call_single_data structure in struct request. This ballooned it in size from 296 to 320 bytes on my setup, for no valid reason at all. Use the unaligned struct __call_single_data variant instead. Fixes: 966a967116e69 ("smp: Avoid using two cache lines for struct call_single_data") Cc: stable@vger.kernel.org # v4.14 Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 100d0df38026..0ce8a372d506 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -135,7 +135,7 @@ typedef __u32 __bitwise req_flags_t; struct request { struct list_head queuelist; union { - call_single_data_t csd; + struct __call_single_data csd; u64 fifo_time; }; -- cgit v1.2.3 From bb7f0f989ca7de1153bd128a40a71709e339fa03 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Mon, 18 Dec 2017 20:12:00 -0800 Subject: bpf: fix integer overflows There were various issues related to the limited size of integers used in the verifier: - `off + size` overflow in __check_map_access() - `off + reg->off` overflow in check_mem_access() - `off + reg->var_off.value` overflow or 32-bit truncation of `reg->var_off.value` in check_mem_access() - 32-bit truncation in check_stack_boundary() Make sure that any integer math cannot overflow by not allowing pointer math with large values. Also reduce the scope of "scalar op scalar" tracking. Fixes: f1174f77b50c ("bpf/verifier: rework value tracking") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Signed-off-by: Daniel Borkmann --- include/linux/bpf_verifier.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index c561b986bab0..1632bb13ad8a 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -15,11 +15,11 @@ * In practice this is far bigger than any realistic pointer offset; this limit * ensures that umax_value + (int)off + (int)size cannot overflow a u64. */ -#define BPF_MAX_VAR_OFF (1ULL << 31) +#define BPF_MAX_VAR_OFF (1 << 29) /* Maximum variable size permitted for ARG_CONST_SIZE[_OR_ZERO]. This ensures * that converting umax_value to int cannot overflow. */ -#define BPF_MAX_VAR_SIZ INT_MAX +#define BPF_MAX_VAR_SIZ (1 << 29) /* Liveness marks, used for registers and spilled-regs (in stack slots). * Read marks propagate upwards until they find a write mark; they record that -- cgit v1.2.3 From 513674b5a2c9c7a67501506419da5c3c77ac6f08 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Wed, 20 Dec 2017 12:10:21 -0800 Subject: net: reevalulate autoflowlabel setting after sysctl setting sysctl.ip6.auto_flowlabels is default 1. In our hosts, we set it to 2. If sockopt doesn't set autoflowlabel, outcome packets from the hosts are supposed to not include flowlabel. This is true for normal packet, but not for reset packet. The reason is ipv6_pinfo.autoflowlabel is set in sock creation. Later if we change sysctl.ip6.auto_flowlabels, the ipv6_pinfo.autoflowlabel isn't changed, so the sock will keep the old behavior in terms of auto flowlabel. Reset packet is suffering from this problem, because reset packet is sent from a special control socket, which is created at boot time. Since sysctl.ipv6.auto_flowlabels is 1 by default, the control socket will always have its ipv6_pinfo.autoflowlabel set, even after user set sysctl.ipv6.auto_flowlabels to 1, so reset packset will always have flowlabel. Normal sock created before sysctl setting suffers from the same issue. We can't even turn off autoflowlabel unless we kill all socks in the hosts. To fix this, if IPV6_AUTOFLOWLABEL sockopt is used, we use the autoflowlabel setting from user, otherwise we always call ip6_default_np_autolabel() which has the new settings of sysctl. Note, this changes behavior a little bit. Before commit 42240901f7c4 (ipv6: Implement different admin modes for automatic flow labels), the autoflowlabel behavior of a sock isn't sticky, eg, if sysctl changes, existing connection will change autoflowlabel behavior. After that commit, autoflowlabel behavior is sticky in the whole life of the sock. With this patch, the behavior isn't sticky again. Cc: Martin KaFai Lau Cc: Eric Dumazet Cc: Tom Herbert Signed-off-by: Shaohua Li Signed-off-by: David S. Miller --- include/linux/ipv6.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index cb18c6290ca8..8415bf1a9776 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -273,7 +273,8 @@ struct ipv6_pinfo { * 100: prefer care-of address */ dontfrag:1, - autoflowlabel:1; + autoflowlabel:1, + autoflowlabel_set:1; __u8 min_hopcount; __u8 tclass; __be32 rcv_flowinfo; -- cgit v1.2.3 From 71a0ff65a21bf3e2c4fde208c4a635ed2bbb4e81 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Thu, 21 Dec 2017 17:38:26 +0200 Subject: IB/mlx5: Fix congestion counters in LAG mode Congestion counters are counted and queried per physical function. When working in LAG mode, CNP packets can be sent or received on both of the functions, thus congestion counters should be aggregated from the two physical functions. Fixes: e1f24a79f424 ("IB/mlx5: Support congestion related counters") Signed-off-by: Majd Dibbiny Reviewed-by: Aviv Heller Signed-off-by: Leon Romanovsky Signed-off-by: Jason Gunthorpe --- include/linux/mlx5/driver.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index a886b51511ab..8846919356ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1164,6 +1164,10 @@ int mlx5_cmd_create_vport_lag(struct mlx5_core_dev *dev); int mlx5_cmd_destroy_vport_lag(struct mlx5_core_dev *dev); bool mlx5_lag_is_active(struct mlx5_core_dev *dev); struct net_device *mlx5_lag_get_roce_netdev(struct mlx5_core_dev *dev); +int mlx5_lag_query_cong_counters(struct mlx5_core_dev *dev, + u64 *values, + int num_counters, + size_t *offsets); struct mlx5_uars_page *mlx5_get_uars_page(struct mlx5_core_dev *mdev); void mlx5_put_uars_page(struct mlx5_core_dev *mdev, struct mlx5_uars_page *up); -- cgit v1.2.3 From 39c3fd58952d7599d367c84c1330b785d91d6088 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Sat, 2 Dec 2017 18:11:04 +0100 Subject: kernel/irq: Extend lockdep class for request mutex The IRQ code already has support for lockdep class for the lock mutex in an interrupt descriptor. Extend this to add a second class for the request mutex in the descriptor. Not having a class is resulting in false positive splats in some code paths. Signed-off-by: Andrew Lunn Signed-off-by: Thomas Gleixner Acked-by: linus.walleij@linaro.org Cc: grygorii.strashko@ti.com Cc: f.fainelli@gmail.com Link: https://lkml.kernel.org/r/1512234664-21555-1-git-send-email-andrew@lunn.ch --- include/linux/gpio/driver.h | 33 +++++++++++++++++++++------------ include/linux/irqdesc.h | 9 ++++++--- 2 files changed, 27 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 55e672592fa9..7258cd676df4 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -66,9 +66,10 @@ struct gpio_irq_chip { /** * @lock_key: * - * Per GPIO IRQ chip lockdep class. + * Per GPIO IRQ chip lockdep classes. */ struct lock_class_key *lock_key; + struct lock_class_key *request_key; /** * @parent_handler: @@ -323,7 +324,8 @@ extern const char *gpiochip_is_requested(struct gpio_chip *chip, /* add/remove chips */ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); /** * gpiochip_add_data() - register a gpio_chip @@ -350,11 +352,13 @@ extern int gpiochip_add_data_with_key(struct gpio_chip *chip, void *data, */ #ifdef CONFIG_LOCKDEP #define gpiochip_add_data(chip, data) ({ \ - static struct lock_class_key key; \ - gpiochip_add_data_with_key(chip, data, &key); \ + static struct lock_class_key lock_key; \ + static struct lock_class_key request_key; \ + gpiochip_add_data_with_key(chip, data, &lock_key, \ + &request_key); \ }) #else -#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL) +#define gpiochip_add_data(chip, data) gpiochip_add_data_with_key(chip, data, NULL, NULL) #endif static inline int gpiochip_add(struct gpio_chip *chip) @@ -429,7 +433,8 @@ int gpiochip_irqchip_add_key(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type, bool threaded, - struct lock_class_key *lock_key); + struct lock_class_key *lock_key, + struct lock_class_key *request_key); #ifdef CONFIG_LOCKDEP @@ -445,10 +450,12 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, irq_flow_handler_t handler, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, &key); + handler, type, false, + &lock_key, &request_key); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -458,10 +465,12 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { - static struct lock_class_key key; + static struct lock_class_key lock_key; + static struct lock_class_key request_key; return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, &key); + handler, type, true, + &lock_key, &request_key); } #else static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, @@ -471,7 +480,7 @@ static inline int gpiochip_irqchip_add(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, false, NULL); + handler, type, false, NULL, NULL); } static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, @@ -481,7 +490,7 @@ static inline int gpiochip_irqchip_add_nested(struct gpio_chip *gpiochip, unsigned int type) { return gpiochip_irqchip_add_key(gpiochip, irqchip, first_irq, - handler, type, true, NULL); + handler, type, true, NULL, NULL); } #endif /* CONFIG_LOCKDEP */ diff --git a/include/linux/irqdesc.h b/include/linux/irqdesc.h index 39fb3700f7a9..25b33b664537 100644 --- a/include/linux/irqdesc.h +++ b/include/linux/irqdesc.h @@ -255,12 +255,15 @@ static inline bool irq_is_percpu_devid(unsigned int irq) } static inline void -irq_set_lockdep_class(unsigned int irq, struct lock_class_key *class) +irq_set_lockdep_class(unsigned int irq, struct lock_class_key *lock_class, + struct lock_class_key *request_class) { struct irq_desc *desc = irq_to_desc(irq); - if (desc) - lockdep_set_class(&desc->lock, class); + if (desc) { + lockdep_set_class(&desc->lock, lock_class); + lockdep_set_class(&desc->request_mutex, request_class); + } } #ifdef CONFIG_IRQ_PREFLOW_FASTEOI -- cgit v1.2.3 From 466a2b42d67644447a1765276259a3ea5531ddff Mon Sep 17 00:00:00 2001 From: Joel Fernandes Date: Thu, 21 Dec 2017 02:22:45 +0100 Subject: cpufreq: schedutil: Use idle_calls counter of the remote CPU Since the recent remote cpufreq callback work, its possible that a cpufreq update is triggered from a remote CPU. For single policies however, the current code uses the local CPU when trying to determine if the remote sg_cpu entered idle or is busy. This is incorrect. To remedy this, compare with the nohz tick idle_calls counter of the remote CPU. Fixes: 674e75411fc2 (sched: cpufreq: Allow remote cpufreq callbacks) Acked-by: Viresh Kumar Acked-by: Peter Zijlstra (Intel) Signed-off-by: Joel Fernandes Cc: 4.14+ # 4.14+ Signed-off-by: Rafael J. Wysocki --- include/linux/tick.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tick.h b/include/linux/tick.h index f442d1a42025..7cc35921218e 100644 --- a/include/linux/tick.h +++ b/include/linux/tick.h @@ -119,6 +119,7 @@ extern void tick_nohz_idle_exit(void); extern void tick_nohz_irq_exit(void); extern ktime_t tick_nohz_get_sleep_length(void); extern unsigned long tick_nohz_get_idle_calls(void); +extern unsigned long tick_nohz_get_idle_calls_cpu(int cpu); extern u64 get_cpu_idle_time_us(int cpu, u64 *last_update_time); extern u64 get_cpu_iowait_time_us(int cpu, u64 *last_update_time); #else /* !CONFIG_NO_HZ_COMMON */ -- cgit v1.2.3 From 69790ba92b8d67eaee5e50b30a5b696d40664caf Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:44:34 +0100 Subject: genirq: Introduce IRQD_CAN_RESERVE flag Add a new flag to mark interrupts which can use reservation mode. This is going to be used in subsequent patches to disable reservation mode for a certain class of MSI devices. Signed-off-by: Thomas Gleixner Tested-by: Alexandru Chirvasitu Tested-by: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- include/linux/irq.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index e140f69163b6..a0231e96a578 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -212,6 +212,7 @@ struct irq_data { * mask. Applies only to affinity managed irqs. * IRQD_SINGLE_TARGET - IRQ allows only a single affinity target * IRQD_DEFAULT_TRIGGER_SET - Expected trigger already been set + * IRQD_CAN_RESERVE - Can use reservation mode */ enum { IRQD_TRIGGER_MASK = 0xf, @@ -233,6 +234,7 @@ enum { IRQD_MANAGED_SHUTDOWN = (1 << 23), IRQD_SINGLE_TARGET = (1 << 24), IRQD_DEFAULT_TRIGGER_SET = (1 << 25), + IRQD_CAN_RESERVE = (1 << 26), }; #define __irqd_to_state(d) ACCESS_PRIVATE((d)->common, state_use_accessors) @@ -377,6 +379,21 @@ static inline bool irqd_is_managed_and_shutdown(struct irq_data *d) return __irqd_to_state(d) & IRQD_MANAGED_SHUTDOWN; } +static inline void irqd_set_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) |= IRQD_CAN_RESERVE; +} + +static inline void irqd_clr_can_reserve(struct irq_data *d) +{ + __irqd_to_state(d) &= ~IRQD_CAN_RESERVE; +} + +static inline bool irqd_can_reserve(struct irq_data *d) +{ + return __irqd_to_state(d) & IRQD_CAN_RESERVE; +} + #undef __irqd_to_state static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) -- cgit v1.2.3 From 702cb0a02813299d6911b775c637906ae21b737d Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 29 Dec 2017 16:59:06 +0100 Subject: genirq/irqdomain: Rename early argument of irq_domain_activate_irq() The 'early' argument of irq_domain_activate_irq() is actually used to denote reservation mode. To avoid confusion, rename it before abuse happens. No functional change. Fixes: 72491643469a ("genirq/irqdomain: Update irq_domain_ops.activate() signature") Signed-off-by: Thomas Gleixner Cc: Alexandru Chirvasitu Cc: Andy Shevchenko Cc: Dou Liyang Cc: Pavel Machek Cc: Maciej W. Rozycki Cc: Mikael Pettersson Cc: Josh Poulson Cc: Mihai Costache Cc: Stephen Hemminger Cc: Marc Zyngier Cc: linux-pci@vger.kernel.org Cc: Haiyang Zhang Cc: Dexuan Cui Cc: Simon Xiao Cc: Saeed Mahameed Cc: Jork Loeser Cc: Bjorn Helgaas Cc: devel@linuxdriverproject.org Cc: KY Srinivasan Cc: Alan Cox Cc: Sakari Ailus , Cc: linux-media@vger.kernel.org --- include/linux/irqdomain.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index a34355d19546..48c7e86bb556 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -113,7 +113,7 @@ struct irq_domain_ops { unsigned int nr_irqs, void *arg); void (*free)(struct irq_domain *d, unsigned int virq, unsigned int nr_irqs); - int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool early); + int (*activate)(struct irq_domain *d, struct irq_data *irqd, bool reserve); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, unsigned long *out_hwirq, unsigned int *out_type); -- cgit v1.2.3 From 26456f87aca7157c057de65c9414b37f1ab881d1 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 27 Dec 2017 21:37:25 +0100 Subject: timers: Reinitialize per cpu bases on hotplug The timer wheel bases are not (re)initialized on CPU hotplug. That leaves them with a potentially stale clk and next_expiry valuem, which can cause trouble then the CPU is plugged. Add a prepare callback which forwards the clock, sets next_expiry to far in the future and reset the control flags to a known state. Set base->must_forward_clk so the first timer which is queued will try to forward the clock to current jiffies. Fixes: 500462a9de65 ("timers: Switch to a non-cascading wheel") Reported-by: Paul E. McKenney Signed-off-by: Thomas Gleixner Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Sebastian Siewior Cc: Anna-Maria Gleixner Cc: stable@vger.kernel.org Link: https://lkml.kernel.org/r/alpine.DEB.2.20.1712272152200.2431@nanos --- include/linux/cpuhotplug.h | 2 +- include/linux/timer.h | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuhotplug.h b/include/linux/cpuhotplug.h index 201ab7267986..1a32e558eb11 100644 --- a/include/linux/cpuhotplug.h +++ b/include/linux/cpuhotplug.h @@ -86,7 +86,7 @@ enum cpuhp_state { CPUHP_MM_ZSWP_POOL_PREPARE, CPUHP_KVM_PPC_BOOK3S_PREPARE, CPUHP_ZCOMP_PREPARE, - CPUHP_TIMERS_DEAD, + CPUHP_TIMERS_PREPARE, CPUHP_MIPS_SOC_PREPARE, CPUHP_BP_PREPARE_DYN, CPUHP_BP_PREPARE_DYN_END = CPUHP_BP_PREPARE_DYN + 20, diff --git a/include/linux/timer.h b/include/linux/timer.h index 04af640ea95b..2448f9cc48a3 100644 --- a/include/linux/timer.h +++ b/include/linux/timer.h @@ -207,9 +207,11 @@ unsigned long round_jiffies_up(unsigned long j); unsigned long round_jiffies_up_relative(unsigned long j); #ifdef CONFIG_HOTPLUG_CPU +int timers_prepare_cpu(unsigned int cpu); int timers_dead_cpu(unsigned int cpu); #else -#define timers_dead_cpu NULL +#define timers_prepare_cpu NULL +#define timers_dead_cpu NULL #endif #endif -- cgit v1.2.3 From 98801506552593c9b8ac11021b0cdad12cab4f6b Mon Sep 17 00:00:00 2001 From: David Howells Date: Tue, 2 Jan 2018 10:02:19 +0000 Subject: fscache: Fix the default for fscache_maybe_release_page() Fix the default for fscache_maybe_release_page() for when the cookie isn't valid or the page isn't cached. It mustn't return false as that indicates the page cannot yet be freed. The problem with the default is that if, say, there's no cache, but a network filesystem's pages are using up almost all the available memory, a system can OOM because the filesystem ->releasepage() op will not allow them to be released as fscache_maybe_release_page() incorrectly prevents it. This can be tested by writing a sequence of 512MiB files to an AFS mount. It does not affect NFS or CIFS because both of those wrap the call in a check of PG_fscache and it shouldn't bother Ceph as that only has PG_private set whilst writeback is in progress. This might be an issue for 9P, however. Note that the pages aren't entirely stuck. Removing a file or unmounting will clear things because that uses ->invalidatepage() instead. Fixes: 201a15428bd5 ("FS-Cache: Handle pages pending storage that get evicted under OOM conditions") Reported-by: Marc Dionne Signed-off-by: David Howells Reviewed-by: Jeff Layton Acked-by: Al Viro Tested-by: Marc Dionne cc: stable@vger.kernel.org # 2.6.32+ --- include/linux/fscache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fscache.h b/include/linux/fscache.h index f4ff47d4a893..fe0c349684fa 100644 --- a/include/linux/fscache.h +++ b/include/linux/fscache.h @@ -755,7 +755,7 @@ bool fscache_maybe_release_page(struct fscache_cookie *cookie, { if (fscache_cookie_valid(cookie) && PageFsCache(page)) return __fscache_maybe_release_page(cookie, page, gfp); - return false; + return true; } /** -- cgit v1.2.3 From f24c4d478013d82bd1b943df566fff3561d52864 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Tue, 2 Jan 2018 17:21:10 +0000 Subject: efi/capsule-loader: Reinstate virtual capsule mapping Commit: 82c3768b8d68 ("efi/capsule-loader: Use a cached copy of the capsule header") ... refactored the capsule loading code that maps the capsule header, to avoid having to map it several times. However, as it turns out, the vmap() call we ended up removing did not just map the header, but the entire capsule image, and dropping this virtual mapping breaks capsules that are processed by the firmware immediately (i.e., without a reboot). Unfortunately, that change was part of a larger refactor that allowed a quirk to be implemented for Quark, which has a non-standard memory layout for capsules, and we have slightly painted ourselves into a corner by allowing quirk code to mangle the capsule header and memory layout. So we need to fix this without breaking Quark. Fortunately, Quark does not appear to care about the virtual mapping, and so we can simply do a partial revert of commit: 2a457fb31df6 ("efi/capsule-loader: Use page addresses rather than struct page pointers") ... and create a vmap() mapping of the entire capsule (including header) based on the reinstated struct page array, unless running on Quark, in which case we pass the capsule header copy as before. Reported-by: Ge Song Tested-by: Bryan O'Donoghue Tested-by: Ge Song Signed-off-by: Ard Biesheuvel Cc: Cc: Dave Young Cc: Linus Torvalds Cc: Matt Fleming Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-efi@vger.kernel.org Fixes: 82c3768b8d68 ("efi/capsule-loader: Use a cached copy of the capsule header") Link: http://lkml.kernel.org/r/20180102172110.17018-3-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d813f7b04da7..29fdf8029cf6 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -140,11 +140,13 @@ struct efi_boot_memmap { struct capsule_info { efi_capsule_header_t header; + efi_capsule_header_t *capsule; int reset_type; long index; size_t count; size_t total_size; - phys_addr_t *pages; + struct page **pages; + phys_addr_t *phys; size_t page_bytes_remain; }; -- cgit v1.2.3 From 040ee69226f8a96b7943645d68f41d5d44b5ff7d Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 2 Dec 2017 20:20:38 -0500 Subject: fix "netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'" Descriptor table is a shared object; it's not a place where you can stick temporary references to files, especially when we don't need an opened file at all. Cc: stable@vger.kernel.org # v4.14 Fixes: 98589a0998b8 ("netfilter: xt_bpf: Fix XT_BPF_MODE_FD_PINNED mode of 'xt_bpf_info_v1'") Signed-off-by: Al Viro --- include/linux/bpf.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e55e4255a210..b63a592ad29d 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -419,6 +419,8 @@ static inline int bpf_map_attr_numa_node(const union bpf_attr *attr) attr->numa_node : NUMA_NO_NODE; } +struct bpf_prog *bpf_prog_get_type_path(const char *name, enum bpf_prog_type type); + #else /* !CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get(u32 ufd) { @@ -506,6 +508,12 @@ static inline int cpu_map_enqueue(struct bpf_cpu_map_entry *rcpu, { return 0; } + +static inline struct bpf_prog *bpf_prog_get_type_path(const char *name, + enum bpf_prog_type type) +{ + return ERR_PTR(-EOPNOTSUPP); +} #endif /* CONFIG_BPF_SYSCALL */ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, @@ -514,6 +522,8 @@ static inline struct bpf_prog *bpf_prog_get_type(u32 ufd, return bpf_prog_get_type_dev(ufd, type, false); } +bool bpf_prog_get_ok(struct bpf_prog *, enum bpf_prog_type *, bool); + int bpf_prog_offload_compile(struct bpf_prog *prog); void bpf_prog_offload_destroy(struct bpf_prog *prog); -- cgit v1.2.3 From 5133550296d43236439494aa955bfb765a89f615 Mon Sep 17 00:00:00 2001 From: Sergei Shtylyov Date: Thu, 4 Jan 2018 21:06:49 +0300 Subject: sh_eth: fix SH7757 GEther initialization Renesas SH7757 has 2 Fast and 2 Gigabit Ether controllers, while the 'sh_eth' driver can only reset and initialize TSU of the first controller pair. Shimoda-san tried to solve that adding the 'needs_init' member to the 'struct sh_eth_plat_data', however the platform code still never sets this flag. I think that we can infer this information from the 'devno' variable (set to 'platform_device::id') and reset/init the Ether controller pair only for an even 'devno'; therefore 'sh_eth_plat_data::needs_init' can be removed... Fixes: 150647fb2c31 ("net: sh_eth: change the condition of initialization") Signed-off-by: Sergei Shtylyov Signed-off-by: David S. Miller --- include/linux/sh_eth.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sh_eth.h b/include/linux/sh_eth.h index ff3642d267f7..94081e9a5010 100644 --- a/include/linux/sh_eth.h +++ b/include/linux/sh_eth.h @@ -17,7 +17,6 @@ struct sh_eth_plat_data { unsigned char mac_addr[ETH_ALEN]; unsigned no_ether_link:1; unsigned ether_link_active_low:1; - unsigned needs_init:1; }; #endif -- cgit v1.2.3 From 527187d28569e39c5d489d6306d3b79605cf85a6 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 8 Jan 2018 17:27:19 +0100 Subject: locking/lockdep: Remove cross-release leftovers There's two cross-release leftover facilities: - the crossrelease_hist_*() irq-tracing callbacks (NOPs currently) - the complete_release_commit() callback (NOP as well) Remove them. Cc: David Sterba Cc: Byungchul Park Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/completion.h | 1 - include/linux/irqflags.h | 4 ---- include/linux/lockdep.h | 2 -- 3 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/completion.h b/include/linux/completion.h index 94a59ba7d422..519e94915d18 100644 --- a/include/linux/completion.h +++ b/include/linux/completion.h @@ -32,7 +32,6 @@ struct completion { #define init_completion(x) __init_completion(x) static inline void complete_acquire(struct completion *x) {} static inline void complete_release(struct completion *x) {} -static inline void complete_release_commit(struct completion *x) {} #define COMPLETION_INITIALIZER(work) \ { 0, __WAIT_QUEUE_HEAD_INITIALIZER((work).wait) } diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 46cb57d5eb13..1b3996ff3f16 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -27,22 +27,18 @@ # define trace_hardirq_enter() \ do { \ current->hardirq_context++; \ - crossrelease_hist_start(XHLOCK_HARD); \ } while (0) # define trace_hardirq_exit() \ do { \ current->hardirq_context--; \ - crossrelease_hist_end(XHLOCK_HARD); \ } while (0) # define lockdep_softirq_enter() \ do { \ current->softirq_context++; \ - crossrelease_hist_start(XHLOCK_SOFT); \ } while (0) # define lockdep_softirq_exit() \ do { \ current->softirq_context--; \ - crossrelease_hist_end(XHLOCK_SOFT); \ } while (0) # define INIT_TRACE_IRQFLAGS .softirqs_enabled = 1, #else diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 2e75dc34bff5..3251d9c0d313 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -475,8 +475,6 @@ enum xhlock_context_t { #define STATIC_LOCKDEP_MAP_INIT(_name, _key) \ { .name = (_name), .key = (void *)(_key), } -static inline void crossrelease_hist_start(enum xhlock_context_t c) {} -static inline void crossrelease_hist_end(enum xhlock_context_t c) {} static inline void lockdep_invariant_state(bool force) {} static inline void lockdep_init_task(struct task_struct *task) {} static inline void lockdep_free_task(struct task_struct *task) {} -- cgit v1.2.3 From b2157399cc9898260d6031c5bfe45fe137c1fbe7 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Sun, 7 Jan 2018 17:33:02 -0800 Subject: bpf: prevent out-of-bounds speculation Under speculation, CPUs may mis-predict branches in bounds checks. Thus, memory accesses under a bounds check may be speculated even if the bounds check fails, providing a primitive for building a side channel. To avoid leaking kernel data round up array-based maps and mask the index after bounds check, so speculated load with out of bounds index will load either valid value from the array or zero from the padded area. Unconditionally mask index for all array types even when max_entries are not rounded to power of 2 for root user. When map is created by unpriv user generate a sequence of bpf insns that includes AND operation to make sure that JITed code includes the same 'index & index_mask' operation. If prog_array map is created by unpriv user replace bpf_tail_call(ctx, map, index); with if (index >= max_entries) { index &= map->index_mask; bpf_tail_call(ctx, map, index); } (along with roundup to power 2) to prevent out-of-bounds speculation. There is secondary redundant 'if (index >= max_entries)' in the interpreter and in all JITs, but they can be optimized later if necessary. Other array-like maps (cpumap, devmap, sockmap, perf_event_array, cgroup_array) cannot be used by unpriv, so no changes there. That fixes bpf side of "Variant 1: bounds check bypass (CVE-2017-5753)" on all architectures with and without JIT. v2->v3: Daniel noticed that attack potentially can be crafted via syscall commands without loading the program, so add masking to those paths as well. Signed-off-by: Alexei Starovoitov Acked-by: John Fastabend Signed-off-by: Daniel Borkmann --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e55e4255a210..1b985ca4ffbe 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -52,6 +52,7 @@ struct bpf_map { u32 pages; u32 id; int numa_node; + bool unpriv_array; struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; @@ -221,6 +222,7 @@ struct bpf_prog_aux { struct bpf_array { struct bpf_map map; u32 elem_size; + u32 index_mask; /* 'ownership' of prog_array is claimed by the first program that * is going to use this map or by the first program which FD is stored * in the map to make sure that all callers and callees have the same -- cgit v1.2.3 From be95a845cc4402272994ce290e3ad928aff06cb9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Tue, 9 Jan 2018 13:17:44 +0100 Subject: bpf: avoid false sharing of map refcount with max_entries In addition to commit b2157399cc98 ("bpf: prevent out-of-bounds speculation") also change the layout of struct bpf_map such that false sharing of fast-path members like max_entries is avoided when the maps reference counter is altered. Therefore enforce them to be placed into separate cachelines. pahole dump after change: struct bpf_map { const struct bpf_map_ops * ops; /* 0 8 */ struct bpf_map * inner_map_meta; /* 8 8 */ void * security; /* 16 8 */ enum bpf_map_type map_type; /* 24 4 */ u32 key_size; /* 28 4 */ u32 value_size; /* 32 4 */ u32 max_entries; /* 36 4 */ u32 map_flags; /* 40 4 */ u32 pages; /* 44 4 */ u32 id; /* 48 4 */ int numa_node; /* 52 4 */ bool unpriv_array; /* 56 1 */ /* XXX 7 bytes hole, try to pack */ /* --- cacheline 1 boundary (64 bytes) --- */ struct user_struct * user; /* 64 8 */ atomic_t refcnt; /* 72 4 */ atomic_t usercnt; /* 76 4 */ struct work_struct work; /* 80 32 */ char name[16]; /* 112 16 */ /* --- cacheline 2 boundary (128 bytes) --- */ /* size: 128, cachelines: 2, members: 17 */ /* sum members: 121, holes: 1, sum holes: 7 */ }; Now all entries in the first cacheline are read only throughout the life time of the map, set up once during map creation. Overall struct size and number of cachelines doesn't change from the reordering. struct bpf_map is usually first member and embedded in map structs in specific map implementations, so also avoid those members to sit at the end where it could potentially share the cacheline with first map values e.g. in the array since remote CPUs could trigger map updates just as well for those (easily dirtying members like max_entries intentionally as well) while having subsequent values in cache. Quoting from Google's Project Zero blog [1]: Additionally, at least on the Intel machine on which this was tested, bouncing modified cache lines between cores is slow, apparently because the MESI protocol is used for cache coherence [8]. Changing the reference counter of an eBPF array on one physical CPU core causes the cache line containing the reference counter to be bounced over to that CPU core, making reads of the reference counter on all other CPU cores slow until the changed reference counter has been written back to memory. Because the length and the reference counter of an eBPF array are stored in the same cache line, this also means that changing the reference counter on one physical CPU core causes reads of the eBPF array's length to be slow on other physical CPU cores (intentional false sharing). While this doesn't 'control' the out-of-bounds speculation through masking the index as in commit b2157399cc98, triggering a manipulation of the map's reference counter is really trivial, so lets not allow to easily affect max_entries from it. Splitting to separate cachelines also generally makes sense from a performance perspective anyway in that fast-path won't have a cache miss if the map gets pinned, reused in other progs, etc out of control path, thus also avoids unintentional false sharing. [1] https://googleprojectzero.blogspot.ch/2018/01/reading-privileged-memory-with-side.html Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov --- include/linux/bpf.h | 24 ++++++++++++++++-------- 1 file changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 1b985ca4ffbe..fe2cb7c398e3 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -43,7 +43,14 @@ struct bpf_map_ops { }; struct bpf_map { - atomic_t refcnt; + /* 1st cacheline with read-mostly members of which some + * are also accessed in fast-path (e.g. ops, max_entries). + */ + const struct bpf_map_ops *ops ____cacheline_aligned; + struct bpf_map *inner_map_meta; +#ifdef CONFIG_SECURITY + void *security; +#endif enum bpf_map_type map_type; u32 key_size; u32 value_size; @@ -53,15 +60,16 @@ struct bpf_map { u32 id; int numa_node; bool unpriv_array; - struct user_struct *user; - const struct bpf_map_ops *ops; - struct work_struct work; + /* 7 bytes hole */ + + /* 2nd cacheline with misc members to avoid false sharing + * particularly with refcounting. + */ + struct user_struct *user ____cacheline_aligned; + atomic_t refcnt; atomic_t usercnt; - struct bpf_map *inner_map_meta; + struct work_struct work; char name[BPF_OBJ_NAME_LEN]; -#ifdef CONFIG_SECURITY - void *security; -#endif }; /* function argument constraints */ -- cgit v1.2.3 From 8978cc921fc7fad3f4d6f91f1da01352aeeeff25 Mon Sep 17 00:00:00 2001 From: Eran Ben Elisha Date: Tue, 9 Jan 2018 11:41:10 +0200 Subject: {net,ib}/mlx5: Don't disable local loopback multicast traffic when needed There are systems platform information management interfaces (such as HOST2BMC) for which we cannot disable local loopback multicast traffic. Separate disable_local_lb_mc and disable_local_lb_uc capability bits so driver will not disable multicast loopback traffic if not supported. (It is expected that Firmware will not set disable_local_lb_mc if HOST2BMC is running for example.) Function mlx5_nic_vport_update_local_lb will do best effort to disable/enable UC/MC loopback traffic and return success only in case it succeeded to changed all allowed by Firmware. Adapt mlx5_ib and mlx5e to support the new cap bits. Fixes: 2c43c5a036be ("net/mlx5e: Enable local loopback in loopback selftest") Fixes: c85023e153e3 ("IB/mlx5: Add raw ethernet local loopback support") Fixes: bded747bb432 ("net/mlx5: Add raw ethernet local loopback firmware command") Signed-off-by: Eran Ben Elisha Cc: kernel-team@fb.com Signed-off-by: Saeed Mahameed --- include/linux/mlx5/mlx5_ifc.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index d44ec5f41d4a..1391a82da98e 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -1027,8 +1027,9 @@ struct mlx5_ifc_cmd_hca_cap_bits { u8 log_max_wq_sz[0x5]; u8 nic_vport_change_event[0x1]; - u8 disable_local_lb[0x1]; - u8 reserved_at_3e2[0x9]; + u8 disable_local_lb_uc[0x1]; + u8 disable_local_lb_mc[0x1]; + u8 reserved_at_3e3[0x8]; u8 log_max_vlan_list[0x5]; u8 reserved_at_3f0[0x3]; u8 log_max_current_mc_list[0x5]; -- cgit v1.2.3 From 05e0cc84e00c54fb152d1f4b86bc211823a83d0c Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Thu, 4 Jan 2018 04:35:51 +0200 Subject: net/mlx5: Fix get vector affinity helper function mlx5_get_vector_affinity used to call pci_irq_get_affinity and after reverting the patch that sets the device affinity via PCI_IRQ_AFFINITY API, calling pci_irq_get_affinity becomes useless and it breaks RDMA mlx5 users. To fix this, this patch provides an alternative way to retrieve IRQ vector affinity using legacy IRQ API, following smp_affinity read procfs implementation. Fixes: 231243c82793 ("Revert mlx5: move affinity hints assignments to generic code") Fixes: a435393acafb ("mlx5: move affinity hints assignments to generic code") Cc: Sagi Grimberg Signed-off-by: Saeed Mahameed --- include/linux/mlx5/driver.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 1f509d072026..a0610427e168 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -36,6 +36,7 @@ #include #include #include +#include #include #include #include @@ -1231,7 +1232,23 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity(struct mlx5_core_dev *dev, int vector) { - return pci_irq_get_affinity(dev->pdev, MLX5_EQ_VEC_COMP_BASE + vector); + const struct cpumask *mask; + struct irq_desc *desc; + unsigned int irq; + int eqn; + int err; + + err = mlx5_vector2eqn(dev, vector, &eqn, &irq); + if (err) + return NULL; + + desc = irq_to_desc(irq); +#ifdef CONFIG_GENERIC_IRQ_EFFECTIVE_AFF_MASK + mask = irq_data_get_effective_affinity_mask(&desc->irq_data); +#else + mask = desc->irq_common_data.affinity; +#endif + return mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From a0b1280368d1e91ab72f849ef095b4f07a39bbf1 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 12 Jan 2018 16:53:14 -0800 Subject: kdump: write correct address of mem_section into vmcoreinfo Depending on configuration mem_section can now be an array or a pointer to an array allocated dynamically. In most cases, we can continue to refer to it as 'mem_section' regardless of what it is. But there's one exception: '&mem_section' means "address of the array" if mem_section is an array, but if mem_section is a pointer, it would mean "address of the pointer". We've stepped onto this in kdump code. VMCOREINFO_SYMBOL(mem_section) writes down address of pointer into vmcoreinfo, not array as we wanted. Let's introduce VMCOREINFO_SYMBOL_ARRAY() that would handle the situation correctly for both cases. Link: http://lkml.kernel.org/r/20180112162532.35896-1-kirill.shutemov@linux.intel.com Signed-off-by: Kirill A. Shutemov Fixes: 83e3c48729d9 ("mm/sparsemem: Allocate mem_section at runtime for CONFIG_SPARSEMEM_EXTREME=y") Acked-by: Baoquan He Acked-by: Dave Young Cc: Ingo Molnar Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: Greg Kroah-Hartman Cc: Dave Young Cc: Baoquan He Cc: Vivek Goyal Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_core.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crash_core.h b/include/linux/crash_core.h index 06097ef30449..b511f6d24b42 100644 --- a/include/linux/crash_core.h +++ b/include/linux/crash_core.h @@ -42,6 +42,8 @@ phys_addr_t paddr_vmcoreinfo_note(void); vmcoreinfo_append_str("PAGESIZE=%ld\n", value) #define VMCOREINFO_SYMBOL(name) \ vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)&name) +#define VMCOREINFO_SYMBOL_ARRAY(name) \ + vmcoreinfo_append_str("SYMBOL(%s)=%lx\n", #name, (unsigned long)name) #define VMCOREINFO_SIZE(name) \ vmcoreinfo_append_str("SIZE(%s)=%lu\n", #name, \ (unsigned long)sizeof(name)) -- cgit v1.2.3 From 66940f35d5a81d5969bb5543171c70a434fc5110 Mon Sep 17 00:00:00 2001 From: "Michael S. Tsirkin" Date: Wed, 10 Jan 2018 16:03:05 +0200 Subject: ptr_ring: document usage around __ptr_ring_peek This explains why is the net usage of __ptr_ring_peek actually ok without locks. Signed-off-by: Michael S. Tsirkin Acked-by: John Fastabend Signed-off-by: David S. Miller --- include/linux/ptr_ring.h | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptr_ring.h b/include/linux/ptr_ring.h index 6866df4f31b5..d72b2e7dd500 100644 --- a/include/linux/ptr_ring.h +++ b/include/linux/ptr_ring.h @@ -174,6 +174,15 @@ static inline int ptr_ring_produce_bh(struct ptr_ring *r, void *ptr) * if they dereference the pointer - see e.g. PTR_RING_PEEK_CALL. * If ring is never resized, and if the pointer is merely * tested, there's no need to take the lock - see e.g. __ptr_ring_empty. + * However, if called outside the lock, and if some other CPU + * consumes ring entries at the same time, the value returned + * is not guaranteed to be correct. + * In this case - to avoid incorrectly detecting the ring + * as empty - the CPU consuming the ring entries is responsible + * for either consuming all ring entries until the ring is empty, + * or synchronizing with some other CPU and causing it to + * execute __ptr_ring_peek and/or consume the ring enteries + * after the synchronization point. */ static inline void *__ptr_ring_peek(struct ptr_ring *r) { @@ -182,10 +191,7 @@ static inline void *__ptr_ring_peek(struct ptr_ring *r) return NULL; } -/* Note: callers invoking this in a loop must use a compiler barrier, - * for example cpu_relax(). Callers must take consumer_lock - * if the ring is ever resized - see e.g. ptr_ring_empty. - */ +/* See __ptr_ring_peek above for locking rules. */ static inline bool __ptr_ring_empty(struct ptr_ring *r) { return !__ptr_ring_peek(r); -- cgit v1.2.3 From 6311b7ce42e0c1d6d944bc099dc47e936c20cf11 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Mon, 15 Jan 2018 12:42:25 +0100 Subject: netlink: extack: avoid parenthesized string constant warning NL_SET_ERR_MSG() and NL_SET_ERR_MSG_ATTR() lead to the following warning in newer versions of gcc: warning: array initialized from parenthesized string constant Just remove the parentheses, they're not needed in this context since anyway since there can be no operator precendence issues or similar. Signed-off-by: Johannes Berg Signed-off-by: David S. Miller --- include/linux/netlink.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netlink.h b/include/linux/netlink.h index 49b4257ce1ea..f3075d6c7e82 100644 --- a/include/linux/netlink.h +++ b/include/linux/netlink.h @@ -85,7 +85,7 @@ struct netlink_ext_ack { * to the lack of an output buffer.) */ #define NL_SET_ERR_MSG(extack, msg) do { \ - static const char __msg[] = (msg); \ + static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) \ @@ -101,7 +101,7 @@ struct netlink_ext_ack { } while (0) #define NL_SET_ERR_MSG_ATTR(extack, attr, msg) do { \ - static const char __msg[] = (msg); \ + static const char __msg[] = msg; \ struct netlink_ext_ack *__extack = (extack); \ \ if (__extack) { \ -- cgit v1.2.3 From c96f5471ce7d2aefd0dda560cc23f08ab00bc65d Mon Sep 17 00:00:00 2001 From: Josh Snyder Date: Mon, 18 Dec 2017 16:15:10 +0000 Subject: delayacct: Account blkio completion on the correct task Before commit: e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler") delayacct_blkio_end() was called after context-switching into the task which completed I/O. This resulted in double counting: the task would account a delay both waiting for I/O and for time spent in the runqueue. With e33a9bba85a8, delayacct_blkio_end() is called by try_to_wake_up(). In ttwu, we have not yet context-switched. This is more correct, in that the delay accounting ends when the I/O is complete. But delayacct_blkio_end() relies on 'get_current()', and we have not yet context-switched into the task whose I/O completed. This results in the wrong task having its delay accounting statistics updated. Instead of doing that, pass the task_struct being woken to delayacct_blkio_end(), so that it can update the statistics of the correct task. Signed-off-by: Josh Snyder Acked-by: Tejun Heo Acked-by: Balbir Singh Cc: Cc: Brendan Gregg Cc: Jens Axboe Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-block@vger.kernel.org Fixes: e33a9bba85a8 ("sched/core: move IO scheduling accounting from io_schedule_timeout() into scheduler") Link: http://lkml.kernel.org/r/1513613712-571-1-git-send-email-joshs@netflix.com Signed-off-by: Ingo Molnar --- include/linux/delayacct.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/delayacct.h b/include/linux/delayacct.h index 4178d2493547..5e335b6203f4 100644 --- a/include/linux/delayacct.h +++ b/include/linux/delayacct.h @@ -71,7 +71,7 @@ extern void delayacct_init(void); extern void __delayacct_tsk_init(struct task_struct *); extern void __delayacct_tsk_exit(struct task_struct *); extern void __delayacct_blkio_start(void); -extern void __delayacct_blkio_end(void); +extern void __delayacct_blkio_end(struct task_struct *); extern int __delayacct_add_tsk(struct taskstats *, struct task_struct *); extern __u64 __delayacct_blkio_ticks(struct task_struct *); extern void __delayacct_freepages_start(void); @@ -122,10 +122,10 @@ static inline void delayacct_blkio_start(void) __delayacct_blkio_start(); } -static inline void delayacct_blkio_end(void) +static inline void delayacct_blkio_end(struct task_struct *p) { if (current->delays) - __delayacct_blkio_end(); + __delayacct_blkio_end(p); delayacct_clear_flag(DELAYACCT_PF_BLKIO); } @@ -169,7 +169,7 @@ static inline void delayacct_tsk_free(struct task_struct *tsk) {} static inline void delayacct_blkio_start(void) {} -static inline void delayacct_blkio_end(void) +static inline void delayacct_blkio_end(struct task_struct *p) {} static inline int delayacct_add_tsk(struct taskstats *d, struct task_struct *tsk) -- cgit v1.2.3 From a3d6c976f71902388e444594daa902032b5a45fa Mon Sep 17 00:00:00 2001 From: Matthew Wilcox Date: Thu, 18 Jan 2018 16:34:08 -0800 Subject: sparse doesn't support struct randomization Without this patch, I drown in a sea of unknown attribute warnings Link: http://lkml.kernel.org/r/20180117024539.27354-1-willy@infradead.org Signed-off-by: Matthew Wilcox Acked-by: Kees Cook Cc: Ingo Molnar Cc: Josh Poimboeuf Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 2272ded07496..631354acfa72 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -219,7 +219,7 @@ /* Mark a function definition as prohibited from being cloned. */ #define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) -#ifdef RANDSTRUCT_PLUGIN +#if defined(RANDSTRUCT_PLUGIN) && !defined(__CHECKER__) #define __randomize_layout __attribute__((randomize_layout)) #define __no_randomize_layout __attribute__((no_randomize_layout)) #endif -- cgit v1.2.3 From 0d665e7b109d512b7cae3ccef6e8654714887844 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 19 Jan 2018 15:49:24 +0300 Subject: mm, page_vma_mapped: Drop faulty pointer arithmetics in check_pte() Tetsuo reported random crashes under memory pressure on 32-bit x86 system and tracked down to change that introduced page_vma_mapped_walk(). The root cause of the issue is the faulty pointer math in check_pte(). As ->pte may point to an arbitrary page we have to check that they are belong to the section before doing math. Otherwise it may lead to weird results. It wasn't noticed until now as mem_map[] is virtually contiguous on flatmem or vmemmap sparsemem. Pointer arithmetic just works against all 'struct page' pointers. But with classic sparsemem, it doesn't because each section memap is allocated separately and so consecutive pfns crossing two sections might have struct pages at completely unrelated addresses. Let's restructure code a bit and replace pointer arithmetic with operations on pfns. Signed-off-by: Kirill A. Shutemov Reported-and-tested-by: Tetsuo Handa Acked-by: Michal Hocko Fixes: ace71a19cec5 ("mm: introduce page_vma_mapped_walk()") Cc: stable@vger.kernel.org Signed-off-by: Linus Torvalds --- include/linux/swapops.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/swapops.h b/include/linux/swapops.h index 9c5a2628d6ce..1d3877c39a00 100644 --- a/include/linux/swapops.h +++ b/include/linux/swapops.h @@ -124,6 +124,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_DEVICE_WRITE); } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return pfn_to_page(swp_offset(entry)); @@ -154,6 +159,11 @@ static inline bool is_write_device_private_entry(swp_entry_t entry) return false; } +static inline unsigned long device_private_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *device_private_entry_to_page(swp_entry_t entry) { return NULL; @@ -189,6 +199,11 @@ static inline int is_write_migration_entry(swp_entry_t entry) return unlikely(swp_type(entry) == SWP_MIGRATION_WRITE); } +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return swp_offset(entry); +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { struct page *p = pfn_to_page(swp_offset(entry)); @@ -218,6 +233,12 @@ static inline int is_migration_entry(swp_entry_t swp) { return 0; } + +static inline unsigned long migration_entry_to_pfn(swp_entry_t entry) +{ + return 0; +} + static inline struct page *migration_entry_to_page(swp_entry_t entry) { return NULL; -- cgit v1.2.3 From 6be7fa3c74d1e0cd50f2157b5c1524f152bf641e Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (VMware)" Date: Mon, 22 Jan 2018 22:32:51 -0500 Subject: ftrace, orc, x86: Handle ftrace dynamically allocated trampolines The function tracer can create a dynamically allocated trampoline that is called by the function mcount or fentry hook that is used to call the function callback that is registered. The problem is that the orc undwinder will bail if it encounters one of these trampolines. This breaks the stack trace of function callbacks, which include the stack tracer and setting the stack trace for individual functions. Since these dynamic trampolines are basically copies of the static ftrace trampolines defined in ftrace_*.S, we do not need to create new orc entries for the dynamic trampolines. Finding the return address on the stack will be identical as the functions that were copied to create the dynamic trampolines. When encountering a ftrace dynamic trampoline, we can just use the orc entry of the ftrace static function that was copied for that trampoline. Signed-off-by: Steven Rostedt (VMware) --- include/linux/ftrace.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 2bab81951ced..3319df9727aa 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -332,6 +332,8 @@ extern int ftrace_text_reserved(const void *start, const void *end); extern int ftrace_nr_registered_ops(void); +struct ftrace_ops *ftrace_ops_trampoline(unsigned long addr); + bool is_ftrace_trampoline(unsigned long addr); /* -- cgit v1.2.3 From ce48c146495a1a50e48cdbfbfaba3e708be7c07c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 22 Jan 2018 22:53:28 +0100 Subject: sched/core: Fix cpu.max vs. cpuhotplug deadlock Tejun reported the following cpu-hotplug lock (percpu-rwsem) read recursion: tg_set_cfs_bandwidth() get_online_cpus() cpus_read_lock() cfs_bandwidth_usage_inc() static_key_slow_inc() cpus_read_lock() Reported-by: Tejun Heo Tested-by: Tejun Heo Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20180122215328.GP3397@worktop Signed-off-by: Ingo Molnar --- include/linux/jump_label.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index c7b368c734af..e0340ca08d98 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -160,6 +160,8 @@ extern void arch_jump_label_transform_static(struct jump_entry *entry, extern int jump_label_text_reserved(void *start, void *end); extern void static_key_slow_inc(struct static_key *key); extern void static_key_slow_dec(struct static_key *key); +extern void static_key_slow_inc_cpuslocked(struct static_key *key); +extern void static_key_slow_dec_cpuslocked(struct static_key *key); extern void jump_label_apply_nops(struct module *mod); extern int static_key_count(struct static_key *key); extern void static_key_enable(struct static_key *key); @@ -222,6 +224,9 @@ static inline void static_key_slow_dec(struct static_key *key) atomic_dec(&key->enabled); } +#define static_key_slow_inc_cpuslocked(key) static_key_slow_inc(key) +#define static_key_slow_dec_cpuslocked(key) static_key_slow_dec(key) + static inline int jump_label_text_reserved(void *start, void *end) { return 0; @@ -416,6 +421,8 @@ extern bool ____wrong_branch_error(void); #define static_branch_inc(x) static_key_slow_inc(&(x)->key) #define static_branch_dec(x) static_key_slow_dec(&(x)->key) +#define static_branch_inc_cpuslocked(x) static_key_slow_inc_cpuslocked(&(x)->key) +#define static_branch_dec_cpuslocked(x) static_key_slow_dec_cpuslocked(&(x)->key) /* * Normal usage; boolean enable/disable. -- cgit v1.2.3 From 5132ede0fe8092b043dae09a7cc32b8ae7272baa Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Wed, 24 Jan 2018 15:28:17 +0100 Subject: Revert "module: Add retpoline tag to VERMAGIC" This reverts commit 6cfb521ac0d5b97470883ff9b7facae264b7ab12. Turns out distros do not want to make retpoline as part of their "ABI", so this patch should not have been merged. Sorry Andi, this was my fault, I suggested it when your original patch was the "correct" way of doing this instead. Reported-by: Jiri Kosina Fixes: 6cfb521ac0d5 ("module: Add retpoline tag to VERMAGIC") Acked-by: Andi Kleen Cc: Thomas Gleixner Cc: David Woodhouse Cc: rusty@rustcorp.com.au Cc: arjan.van.de.ven@intel.com Cc: jeyu@kernel.org Cc: stable Signed-off-by: Greg Kroah-Hartman Signed-off-by: Linus Torvalds --- include/linux/vermagic.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vermagic.h b/include/linux/vermagic.h index 853291714ae0..bae807eb2933 100644 --- a/include/linux/vermagic.h +++ b/include/linux/vermagic.h @@ -31,17 +31,11 @@ #else #define MODULE_RANDSTRUCT_PLUGIN #endif -#ifdef RETPOLINE -#define MODULE_VERMAGIC_RETPOLINE "retpoline " -#else -#define MODULE_VERMAGIC_RETPOLINE "" -#endif #define VERMAGIC_STRING \ UTS_RELEASE " " \ MODULE_VERMAGIC_SMP MODULE_VERMAGIC_PREEMPT \ MODULE_VERMAGIC_MODULE_UNLOAD MODULE_VERMAGIC_MODVERSIONS \ MODULE_ARCH_VERMAGIC \ - MODULE_RANDSTRUCT_PLUGIN \ - MODULE_VERMAGIC_RETPOLINE + MODULE_RANDSTRUCT_PLUGIN -- cgit v1.2.3