From ae1ff3d623905947158fd3394854c23026337810 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Mon, 13 Jul 2015 14:31:28 +0300 Subject: iommu: iova: Move iova cache management to the iova library This is necessary to separate intel-iommu from the iova library. Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse --- include/linux/iova.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); -- cgit v1.2.3 From 87db73aebf55554fefaa3eade0a28f282a1511b8 Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 7 Aug 2015 09:36:54 +0100 Subject: efi: Add support for EFI_MEMORY_RO attribute introduced by UEFIv2.5 The UEFI spec v2.5 introduces a new memory attribute EFI_MEMORY_RO, which is now the preferred attribute to convey that the nature of the contents of such a region allows it to be mapped read-only (i.e., it contains .text and .rodata only). The specification of the existing EFI_MEMORY_WP attribute has been updated to align more closely with its common use as a cacheability attribute rather than a permission attribute. Add the #define and add the attribute to the memory map dumping routine. Signed-off-by: Ard Biesheuvel Signed-off-by: Matt Fleming Reviewed-by: Laszlo Ersek Cc: H. Peter Anvin Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1438936621-5215-1-git-send-email-matt@codeblueprint.co.uk Signed-off-by: Ingo Molnar --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 85ef051ac6fb..26ca9e2fd30e 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -99,6 +99,7 @@ typedef struct { #define EFI_MEMORY_XP ((u64)0x0000000000004000ULL) /* execute-protect */ #define EFI_MEMORY_MORE_RELIABLE \ ((u64)0x0000000000010000ULL) /* higher reliability */ +#define EFI_MEMORY_RO ((u64)0x0000000000020000ULL) /* read-only */ #define EFI_MEMORY_RUNTIME ((u64)0x8000000000000000ULL) /* range requires runtime mapping */ #define EFI_MEMORY_DESCRIPTOR_VERSION 1 -- cgit v1.2.3 From a0175b9c76f59c1f5706f986d690e27ba06363dd Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 12 Aug 2015 10:22:41 +0200 Subject: iio: st_sensors: add debugfs register read hook This adds a debugfs hook to read/write registers in the ST sensors using debugfs. Proved to be awesome help when trying to debug why IRQs do not arrive. Signed-off-by: Linus Walleij Acked-by: Denis Ciocca Signed-off-by: Jonathan Cameron --- include/linux/iio/common/st_sensors.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iio/common/st_sensors.h b/include/linux/iio/common/st_sensors.h index 3c17cd7fdf06..2fe939c73cd2 100644 --- a/include/linux/iio/common/st_sensors.h +++ b/include/linux/iio/common/st_sensors.h @@ -271,6 +271,10 @@ void st_sensors_power_enable(struct iio_dev *indio_dev); void st_sensors_power_disable(struct iio_dev *indio_dev); +int st_sensors_debugfs_reg_access(struct iio_dev *indio_dev, + unsigned reg, unsigned writeval, + unsigned *readval); + int st_sensors_set_odr(struct iio_dev *indio_dev, unsigned int odr); int st_sensors_set_dataready_irq(struct iio_dev *indio_dev, bool enable); -- cgit v1.2.3 From 735ad074ffa72ccc4fdba8e54eb024df95545e7d Mon Sep 17 00:00:00 2001 From: Vladimir Barinov Date: Thu, 20 Aug 2015 22:37:39 +0300 Subject: iio: Support triggered events Support triggered events. This is useful for chips that don't have their own interrupt sources. It allows to use generic/standalone iio triggers for those drivers. Signed-off-by: Vladimir Barinov Signed-off-by: Jonathan Cameron --- include/linux/iio/iio.h | 3 +++ include/linux/iio/triggered_event.h | 11 +++++++++++ 2 files changed, 14 insertions(+) create mode 100644 include/linux/iio/triggered_event.h (limited to 'include/linux') diff --git a/include/linux/iio/iio.h b/include/linux/iio/iio.h index 7bb7f673cb3f..19c94c9acc81 100644 --- a/include/linux/iio/iio.h +++ b/include/linux/iio/iio.h @@ -294,6 +294,7 @@ static inline s64 iio_get_time_ns(void) #define INDIO_BUFFER_TRIGGERED 0x02 #define INDIO_BUFFER_SOFTWARE 0x04 #define INDIO_BUFFER_HARDWARE 0x08 +#define INDIO_EVENT_TRIGGERED 0x10 #define INDIO_ALL_BUFFER_MODES \ (INDIO_BUFFER_TRIGGERED | INDIO_BUFFER_HARDWARE | INDIO_BUFFER_SOFTWARE) @@ -457,6 +458,7 @@ struct iio_buffer_setup_ops { * @scan_index_timestamp:[INTERN] cache of the index to the timestamp * @trig: [INTERN] current device trigger (buffer modes) * @pollfunc: [DRIVER] function run on trigger being received + * @pollfunc_event: [DRIVER] function run on events trigger being received * @channels: [DRIVER] channel specification structure table * @num_channels: [DRIVER] number of channels specified in @channels. * @channel_attr_list: [INTERN] keep track of automatically created channel @@ -495,6 +497,7 @@ struct iio_dev { unsigned scan_index_timestamp; struct iio_trigger *trig; struct iio_poll_func *pollfunc; + struct iio_poll_func *pollfunc_event; struct iio_chan_spec const *channels; int num_channels; diff --git a/include/linux/iio/triggered_event.h b/include/linux/iio/triggered_event.h new file mode 100644 index 000000000000..8fe8537085bb --- /dev/null +++ b/include/linux/iio/triggered_event.h @@ -0,0 +1,11 @@ +#ifndef _LINUX_IIO_TRIGGERED_EVENT_H_ +#define _LINUX_IIO_TRIGGERED_EVENT_H_ + +#include + +int iio_triggered_event_setup(struct iio_dev *indio_dev, + irqreturn_t (*h)(int irq, void *p), + irqreturn_t (*thread)(int irq, void *p)); +void iio_triggered_event_cleanup(struct iio_dev *indio_dev); + +#endif -- cgit v1.2.3 From a39f1d5e4303f535840a44eee87326fd9743de7c Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 13 Aug 2015 15:46:04 -0700 Subject: mtd: spi-nor: add forward declaration for mtd_info This header can't actually stand alone, as it relies on the declaration (but not definition) of struct mtd_info. Let's fix that. Signed-off-by: Brian Norris Tested-by: Joachim Eastwood --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index e5409524bb0a..85a24baea093 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -127,6 +127,8 @@ enum spi_nor_option_flags { SNOR_F_USE_FSR = BIT(0), }; +struct mtd_info; + /** * struct spi_nor - Structure for defining a the SPI NOR layer * @mtd: point to a mtd_info structure -- cgit v1.2.3 From 1976367173a47f801c67b5f456922d79c60d0d42 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Thu, 13 Aug 2015 15:46:05 -0700 Subject: mtd: spi-nor: embed struct mtd_info within struct spi_nor This reflects the proper layering, so let's do it. Signed-off-by: Brian Norris Tested-by: Joachim Eastwood --- include/linux/mtd/spi-nor.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 85a24baea093..495433d3f56d 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -162,7 +162,7 @@ struct mtd_info; * @priv: the private data */ struct spi_nor { - struct mtd_info *mtd; + struct mtd_info mtd; struct mutex lock; struct device *dev; u32 page_size; -- cgit v1.2.3 From a9cadf72bfb7185a680eb7599b9bda65d1515b9f Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Ezequiel=20Garc=C3=ADa?= Date: Fri, 21 Aug 2015 15:47:28 -0300 Subject: mtd: pxa3xx_nand: Remove unused platform-data flash specification The driver supports board files specificating the flash device, by passing a pxa3xx_nand_flash struct (with flash parameters) in the platform data struct. Currently this support is not being used by any board file. Moreover, we'd like to deprecate such usage in favor of using the device table in nand_ids.c. So let's remove the ad-hoc flash specification. Signed-off-by: Ezequiel Garcia Acked-by: Robert Jarzmik Signed-off-by: Brian Norris --- include/linux/platform_data/mtd-nand-pxa3xx.h | 27 --------------------------- 1 file changed, 27 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-pxa3xx.h b/include/linux/platform_data/mtd-nand-pxa3xx.h index ac4ea2e641c7..394d15597dc7 100644 --- a/include/linux/platform_data/mtd-nand-pxa3xx.h +++ b/include/linux/platform_data/mtd-nand-pxa3xx.h @@ -4,30 +4,6 @@ #include #include -struct pxa3xx_nand_timing { - unsigned int tCH; /* Enable signal hold time */ - unsigned int tCS; /* Enable signal setup time */ - unsigned int tWH; /* ND_nWE high duration */ - unsigned int tWP; /* ND_nWE pulse time */ - unsigned int tRH; /* ND_nRE high duration */ - unsigned int tRP; /* ND_nRE pulse width */ - unsigned int tR; /* ND_nWE high to ND_nRE low for read */ - unsigned int tWHR; /* ND_nWE high to ND_nRE low for status read */ - unsigned int tAR; /* ND_ALE low to ND_nRE low delay */ -}; - -struct pxa3xx_nand_flash { - char *name; - uint32_t chip_id; - unsigned int page_per_block; /* Pages per block (PG_PER_BLK) */ - unsigned int page_size; /* Page size in bytes (PAGE_SZ) */ - unsigned int flash_width; /* Width of Flash memory (DWIDTH_M) */ - unsigned int dfc_width; /* Width of flash controller(DWIDTH_C) */ - unsigned int num_blocks; /* Number of physical blocks in Flash */ - - struct pxa3xx_nand_timing *timing; /* NAND Flash timing */ -}; - /* * Current pxa3xx_nand controller has two chip select which * both be workable. @@ -63,9 +39,6 @@ struct pxa3xx_nand_platform_data { const struct mtd_partition *parts[NUM_CHIP_SELECT]; unsigned int nr_parts[NUM_CHIP_SELECT]; - - const struct pxa3xx_nand_flash * flash; - size_t num_flash; }; extern void pxa3xx_set_nand_info(struct pxa3xx_nand_platform_data *info); -- cgit v1.2.3 From 30035e45753b708e7d47a98398500ca005e02b86 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Wed, 29 Apr 2015 12:52:04 -0400 Subject: string: provide strscpy() The strscpy() API is intended to be used instead of strlcpy(), and instead of most uses of strncpy(). - Unlike strlcpy(), it doesn't read from memory beyond (src + size). - Unlike strlcpy() or strncpy(), the API provides an easy way to check for destination buffer overflow: an -E2BIG error return value. - The provided implementation is robust in the face of the source buffer being asynchronously changed during the copy, unlike the current implementation of strlcpy(). - Unlike strncpy(), the destination buffer will be NUL-terminated if the string in the source buffer is too long. - Also unlike strncpy(), the destination buffer will not be updated beyond the NUL termination, avoiding strncpy's behavior of zeroing the entire tail end of the destination buffer. (A memset() after the strscpy() can be used if this behavior is desired.) - The implementation should be reasonably performant on all platforms since it uses the asm/word-at-a-time.h API rather than simple byte copy. Kernel-to-kernel string copy is not considered to be performance critical in any case. Signed-off-by: Chris Metcalf --- include/linux/string.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/string.h b/include/linux/string.h index a8d90db9c4b0..9ef7795e65e4 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -25,6 +25,9 @@ extern char * strncpy(char *,const char *, __kernel_size_t); #ifndef __HAVE_ARCH_STRLCPY size_t strlcpy(char *, const char *, size_t); #endif +#ifndef __HAVE_ARCH_STRSCPY +ssize_t __must_check strscpy(char *, const char *, size_t); +#endif #ifndef __HAVE_ARCH_STRCAT extern char * strcat(char *, const char *); #endif -- cgit v1.2.3 From 11bff0b70cc003b995cf4c2acec4adab90957b02 Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 3 Sep 2015 18:35:36 +0200 Subject: mtd: spi-nor: Decouple SPI NOR's device_node from controller device The problem this patch is trying to address is such, that SPI NOR flash devices attached to a dedicated SPI NOR controller cannot read their properties from the associated struct device_node. A couple of facts first: 1) Each SPI NOR flash has a struct spi_nor associated with it. 2) Each SPI NOR flash has certain device properties associated with it, for example the OF property 'm25p,fast-read' is a good pick. These properties are used by the SPI NOR core to select which opcodes are sent to such SPI NOR flash. These properties are coming from spi_nor .dev->of_node . The problem is, that for SPI NOR controllers, the struct spi_nor .dev element points to the struct device of the SPI NOR controller, not the SPI NOR flash. Therefore, the associated dev->of_node also is the one of the controller and therefore the SPI NOR core code is trying to parse the SPI NOR controller's properties, not the properties of the SPI NOR flash. Note: The m25p80 driver is not affected, because the controller and the flash are the same device, so the associated device_node of the controller and the flash are the same. This patch adjusts the SPI NOR core such that the device_node is not picked from spi_nor .dev directly, but from a new separate spi_nor .flash_node element. This let's the SPI NOR controller drivers set up a different spi_nor .flash_node element for each SPI NOR flash. This patch also fixes the controller drivers to be compatible with this modification and correctly set the spi_nor .flash_node element. This patch is inspired by 5844feeaa4154d1c46d3462c7a4653d22356d8b4 mtd: nand: add common DT init code Signed-off-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 495433d3f56d..6379e107f2e5 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -134,6 +134,7 @@ struct mtd_info; * @mtd: point to a mtd_info structure * @lock: the lock for the read/write/erase/lock/unlock operations * @dev: point to a spi device, or a spi nor controller device. + * @flash_node: point to a device node describing this flash instance. * @page_size: the page size of the SPI NOR * @addr_width: number of address bytes * @erase_opcode: the opcode for erasing a sector @@ -165,6 +166,7 @@ struct spi_nor { struct mtd_info mtd; struct mutex lock; struct device *dev; + struct device_node *flash_node; u32 page_size; u8 addr_width; u8 erase_opcode; -- cgit v1.2.3 From 61528d888adf9470db73aaadf1ff9ca35942262a Mon Sep 17 00:00:00 2001 From: Marek Vasut Date: Thu, 3 Sep 2015 18:35:37 +0200 Subject: mtd: nand: Rename nand_chip .dn to .flash_node Use a more descriptive name for the device_node element in struct nand_chip . This name matches the element name used for device_node property of a flash in the spi-nor framework. Signed-off-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 272f42952f34..3140b3d94838 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -544,7 +544,7 @@ struct nand_buffers { * flash device * @IO_ADDR_W: [BOARDSPECIFIC] address to write the 8 I/O lines of the * flash device. - * @dn: [BOARDSPECIFIC] device node describing this instance + * @flash_node: [BOARDSPECIFIC] device node describing this instance * @read_byte: [REPLACEABLE] read one byte from the chip * @read_word: [REPLACEABLE] read one word from the chip * @write_byte: [REPLACEABLE] write a single byte to the chip on the @@ -647,7 +647,7 @@ struct nand_chip { void __iomem *IO_ADDR_R; void __iomem *IO_ADDR_W; - struct device_node *dn; + struct device_node *flash_node; uint8_t (*read_byte)(struct mtd_info *mtd); u16 (*read_word)(struct mtd_info *mtd); -- cgit v1.2.3 From f9f3ce835ddce3c669eee869253105f88819888b Mon Sep 17 00:00:00 2001 From: Jagan Teki Date: Wed, 19 Aug 2015 15:26:44 +0530 Subject: mtd: spi-nor: Zap unneeded write_enable from write_reg The 'write_enable' argument is unused and unneeded, so remove it from the API. Signed-off-by: Jagan Teki Cc: David Woodhouse Cc: Han Xu [Brian: fixed for nxp-spifi.c] Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 6379e107f2e5..e9c912d73141 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -186,8 +186,7 @@ struct spi_nor { int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, u8 *buf, size_t len); int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); - int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len, - int write_enable); + int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); int (*read)(struct spi_nor *nor, loff_t from, size_t len, size_t *retlen, u_char *read_buf); -- cgit v1.2.3 From e0f5f3afd2cffa96291cd852056d83ff4e2e99c7 Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Fri, 14 Aug 2015 17:23:09 +0100 Subject: sched/fair: Make load tracking frequency scale-invariant Apply frequency scaling correction factor to per-entity load tracking to make it frequency invariant. Currently, load appears bigger when the CPU is running slower which affects load-balancing decisions. Each segment of the sched_avg.load_sum geometric series is now scaled by the current frequency so that the sched_avg.load_avg of each sched entity will be invariant from frequency scaling. Moreover, cfs_rq.runnable_load_sum is scaled by the current frequency as well. Signed-off-by: Dietmar Eggemann Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Acked-by: Vincent Guittot Cc: Dietmar Eggemann Cc: Juri Lelli Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: daniel.lezcano@linaro.org Cc: mturquette@baylibre.com Cc: pang.xunlei@zte.com.cn Cc: rjw@rjwysocki.net Cc: sgurrappadi@nvidia.com Cc: yuyang.du@intel.com Link: http://lkml.kernel.org/r/1439569394-11974-2-git-send-email-morten.rasmussen@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..c8d923ba429d 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1177,9 +1177,9 @@ struct load_weight { /* * The load_avg/util_avg accumulates an infinite geometric series. - * 1) load_avg factors the amount of time that a sched_entity is - * runnable on a rq into its weight. For cfs_rq, it is the aggregated - * such weights of all runnable and blocked sched_entities. + * 1) load_avg factors frequency scaling into the amount of time that a + * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the + * aggregated such weights of all runnable and blocked sched_entities. * 2) util_avg factors frequency scaling into the amount of time * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. * For cfs_rq, it is the aggregated such times of all runnable and -- cgit v1.2.3 From e3279a2e6d697e00e74f905851ee7cf532f72b2d Mon Sep 17 00:00:00 2001 From: Dietmar Eggemann Date: Sat, 15 Aug 2015 00:04:41 +0100 Subject: sched/fair: Make utilization tracking CPU scale-invariant Besides the existing frequency scale-invariance correction factor, apply CPU scale-invariance correction factor to utilization tracking to compensate for any differences in compute capacity. This could be due to micro-architectural differences (i.e. instructions per seconds) between cpus in HMP systems (e.g. big.LITTLE), and/or differences in the current maximum frequency supported by individual cpus in SMP systems. In the existing implementation utilization isn't comparable between cpus as it is relative to the capacity of each individual CPU. Each segment of the sched_avg.util_sum geometric series is now scaled by the CPU performance factor too so the sched_avg.util_avg of each sched entity will be invariant from the particular CPU of the HMP/SMP system on which the sched entity is scheduled. With this patch, the utilization of a CPU stays relative to the max CPU performance of the fastest CPU in the system. In contrast to utilization (sched_avg.util_sum), load (sched_avg.load_sum) should not be scaled by compute capacity. The utilization metric is based on running time which only makes sense when cpus are _not_ fully utilized (utilization cannot go beyond 100% even if more tasks are added), where load is runnable time which isn't limited by the capacity of the CPU and therefore is a better metric for overloaded scenarios. If we run two nice-0 busy loops on two cpus with different compute capacity their load should be similar since their compute demands are the same. We have to assume that the compute demand of any task running on a fully utilized CPU (no spare cycles = 100% utilization) is high and the same no matter of the compute capacity of its current CPU, hence we shouldn't scale load by CPU capacity. Signed-off-by: Dietmar Eggemann Signed-off-by: Morten Rasmussen Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/55CE7409.1000700@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c8d923ba429d..bd38b3ee9e83 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1180,7 +1180,7 @@ struct load_weight { * 1) load_avg factors frequency scaling into the amount of time that a * sched_entity is runnable on a rq into its weight. For cfs_rq, it is the * aggregated such weights of all runnable and blocked sched_entities. - * 2) util_avg factors frequency scaling into the amount of time + * 2) util_avg factors frequency and cpu scaling into the amount of time * that a sched_entity is running on a CPU, in the range [0..SCHED_LOAD_SCALE]. * For cfs_rq, it is the aggregated such times of all runnable and * blocked sched_entities. -- cgit v1.2.3 From b0e878759452314676fbdd71df4ac67e7d08de5d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 28 Aug 2015 14:06:07 +0200 Subject: perf/abi: Document some more aspects of the perf ABI Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 105 +++++++++++++++++++++++++++++++++++++++------ 1 file changed, 92 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index 092a0e8a479a..d61ee4459eee 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -140,27 +140,60 @@ struct hw_perf_event { }; #endif }; + /* + * If the event is a per task event, this will point to the task in + * question. See the comment in perf_event_alloc(). + */ struct task_struct *target; + +/* + * hw_perf_event::state flags; used to track the PERF_EF_* state. + */ +#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ +#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ +#define PERF_HES_ARCH 0x04 + int state; + + /* + * The last observed hardware counter value, updated with a + * local64_cmpxchg() such that pmu::read() can be called nested. + */ local64_t prev_count; + + /* + * The period to start the next sample with. + */ u64 sample_period; + + /* + * The period we started this sample with. + */ u64 last_period; + + /* + * However much is left of the current period; note that this is + * a full 64bit value and allows for generation of periods longer + * than hardware might allow. + */ local64_t period_left; + + /* + * State for throttling the event, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 interrupts_seq; u64 interrupts; + /* + * State for freq target events, see __perf_event_overflow() and + * perf_adjust_freq_unthr_context(). + */ u64 freq_time_stamp; u64 freq_count_stamp; #endif }; -/* - * hw_perf_event::state flags - */ -#define PERF_HES_STOPPED 0x01 /* the counter is stopped */ -#define PERF_HES_UPTODATE 0x02 /* event->count up-to-date */ -#define PERF_HES_ARCH 0x04 - struct perf_event; /* @@ -210,7 +243,19 @@ struct pmu { /* * Try and initialize the event for this PMU. - * Should return -ENOENT when the @event doesn't match this PMU. + * + * Returns: + * -ENOENT -- @event is not for this PMU + * + * -ENODEV -- @event is for this PMU but PMU not present + * -EBUSY -- @event is for this PMU but PMU temporarily unavailable + * -EINVAL -- @event is for this PMU but @event is not valid + * -EOPNOTSUPP -- @event is for this PMU, @event is valid, but not supported + * -EACCESS -- @event is for this PMU, @event is valid, but no privilidges + * + * 0 -- @event is for this PMU and valid + * + * Other error return values are allowed. */ int (*event_init) (struct perf_event *event); @@ -221,27 +266,61 @@ struct pmu { void (*event_mapped) (struct perf_event *event); /*optional*/ void (*event_unmapped) (struct perf_event *event); /*optional*/ + /* + * Flags for ->add()/->del()/ ->start()/->stop(). There are + * matching hw_perf_event::state flags. + */ #define PERF_EF_START 0x01 /* start the counter when adding */ #define PERF_EF_RELOAD 0x02 /* reload the counter when starting */ #define PERF_EF_UPDATE 0x04 /* update the counter when stopping */ /* - * Adds/Removes a counter to/from the PMU, can be done inside - * a transaction, see the ->*_txn() methods. + * Adds/Removes a counter to/from the PMU, can be done inside a + * transaction, see the ->*_txn() methods. + * + * The add/del callbacks will reserve all hardware resources required + * to service the event, this includes any counter constraint + * scheduling etc. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on. + * + * ->add() called without PERF_EF_START should result in the same state + * as ->add() followed by ->stop(). + * + * ->del() must always PERF_EF_UPDATE stop an event. If it calls + * ->stop() that must deal with already being stopped without + * PERF_EF_UPDATE. */ int (*add) (struct perf_event *event, int flags); void (*del) (struct perf_event *event, int flags); /* - * Starts/Stops a counter present on the PMU. The PMI handler - * should stop the counter when perf_event_overflow() returns - * !0. ->start() will be used to continue. + * Starts/Stops a counter present on the PMU. + * + * The PMI handler should stop the counter when perf_event_overflow() + * returns !0. ->start() will be used to continue. + * + * Also used to change the sample period. + * + * Called with IRQs disabled and the PMU disabled on the CPU the event + * is on -- will be called from NMI context with the PMU generates + * NMIs. + * + * ->stop() with PERF_EF_UPDATE will read the counter and update + * period/count values like ->read() would. + * + * ->start() with PERF_EF_RELOAD will reprogram the the counter + * value, must be preceded by a ->stop() with PERF_EF_UPDATE. */ void (*start) (struct perf_event *event, int flags); void (*stop) (struct perf_event *event, int flags); /* * Updates the counter value of the event. + * + * For sampling capable PMUs this will also update the software period + * hw_perf_event::period_left field. */ void (*read) (struct perf_event *event); -- cgit v1.2.3 From fbbe07011581990ef74dfac06dc8511b1a14badb Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:45 -0700 Subject: perf/core: Add a 'flags' parameter to the PMU transactional interfaces Currently, the PMU interface allows reading only one counter at a time. But some PMUs like the 24x7 counters in Power, support reading several counters at once. To leveage this functionality, extend the transaction interface to support a "transaction type". The first type, PERF_PMU_TXN_ADD, refers to the existing transactions, i.e. used to _schedule_ all the events on the PMU as a group. A second transaction type, PERF_PMU_TXN_READ, will be used in a follow-on patch, by the 24x7 counters to read several counters at once. Extend the transaction interfaces to the PMU to accept a 'txn_flags' parameter and use this parameter to ignore any transactions that are not of type PERF_PMU_TXN_ADD. Thanks to Peter Zijlstra for his input. Signed-off-by: Sukadev Bhattiprolu [peterz: s390 compile fix] Signed-off-by: Peter Zijlstra (Intel) Acked-by: Michael Ellerman Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-3-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 14 +++++++++++--- 1 file changed, 11 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index d61ee4459eee..ea3b5dd21a4c 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -201,6 +201,8 @@ struct perf_event; */ #define PERF_EVENT_TXN 0x1 +#define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ + /** * pmu::capabilities flags */ @@ -331,20 +333,26 @@ struct pmu { * * Start the transaction, after this ->add() doesn't need to * do schedulability tests. + * + * Optional. */ - void (*start_txn) (struct pmu *pmu); /* optional */ + void (*start_txn) (struct pmu *pmu, unsigned int txn_flags); /* * If ->start_txn() disabled the ->add() schedulability test * then ->commit_txn() is required to perform one. On success * the transaction is closed. On error the transaction is kept * open until ->cancel_txn() is called. + * + * Optional. */ - int (*commit_txn) (struct pmu *pmu); /* optional */ + int (*commit_txn) (struct pmu *pmu); /* * Will cancel the transaction, assumes ->del() is called * for each successful ->add() during the transaction. + * + * Optional. */ - void (*cancel_txn) (struct pmu *pmu); /* optional */ + void (*cancel_txn) (struct pmu *pmu); /* * Will return the value for perf_event_mmap_page::index for this event, -- cgit v1.2.3 From 4a00c16e552ea5e71756cd29cd2df7557ec9cac4 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:51 -0700 Subject: perf/core: Define PERF_PMU_TXN_READ interface Define a new PERF_PMU_TXN_READ interface to read a group of counters at once. pmu->start_txn() // Initialize before first event for each event in group pmu->read(event); // Queue each event to be read rc = pmu->commit_txn() // Read/update all queued counters Note that we use this interface with all PMUs. PMUs that implement this interface use the ->read() operation to _queue_ the counters to be read and use ->commit_txn() to actually read all the queued counters at once. PMUs that don't implement PERF_PMU_TXN_READ ignore ->start_txn() and ->commit_txn() and continue to read counters one at a time. Thanks to input from Peter Zijlstra. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-9-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index ea3b5dd21a4c..b83cea932f74 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -202,6 +202,7 @@ struct perf_event; #define PERF_EVENT_TXN 0x1 #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ +#define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ /** * pmu::capabilities flags -- cgit v1.2.3 From 8f3e5684d3fbd91ead283916676fa3dac22615e5 Mon Sep 17 00:00:00 2001 From: Sukadev Bhattiprolu Date: Thu, 3 Sep 2015 20:07:53 -0700 Subject: perf/core: Drop PERF_EVENT_TXN We currently use PERF_EVENT_TXN flag to determine if we are in the middle of a transaction. If in a transaction, we defer the schedulability checks from pmu->add() operation to the pmu->commit() operation. Now that we have "transaction types" (PERF_PMU_TXN_ADD, PERF_PMU_TXN_READ) we can use the type to determine if we are in a transaction and drop the PERF_EVENT_TXN flag. When PERF_EVENT_TXN is dropped, the cpuhw->group_flag on some architectures becomes unused, so drop that field as well. This is an extension of the Powerpc patch from Peter Zijlstra to s390, Sparc and x86 architectures. Signed-off-by: Sukadev Bhattiprolu Signed-off-by: Peter Zijlstra (Intel) Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Michael Ellerman Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1441336073-22750-11-git-send-email-sukadev@linux.vnet.ibm.com Signed-off-by: Ingo Molnar --- include/linux/perf_event.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index b83cea932f74..d841d33bcdc9 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -199,8 +199,6 @@ struct perf_event; /* * Common implementation detail of pmu::{start,commit,cancel}_txn */ -#define PERF_EVENT_TXN 0x1 - #define PERF_PMU_TXN_ADD 0x1 /* txn to add/schedule event on PMU */ #define PERF_PMU_TXN_READ 0x2 /* txn to read event group from PMU */ -- cgit v1.2.3 From 445b0eb058f5f31c844a731cb82e7441d0d9e578 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Aug 2015 04:36:14 +0200 Subject: ACPI / property: Add support for data-only subnodes In some cases, the information expressed via device properties is hierarchical by nature. For example, the properties of a composite device consisting of multiple semi-dependent components may need to be represented in the form of a tree of property data sets corresponding to specific components of the device. Unfortunately, using ACPI device objects for this purpose turns out to be problematic, mostly due to the assumption made by some operating systems (that platform firmware generally needs to work with) that each device object in the ACPI namespace represents a device requiring a separate driver. That assumption leads to complications which reportedly are impractically difficult to overcome and a different approach is needed for the sake of interoperability. The approach implemented here is based on extending _DSD via pointers (links) to additional ACPI objects returning data packages formatted in accordance with the _DSD formatting rules defined by Section 6.2.5 of ACPI 6. Those additional objects are referred to as data-only subnodes of the device object containing the _DSD pointing to them. The links to them need to be located in a separate section of the _DSD data package following UUID dbb8e3e6-5886-4ba6-8795-1319f52a966b referred to as the Hierarchical Data Extension UUID as defined in [1]. Each of them is represented by a package of two strings. The first string in that package (the key) is regarded as the name of the data-only subnode pointed to by the link. The second string in it (the target) is expected to hold the ACPI namespace path (possibly utilizing the usual ACPI namespace search rules) of an ACPI object evaluating to a data package extending the _DSD. The device properties initialization code follows those links, creates a struct acpi_data_node object for each of them to store the data returned by the ACPI object pointed to by it and processes those data recursively (which may lead to the creation of more struct acpi_data_node objects if the returned data package contains the Hierarchical Data Extension UUID section with more links in it). All of the struct acpi_data_node objects are present until the the ACPI device object containing the _DSD with links to them is deleted and they are deleted along with that object. [1]: http://www.uefi.org/sites/default/files/resources/_DSD-hierarchical-data-extension-UUID-v1.pdf Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- include/linux/fwnode.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0408545bce42..b08d6ba5c1e6 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -16,6 +16,7 @@ enum fwnode_type { FWNODE_INVALID = 0, FWNODE_OF, FWNODE_ACPI, + FWNODE_ACPI_DATA, FWNODE_PDATA, }; -- cgit v1.2.3 From 3a7a2ab839ad18c2d542b40f4a647c98d068e55a Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Aug 2015 04:40:05 +0200 Subject: ACPI / property: Extend fwnode_property_* to data-only subnodes Modify is_acpi_node() to return "true" for ACPI data-only subnodes as well as for ACPI device objects and change the name of to_acpi_node() to to_acpi_device_node() so it is clear that it covers ACPI device objects only. Accordingly, introduce to_acpi_data_node() to cover data-only subnodes in an analogous way. With that, make the fwnode_property_* family of functions work with ACPI data-only subnodes introduced previously. Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg --- include/linux/acpi.h | 53 +++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 38 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..6be94ba4e980 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -49,7 +49,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) return adev ? adev->handle : NULL; } -#define ACPI_COMPANION(dev) to_acpi_node((dev)->fwnode) +#define ACPI_COMPANION(dev) to_acpi_device_node((dev)->fwnode) #define ACPI_COMPANION_SET(dev, adev) set_primary_fwnode(dev, (adev) ? \ acpi_fwnode_handle(adev) : NULL) #define ACPI_HANDLE(dev) acpi_device_handle(ACPI_COMPANION(dev)) @@ -69,7 +69,7 @@ static inline acpi_handle acpi_device_handle(struct acpi_device *adev) static inline bool has_acpi_companion(struct device *dev) { - return is_acpi_node(dev->fwnode); + return is_acpi_device_node(dev->fwnode); } static inline void acpi_preset_companion(struct device *dev, @@ -461,7 +461,22 @@ static inline bool is_acpi_node(struct fwnode_handle *fwnode) return false; } -static inline struct acpi_device *to_acpi_node(struct fwnode_handle *fwnode) +static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) +{ + return false; +} + +static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) +{ + return NULL; +} + +static inline bool is_acpi_data_node(struct fwnode_handle *fwnode) +{ + return false; +} + +static inline struct acpi_data_node *to_acpi_data_node(struct fwnode_handle *fwnode) { return NULL; } @@ -743,17 +758,16 @@ struct acpi_reference_args { #ifdef CONFIG_ACPI int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); -int acpi_dev_get_property_array(struct acpi_device *adev, const char *name, - acpi_object_type type, - const union acpi_object **obj); int acpi_dev_get_property_reference(struct acpi_device *adev, const char *name, size_t index, struct acpi_reference_args *args); -int acpi_dev_prop_get(struct acpi_device *adev, const char *propname, - void **valptr); +int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, + void **valptr); int acpi_dev_prop_read_single(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val); +int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, + enum dev_prop_type proptype, void *val, size_t nval); int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); @@ -766,13 +780,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, { return -ENXIO; } -static inline int acpi_dev_get_property_array(struct acpi_device *adev, - const char *name, - acpi_object_type type, - const union acpi_object **obj) -{ - return -ENXIO; -} + static inline int acpi_dev_get_property_reference(struct acpi_device *adev, const char *name, const char *cells_name, size_t index, struct acpi_reference_args *args) @@ -780,6 +788,13 @@ static inline int acpi_dev_get_property_reference(struct acpi_device *adev, return -ENXIO; } +static inline int acpi_node_prop_get(struct fwnode_handle *fwnode, + const char *propname, + void **valptr) +{ + return -ENXIO; +} + static inline int acpi_dev_prop_get(struct acpi_device *adev, const char *propname, void **valptr) @@ -795,6 +810,14 @@ static inline int acpi_dev_prop_read_single(struct acpi_device *adev, return -ENXIO; } +static inline int acpi_node_prop_read(struct fwnode_handle *fwnode, + const char *propname, + enum dev_prop_type proptype, + void *val, size_t nval) +{ + return -ENXIO; +} + static inline int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, -- cgit v1.2.3 From 504a33749971c36c54ba5ccb1364872dee1f17a7 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Thu, 27 Aug 2015 04:42:33 +0200 Subject: ACPI / property: Extend device_get_next_child_node() to data-only nodes Make device_get_next_child_node() work with ACPI data-only subnodes introduced previously. Namely, replace acpi_get_next_child() with acpi_get_next_subnode() that can handle (and return) child device objects as well as child data-only subnodes of the given device and modify the ACPI part of the GPIO subsystem to handle data-only subnodes returned by it. To that end, introduce acpi_node_get_gpiod() taking a struct fwnode_handle pointer as the first argument. That argument may point to an ACPI device object as well as to a data-only subnode and the function should do the right thing (ie. look for the matching GPIO descriptor correctly) in either case. Next, modify fwnode_get_named_gpiod() to use acpi_node_get_gpiod() instead of acpi_get_gpiod_by_index() which automatically causes devm_get_gpiod_from_child() to work with ACPI data-only subnodes that may be returned by device_get_next_child_node() which in turn is required by the users of that function (the gpio_keys_polled and gpio-leds drivers). Signed-off-by: Rafael J. Wysocki Tested-by: Mika Westerberg Acked-by: Linus Walleij --- include/linux/acpi.h | 17 ++++++++--------- 1 file changed, 8 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 6be94ba4e980..865d948c60e6 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -758,9 +758,9 @@ struct acpi_reference_args { #ifdef CONFIG_ACPI int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, const union acpi_object **obj); -int acpi_dev_get_property_reference(struct acpi_device *adev, - const char *name, size_t index, - struct acpi_reference_args *args); +int acpi_node_get_property_reference(struct fwnode_handle *fwnode, + const char *name, size_t index, + struct acpi_reference_args *args); int acpi_node_prop_get(struct fwnode_handle *fwnode, const char *propname, void **valptr); @@ -771,8 +771,8 @@ int acpi_node_prop_read(struct fwnode_handle *fwnode, const char *propname, int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, enum dev_prop_type proptype, void *val, size_t nval); -struct acpi_device *acpi_get_next_child(struct device *dev, - struct acpi_device *child); +struct fwnode_handle *acpi_get_next_subnode(struct device *dev, + struct fwnode_handle *subnode); #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, @@ -781,7 +781,7 @@ static inline int acpi_dev_get_property(struct acpi_device *adev, return -ENXIO; } -static inline int acpi_dev_get_property_reference(struct acpi_device *adev, +static inline int acpi_node_get_property_reference(struct fwnode_handle *fwnode, const char *name, const char *cells_name, size_t index, struct acpi_reference_args *args) { @@ -826,12 +826,11 @@ static inline int acpi_dev_prop_read(struct acpi_device *adev, return -ENXIO; } -static inline struct acpi_device *acpi_get_next_child(struct device *dev, - struct acpi_device *child) +static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, + struct fwnode_handle *subnode) { return NULL; } - #endif #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From f0489a5ef4d011e29f78021ad13a543e8769d619 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Sep 2015 13:47:23 +0530 Subject: PM / OPP: Rename opp init/free table routines free-table routines are opposite of init-table ones, and must be named to make that clear. Opposite of 'init' is 'exit', but those doesn't suit really well. Replace 'init' with 'add' and 'free' with 'remove'. Reported-by: Pavel Machek Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index e817722ee3f0..20adcb4ce443 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -132,28 +132,28 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -int of_init_opp_table(struct device *dev); -void of_free_opp_table(struct device *dev); -int of_cpumask_init_opp_table(cpumask_var_t cpumask); -void of_cpumask_free_opp_table(cpumask_var_t cpumask); +int of_add_opp_table(struct device *dev); +void of_remove_opp_table(struct device *dev); +int of_cpumask_add_opp_table(cpumask_var_t cpumask); +void of_cpumask_remove_opp_table(cpumask_var_t cpumask); int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); #else -static inline int of_init_opp_table(struct device *dev) +static inline int of_add_opp_table(struct device *dev) { return -EINVAL; } -static inline void of_free_opp_table(struct device *dev) +static inline void of_remove_opp_table(struct device *dev) { } -static inline int of_cpumask_init_opp_table(cpumask_var_t cpumask) +static inline int of_cpumask_add_opp_table(cpumask_var_t cpumask) { return -ENOSYS; } -static inline void of_cpumask_free_opp_table(cpumask_var_t cpumask) +static inline void of_cpumask_remove_opp_table(cpumask_var_t cpumask) { } -- cgit v1.2.3 From 8f8d37b2537a28b5b2e3cb60dfc85a2a1303f99b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 4 Sep 2015 13:47:24 +0530 Subject: PM / OPP: Prefix exported opp routines with dev_pm_opp_ That's the naming convention followed in most of opp core, but few routines didn't follow this, fix them. Reviewed-by: Stephen Boyd Signed-off-by: Viresh Kumar Acked-by: Shawn Guo Signed-off-by: Rafael J. Wysocki --- include/linux/pm_opp.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h index 20adcb4ce443..9a2e50337af9 100644 --- a/include/linux/pm_opp.h +++ b/include/linux/pm_opp.h @@ -132,37 +132,37 @@ static inline struct srcu_notifier_head *dev_pm_opp_get_notifier( #endif /* CONFIG_PM_OPP */ #if defined(CONFIG_PM_OPP) && defined(CONFIG_OF) -int of_add_opp_table(struct device *dev); -void of_remove_opp_table(struct device *dev); -int of_cpumask_add_opp_table(cpumask_var_t cpumask); -void of_cpumask_remove_opp_table(cpumask_var_t cpumask); -int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); -int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_of_add_table(struct device *dev); +void dev_pm_opp_of_remove_table(struct device *dev); +int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask); +void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask); +int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); +int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask); #else -static inline int of_add_opp_table(struct device *dev) +static inline int dev_pm_opp_of_add_table(struct device *dev) { return -EINVAL; } -static inline void of_remove_opp_table(struct device *dev) +static inline void dev_pm_opp_of_remove_table(struct device *dev) { } -static inline int of_cpumask_add_opp_table(cpumask_var_t cpumask) +static inline int dev_pm_opp_of_cpumask_add_table(cpumask_var_t cpumask) { return -ENOSYS; } -static inline void of_cpumask_remove_opp_table(cpumask_var_t cpumask) +static inline void dev_pm_opp_of_cpumask_remove_table(cpumask_var_t cpumask) { } -static inline int of_get_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_of_get_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { return -ENOSYS; } -static inline int set_cpus_sharing_opps(struct device *cpu_dev, cpumask_var_t cpumask) +static inline int dev_pm_opp_set_sharing_cpus(struct device *cpu_dev, cpumask_var_t cpumask) { return -ENOSYS; } -- cgit v1.2.3 From c40a2c8817e42273a4627c48c884b805475a733f Mon Sep 17 00:00:00 2001 From: Oliver Neukum Date: Mon, 7 Sep 2015 16:05:38 +0200 Subject: CDC: common parser for extra headers CDC drivers all implement their own parser for the extra headers. This patch fixes the code duplication introducing a single common parser in usbnet. Signed-off-by: Oliver Neukum Signed-off-by: David S. Miller --- include/linux/usb/cdc.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/usb/cdc.h (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h new file mode 100644 index 000000000000..cd8f2e1c3bdf --- /dev/null +++ b/include/linux/usb/cdc.h @@ -0,0 +1,47 @@ +/* + * USB CDC common helpers + * + * Copyright (c) 2015 Oliver Neukum + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License + * version 2 as published by the Free Software Foundation. + */ + +#include + +/* + * inofficial magic numbers + */ + +#define CDC_PHONET_MAGIC_NUMBER 0xAB + +/* + * parsing CDC headers + */ + +struct usb_cdc_parsed_header { + struct usb_cdc_union_desc *usb_cdc_union_desc; + struct usb_cdc_header_desc *usb_cdc_header_desc; + + struct usb_cdc_call_mgmt_descriptor *usb_cdc_call_mgmt_descriptor; + struct usb_cdc_acm_descriptor *usb_cdc_acm_descriptor; + struct usb_cdc_country_functional_desc *usb_cdc_country_functional_desc; + struct usb_cdc_network_terminal_desc *usb_cdc_network_terminal_desc; + struct usb_cdc_ether_desc *usb_cdc_ether_desc; + struct usb_cdc_dmm_desc *usb_cdc_dmm_desc; + struct usb_cdc_mdlm_desc *usb_cdc_mdlm_desc; + struct usb_cdc_mdlm_detail_desc *usb_cdc_mdlm_detail_desc; + struct usb_cdc_obex_desc *usb_cdc_obex_desc; + struct usb_cdc_ncm_desc *usb_cdc_ncm_desc; + struct usb_cdc_mbim_desc *usb_cdc_mbim_desc; + struct usb_cdc_mbim_extended_desc *usb_cdc_mbim_extended_desc; + + bool phonet_magic_present; +}; + + +int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, + struct usb_interface *intf, + u8 *buffer, + int buflen); -- cgit v1.2.3 From ad1e7b97b3adb91d46f3adb70a7867a50fc274cf Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 15 Sep 2015 13:25:03 -0700 Subject: cdc: Fix build warning. In file included from drivers/usb/gadget/function/u_serial.h:16:0, from drivers/usb/gadget/function/f_acm.c:23: >> include/linux/usb/cdc.h:47:5: warning: 'struct usb_interface' declared inside parameter list int buflen); ^ >> include/linux/usb/cdc.h:47:5: warning: its scope is only this definition or declaration, which is probably not what you want Reported-by: kbuild test robot Signed-off-by: David S. Miller --- include/linux/usb/cdc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index cd8f2e1c3bdf..959d0c838113 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -40,7 +40,7 @@ struct usb_cdc_parsed_header { bool phonet_magic_present; }; - +struct usb_interface; int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, struct usb_interface *intf, u8 *buffer, -- cgit v1.2.3 From b84ee0d7f375ed7840c7c110d46eac24cf94b2a2 Mon Sep 17 00:00:00 2001 From: Stephen Rothwell Date: Wed, 16 Sep 2015 11:10:16 +1000 Subject: cdc: add header guards Signed-off-by: Stephen Rothwell Signed-off-by: David S. Miller --- include/linux/usb/cdc.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/cdc.h b/include/linux/usb/cdc.h index 959d0c838113..b5706f94ee9e 100644 --- a/include/linux/usb/cdc.h +++ b/include/linux/usb/cdc.h @@ -7,6 +7,8 @@ * modify it under the terms of the GNU General Public License * version 2 as published by the Free Software Foundation. */ +#ifndef __LINUX_USB_CDC_H +#define __LINUX_USB_CDC_H #include @@ -45,3 +47,5 @@ int cdc_parse_cdc_header(struct usb_cdc_parsed_header *hdr, struct usb_interface *intf, u8 *buffer, int buflen); + +#endif /* __LINUX_USB_CDC_H */ -- cgit v1.2.3 From a6f5f0dd4e21191ce35030dd4d6421e1cca10ee4 Mon Sep 17 00:00:00 2001 From: Alexandra Yates Date: Tue, 15 Sep 2015 10:32:46 -0700 Subject: PM / sleep: Report interrupt that caused system wakeup Add a sysfs attribute, /sys/power/pm_wakeup_irq, reporting the IRQ number of the first wakeup interrupt (that is, the first interrupt from an IRQ line armed for system wakeup) seen by the kernel during the most recent system suspend/resume cycle. This feature will be useful for system wakeup diagnostics of spurious wakeup interrupts. Signed-off-by: Alexandra Yates [ rjw: Fixed up pm_wakeup_irq definition ] Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 5efe743ce1e8..33aaf9a9596c 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -387,10 +387,12 @@ extern int unregister_pm_notifier(struct notifier_block *nb); /* drivers/base/power/wakeup.c */ extern bool events_check_enabled; +extern unsigned int pm_wakeup_irq; extern bool pm_wakeup_pending(void); extern void pm_system_wakeup(void); extern void pm_wakeup_clear(void); +extern void pm_system_irq_wakeup(unsigned int irq_number); extern bool pm_get_wakeup_count(unsigned int *count, bool block); extern bool pm_save_wakeup_count(unsigned int count); extern void pm_wakep_autosleep_enabled(bool set); @@ -440,6 +442,7 @@ static inline int unregister_pm_notifier(struct notifier_block *nb) static inline bool pm_wakeup_pending(void) { return false; } static inline void pm_system_wakeup(void) {} static inline void pm_wakeup_clear(void) {} +static inline void pm_system_irq_wakeup(unsigned int irq_number) {} static inline void lock_system_sleep(void) {} static inline void unlock_system_sleep(void) {} -- cgit v1.2.3 From 1ed1328792ff46e4bb86a3d7f7be2971f4549f6c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 16 Sep 2015 12:53:17 -0400 Subject: sched, cgroup: replace signal_struct->group_rwsem with a global percpu_rwsem Note: This commit was originally committed as d59cfc09c32a but got reverted by 0c986253b939 due to the performance regression from the percpu_rwsem write down/up operations added to cgroup task migration path. percpu_rwsem changes which alleviate the performance issue are pending for v4.4-rc1 merge window. Re-apply. The cgroup side of threadgroup locking uses signal_struct->group_rwsem to synchronize against threadgroup changes. This per-process rwsem adds small overhead to thread creation, exit and exec paths, forces cgroup code paths to do lock-verify-unlock-retry dance in a couple places and makes it impossible to atomically perform operations across multiple processes. This patch replaces signal_struct->group_rwsem with a global percpu_rwsem cgroup_threadgroup_rwsem which is cheaper on the reader side and contained in cgroups proper. This patch converts one-to-one. This does make writer side heavier and lower the granularity; however, cgroup process migration is a fairly cold path, we do want to optimize thread operations over it and cgroup migration operations don't take enough time for the lower granularity to matter. Signed-off-by: Tejun Heo Link: http://lkml.kernel.org/g/55F8097A.7000206@de.ibm.com Cc: Ingo Molnar Cc: Peter Zijlstra --- include/linux/cgroup-defs.h | 27 +++++++++++++++++++++++++-- include/linux/init_task.h | 8 -------- include/linux/sched.h | 12 ------------ 3 files changed, 25 insertions(+), 22 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 8492721b39be..4d8fcf2187dc 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -473,8 +473,31 @@ struct cgroup_subsys { unsigned int depends_on; }; -void cgroup_threadgroup_change_begin(struct task_struct *tsk); -void cgroup_threadgroup_change_end(struct task_struct *tsk); +extern struct percpu_rw_semaphore cgroup_threadgroup_rwsem; + +/** + * cgroup_threadgroup_change_begin - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_begin() and allows cgroup operations to + * synchronize against threadgroup changes using a percpu_rw_semaphore. + */ +static inline void cgroup_threadgroup_change_begin(struct task_struct *tsk) +{ + percpu_down_read(&cgroup_threadgroup_rwsem); +} + +/** + * cgroup_threadgroup_change_end - threadgroup exclusion for cgroups + * @tsk: target task + * + * Called from threadgroup_change_end(). Counterpart of + * cgroup_threadcgroup_change_begin(). + */ +static inline void cgroup_threadgroup_change_end(struct task_struct *tsk) +{ + percpu_up_read(&cgroup_threadgroup_rwsem); +} #else /* CONFIG_CGROUPS */ diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e38681f4912d..d0b380ee7d67 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -25,13 +25,6 @@ extern struct files_struct init_files; extern struct fs_struct init_fs; -#ifdef CONFIG_CGROUPS -#define INIT_GROUP_RWSEM(sig) \ - .group_rwsem = __RWSEM_INITIALIZER(sig.group_rwsem), -#else -#define INIT_GROUP_RWSEM(sig) -#endif - #ifdef CONFIG_CPUSETS #define INIT_CPUSET_SEQ(tsk) \ .mems_allowed_seq = SEQCNT_ZERO(tsk.mems_allowed_seq), @@ -64,7 +57,6 @@ extern struct fs_struct init_fs; INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ __MUTEX_INITIALIZER(sig.cred_guard_mutex), \ - INIT_GROUP_RWSEM(sig) \ } extern struct nsproxy init_nsproxy; diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..a4ab9daa387c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -762,18 +762,6 @@ struct signal_struct { unsigned audit_tty_log_passwd; struct tty_audit_buf *tty_audit_buf; #endif -#ifdef CONFIG_CGROUPS - /* - * group_rwsem prevents new tasks from entering the threadgroup and - * member tasks from exiting,a more specifically, setting of - * PF_EXITING. fork and exit paths are protected with this rwsem - * using threadgroup_change_begin/end(). Users which require - * threadgroup to remain stable should use threadgroup_[un]lock() - * which also takes care of exec path. Currently, cgroup is the - * only user. - */ - struct rw_semaphore group_rwsem; -#endif oom_flags_t oom_flags; short oom_score_adj; /* OOM kill score adjustment */ -- cgit v1.2.3 From 7b7d1968e4c8d8392e8e63906d45d0bcad079037 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Thu, 17 Sep 2015 05:23:20 +0000 Subject: regmap: irq: add support for chips who have separate unmask registers Some chips have separate unmask registers from mask registers for some consideration of concurrency SMP write performance. And this patch adds a flag for it. An user will be CSR SiRFSoC ARM chips. Signed-off-by: Guo Zeng Signed-off-by: Barry Song Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..f98fe9f5faa2 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -800,6 +800,8 @@ struct regmap_irq { * * @status_base: Base status register address. * @mask_base: Base mask register address. + * @unmask_base: Base unmask register address. for chips who have + * separate mask and unmask registers * @ack_base: Base ack address. If zero then the chip is clear on read. * Using zero value is possible with @use_ack bit. * @wake_base: Base address for wake enables. If zero unsupported. @@ -820,6 +822,7 @@ struct regmap_irq_chip { unsigned int status_base; unsigned int mask_base; + unsigned int unmask_base; unsigned int ack_base; unsigned int wake_base; unsigned int irq_reg_stride; -- cgit v1.2.3 From a650fdd9427f1f5236f83d2d8137bea9b452fa53 Mon Sep 17 00:00:00 2001 From: Guo Zeng Date: Thu, 17 Sep 2015 05:23:21 +0000 Subject: regmap: irq: add ack_invert flag for chips using cleared bits as ack An user will be CSR SiRFSoC ARM chips. Signed-off-by: Guo Zeng Signed-off-by: Barry Song Signed-off-by: Mark Brown --- include/linux/regmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index f98fe9f5faa2..f36c9f96d32d 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -809,6 +809,7 @@ struct regmap_irq { * @init_ack_masked: Ack all masked interrupts once during initalization. * @mask_invert: Inverted mask register: cleared bits are masked out. * @use_ack: Use @ack register even if it is zero. + * @ack_invert: Inverted ack register: cleared bits for ack. * @wake_invert: Inverted wake register: cleared bits are wake enabled. * @runtime_pm: Hold a runtime PM lock on the device when accessing it. * @@ -829,6 +830,7 @@ struct regmap_irq_chip { bool init_ack_masked:1; bool mask_invert:1; bool use_ack:1; + bool ack_invert:1; bool wake_invert:1; bool runtime_pm:1; -- cgit v1.2.3 From c15f6ed3a18f10cdc33f64906ab353f17a6df114 Mon Sep 17 00:00:00 2001 From: Nicolas Boichat Date: Mon, 17 Aug 2015 11:52:54 +0800 Subject: spi: bitbang: Replace spinlock by mutex chipselect (in the case of spi-gpio: spi_gpio_chipselect, which calls gpiod_set_raw_value_cansleep) can sleep, so we should not hold a spinlock while calling it from spi_bitbang_setup. This issue was introduced by this commit, which converted spi-gpio to cansleep variants: d9dda5a191 "spi: spi-gpio: Use 'cansleep' variants to access GPIO" Replacing the lock variable by a mutex fixes the issue: This is safe as all instances where the lock is used are called from contexts that can sleep. Finally, update spi-ppc4xx and and spi-s3c24xx to use mutex functions, as they directly hold the lock for similar purpose. Signed-off-by: Nicolas Boichat Signed-off-by: Mark Brown --- include/linux/spi/spi_bitbang.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi_bitbang.h b/include/linux/spi/spi_bitbang.h index 85578d4be034..154788ed218c 100644 --- a/include/linux/spi/spi_bitbang.h +++ b/include/linux/spi/spi_bitbang.h @@ -4,7 +4,7 @@ #include struct spi_bitbang { - spinlock_t lock; + struct mutex lock; u8 busy; u8 use_dma; u8 flags; /* extra spi->mode support */ -- cgit v1.2.3 From 54552d03023cfd485cedf8d7471d1554139d58aa Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Wed, 2 Sep 2015 14:21:29 +0200 Subject: ieee802154: 6lowpan: check on valid 802.15.4 frame This patch adds frame control checks to check if the received frame is something which could contain a 6LoWPAN packet. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 1dc1f4ed4001..db01492814d3 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -205,6 +205,31 @@ enum { IEEE802154_SCAN_IN_PROGRESS = 0xfc, }; +/* frame control handling */ +#define IEEE802154_FCTL_FTYPE 0x0003 +#define IEEE802154_FCTL_INTRA_PAN 0x0040 + +#define IEEE802154_FTYPE_DATA 0x0001 + +/* + * ieee802154_is_data - check if type is IEEE802154_FTYPE_DATA + * @fc: frame control bytes in little-endian byteorder + */ +static inline int ieee802154_is_data(__le16 fc) +{ + return (fc & cpu_to_le16(IEEE802154_FCTL_FTYPE)) == + cpu_to_le16(IEEE802154_FTYPE_DATA); +} + +/** + * ieee802154_is_intra_pan - check if intra pan id communication + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_intra_pan(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_INTRA_PAN); +} + /** * ieee802154_is_valid_psdu_len - check if psdu len is valid * available lengths: -- cgit v1.2.3 From b07461a8e45b7a62ef7fb46e4f6ada66f63406a8 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Thu, 17 Sep 2015 10:09:37 -0500 Subject: PCI/AER: Clear error status registers during enumeration and restore AER errors might be recorded when powering-on devices. These errors can be ignored, so firmware usually clears them before the OS enumerates devices. However, firmware is not involved when devices are added via hotplug, so the OS may discover power-up errors that should be ignored. The same may happen when powering up devices when resuming after suspend. Clear the AER error status registers during enumeration and resume. [bhelgaas: changelog, remove repetitive comments] Signed-off-by: Taku Izumi Signed-off-by: Bjorn Helgaas --- include/linux/aer.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/aer.h b/include/linux/aer.h index 4fef65e57023..744b997d6a94 100644 --- a/include/linux/aer.h +++ b/include/linux/aer.h @@ -42,6 +42,7 @@ struct aer_capability_regs { int pci_enable_pcie_error_reporting(struct pci_dev *dev); int pci_disable_pcie_error_reporting(struct pci_dev *dev); int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev); +int pci_cleanup_aer_error_status_regs(struct pci_dev *dev); #else static inline int pci_enable_pcie_error_reporting(struct pci_dev *dev) { @@ -55,6 +56,10 @@ static inline int pci_cleanup_aer_uncorrect_error_status(struct pci_dev *dev) { return -EINVAL; } +static inline int pci_cleanup_aer_error_status_regs(struct pci_dev *dev) +{ + return -EINVAL; +} #endif void cper_print_aer(struct pci_dev *dev, int cper_severity, -- cgit v1.2.3 From f81c11af617ca4bad5028e9e431feae8d4166bc7 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 4 Sep 2015 17:50:08 +0100 Subject: of/pci: Add of_pci_check_probe_only to parse "linux,pci-probe-only" Both pci-host-generic and Pseries parse the "linux,pci-probe-only" property to engage the PCI_PROBE_ONLY mode, and both have a subtle bug that can be triggered if the property has no parameter. Provide a generic, safe implementation that can be used by both. [bhelgaas: fold in #include ] Signed-off-by: Marc Zyngier Signed-off-by: Bjorn Helgaas Reviewed-by: Rob Herring --- include/linux/of_pci.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 29fd3fe1c035..38c0533a3359 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -17,6 +17,7 @@ int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); void of_pci_dma_configure(struct pci_dev *pci_dev); +void of_pci_check_probe_only(void); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -53,6 +54,8 @@ of_get_pci_domain_nr(struct device_node *node) } static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } + +static inline void of_pci_check_probe_only(void) { } #endif #if defined(CONFIG_OF_ADDRESS) -- cgit v1.2.3 From fb884253a919148677c5bf347ffb62c539370440 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:03:49 -0500 Subject: netfilter: Remove !CONFIG_NETFITLER definition of nf_hook_thresh The !CONFIG_NETFILTER definition of nf_hook_thresh calls okfn when the CONFIG_NETFITLER defintion does not, making it buggy. As the !CONFIG_NETFILTER defintion of nf_hook_thresh is not used remove it rather than fix it. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netfilter.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 36a652531791..1abac85ec907 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -344,15 +344,6 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) #else /* !CONFIG_NETFILTER */ #define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) #define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) -static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, - struct sock *sk, - struct sk_buff *skb, - struct net_device *indev, - struct net_device *outdev, - int (*okfn)(struct sock *sk, struct sk_buff *), int thresh) -{ - return okfn(sk, skb); -} static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, -- cgit v1.2.3 From b11b1f652dccde707d568f4012b01a8ec5bd5f57 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:03:50 -0500 Subject: netfilter: Store net in nf_hook_state Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netfilter.h | 5 ++++- include/linux/netfilter_ingress.h | 2 +- 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 1abac85ec907..889ac0e11f01 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -54,6 +54,7 @@ struct nf_hook_state { struct net_device *in; struct net_device *out; struct sock *sk; + struct net *net; struct list_head *hook_list; int (*okfn)(struct sock *, struct sk_buff *); }; @@ -65,6 +66,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *indev, struct net_device *outdev, struct sock *sk, + struct net *net, int (*okfn)(struct sock *, struct sk_buff *)) { p->hook = hook; @@ -73,6 +75,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->in = indev; p->out = outdev; p->sk = sk; + p->net = net; p->hook_list = hook_list; p->okfn = okfn; } @@ -181,7 +184,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct nf_hook_state state; nf_hook_state_init(&state, hook_list, hook, thresh, - pf, indev, outdev, sk, okfn); + pf, indev, outdev, sk, net, okfn); return nf_hook_slow(skb, &state); } return 1; diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index cb0727fe2b3d..187feabe557c 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -17,7 +17,7 @@ static inline int nf_hook_ingress(struct sk_buff *skb) nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, - skb->dev, NULL, NULL); + skb->dev, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } -- cgit v1.2.3 From 7a7735044e1eab1415697b3139e758d24a600099 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:03:51 -0500 Subject: netfilter: Pass net to nf_hook_thresh Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netfilter.h | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 889ac0e11f01..042148dc1e22 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -170,6 +170,7 @@ int nf_hook_slow(struct sk_buff *skb, struct nf_hook_state *state); * value indicates the packet has been consumed by the hook. */ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, + struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, @@ -177,7 +178,6 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - struct net *net = dev_net(indev ? indev : outdev); struct list_head *hook_list = &net->nf.hooks[pf][hook]; if (nf_hook_list_active(hook_list, pf, hook)) { @@ -195,7 +195,8 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, struct net_device *outdev, int (*okfn)(struct sock *, struct sk_buff *)) { - return nf_hook_thresh(pf, hook, sk, skb, indev, outdev, okfn, INT_MIN); + struct net *net = dev_net(indev ? indev : outdev); + return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); } /* Activate hook; either okfn or kfree_skb called, unless a hook @@ -221,7 +222,8 @@ NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - int ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, thresh); + struct net *net = dev_net(in ? in : out); + int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); if (ret == 1) ret = okfn(sk, skb); return ret; @@ -232,10 +234,11 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), bool cond) { + struct net *net = dev_net(in ? in : out); int ret; if (!cond || - ((ret = nf_hook_thresh(pf, hook, sk, skb, in, out, okfn, INT_MIN)) == 1)) + ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) ret = okfn(sk, skb); return ret; } -- cgit v1.2.3 From 2b4aa3cec4873005a0d5155395b34641584b3a4e Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:07 -0500 Subject: net: Remove dev_queue_xmit_sk A function with weird arguments that it will never use to accomdate a netfilter callback prototype is absolutely in the core of the networking stack. Frankly it does not make sense and it causes a lot of confusion as to why arguments that are never used are being passed to the function. As I am preparing to make a second change to arguments to the okfn even the names stops making sense. As I have removed the two callers of this function remove this confusion from the networking stack. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 88a00694eda5..e664f87c8e4c 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2213,11 +2213,7 @@ int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); -int dev_queue_xmit_sk(struct sock *sk, struct sk_buff *skb); -static inline int dev_queue_xmit(struct sk_buff *skb) -{ - return dev_queue_xmit_sk(skb->sk, skb); -} +int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); void unregister_netdevice_queue(struct net_device *dev, struct list_head *head); -- cgit v1.2.3 From 04eb44890e5bb3cc855e5c0f18a05eb7311364b7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:15 -0500 Subject: bridge: Add br_netif_receive_skb remove netif_receive_skb_sk netif_receive_skb_sk is only called once in the bridge code, replace it with a bridge specific function that calls netif_receive_skb. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index e664f87c8e4c..97ab5c9a7069 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2985,11 +2985,7 @@ static inline void dev_consume_skb_any(struct sk_buff *skb) int netif_rx(struct sk_buff *skb); int netif_rx_ni(struct sk_buff *skb); -int netif_receive_skb_sk(struct sock *sk, struct sk_buff *skb); -static inline int netif_receive_skb(struct sk_buff *skb) -{ - return netif_receive_skb_sk(skb->sk, skb); -} +int netif_receive_skb(struct sk_buff *skb); gro_result_t napi_gro_receive(struct napi_struct *napi, struct sk_buff *skb); void napi_gro_flush(struct napi_struct *napi, bool flush_old); struct sk_buff *napi_get_frags(struct napi_struct *napi); -- cgit v1.2.3 From 29a26a56803855a79dbd028cd61abee56237d6e5 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:16 -0500 Subject: netfilter: Pass struct net into the netfilter hooks Pass a network namespace parameter into the netfilter hooks. At the call site of the netfilter hooks the path a packet is taking through the network stack is well known which allows the network namespace to be easily and reliabily. This allows the replacement of magic code like "dev_net(state->in?:state->out)" that appears at the start of most netfilter hooks with "state->net". In almost all cases the network namespace passed in is derived from the first network device passed in, guaranteeing those paths will not see any changes in practice. The exceptions are: xfrm/xfrm_output.c:xfrm_output_resume() xs_net(skb_dst(skb)->xfrm) ipvs/ip_vs_xmit.c:ip_vs_nat_send_or_cont() ip_vs_conn_net(cp) ipvs/ip_vs_xmit.c:ip_vs_send_or_cont() ip_vs_conn_net(cp) ipv4/raw.c:raw_send_hdrinc() sock_net(sk) ipv6/ip6_output.c:ip6_xmit() sock_net(sk) ipv6/ndisc.c:ndisc_send_skb() dev_net(skb->dev) not dev_net(dst->dev) ipv6/raw.c:raw6_send_hdrinc() sock_net(sk) br_netfilter_hooks.c:br_nf_pre_routing_finish() dev_net(skb->dev) before skb->dev is set to nf_bridge->physindev In all cases these exceptions seem to be a better expression for the network namespace the packet is being processed in then the historic "dev_net(in?in:out)". I am documenting them in case something odd pops up and someone starts trying to track down what happened. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netfilter.h | 27 ++++++++++++--------------- 1 file changed, 12 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 042148dc1e22..295f2650b5dc 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -190,12 +190,11 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, return 1; } -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, - struct sk_buff *skb, struct net_device *indev, - struct net_device *outdev, +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sock *, struct sk_buff *)) { - struct net *net = dev_net(indev ? indev : outdev); return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); } @@ -217,12 +216,11 @@ static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, */ static inline int -NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, +NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), int thresh) { - struct net *net = dev_net(in ? in : out); int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); if (ret == 1) ret = okfn(sk, skb); @@ -230,11 +228,10 @@ NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct sock *sk, } static inline int -NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *), bool cond) { - struct net *net = dev_net(in ? in : out); int ret; if (!cond || @@ -244,11 +241,11 @@ NF_HOOK_COND(uint8_t pf, unsigned int hook, struct sock *sk, } static inline int -NF_HOOK(uint8_t pf, unsigned int hook, struct sock *sk, struct sk_buff *skb, +NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, int (*okfn)(struct sock *, struct sk_buff *)) { - return NF_HOOK_THRESH(pf, hook, sk, skb, in, out, okfn, INT_MIN); + return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); } /* Call setsockopt() */ @@ -348,11 +345,11 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) -#define NF_HOOK_COND(pf, hook, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) -static inline int nf_hook(u_int8_t pf, unsigned int hook, struct sock *sk, - struct sk_buff *skb, struct net_device *indev, - struct net_device *outdev, +#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) +#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) +static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, + struct sock *sk, struct sk_buff *skb, + struct net_device *indev, struct net_device *outdev, int (*okfn)(struct sock *, struct sk_buff *)) { return 1; -- cgit v1.2.3 From 0c4b51f0054ce85c0ec578ab818f0631834573eb Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Tue, 15 Sep 2015 20:04:18 -0500 Subject: netfilter: Pass net into okfn This is immediately motivated by the bridge code that chains functions that call into netfilter. Without passing net into the okfns the bridge code would need to guess about the best expression for the network namespace to process packets in. As net is frequently one of the first things computed in continuation functions after netfilter has done it's job passing in the desired network namespace is in many cases a code simplification. To support this change the function dst_output_okfn is introduced to simplify passing dst_output as an okfn. For the moment dst_output_okfn just silently drops the struct net. Signed-off-by: "Eric W. Biederman" Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- include/linux/netfilter.h | 26 ++++++++++++++------------ include/linux/netfilter_bridge.h | 2 +- 3 files changed, 16 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 97ab5c9a7069..b791405958b4 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2212,7 +2212,7 @@ int dev_open(struct net_device *dev); int dev_close(struct net_device *dev); int dev_close_many(struct list_head *head, bool unlink); void dev_disable_lro(struct net_device *dev); -int dev_loopback_xmit(struct sock *sk, struct sk_buff *newskb); +int dev_loopback_xmit(struct net *net, struct sock *sk, struct sk_buff *newskb); int dev_queue_xmit(struct sk_buff *skb); int dev_queue_xmit_accel(struct sk_buff *skb, void *accel_priv); int register_netdevice(struct net_device *dev); diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 295f2650b5dc..0b4d4560f33d 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -56,7 +56,7 @@ struct nf_hook_state { struct sock *sk; struct net *net; struct list_head *hook_list; - int (*okfn)(struct sock *, struct sk_buff *); + int (*okfn)(struct net *, struct sock *, struct sk_buff *); }; static inline void nf_hook_state_init(struct nf_hook_state *p, @@ -67,7 +67,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, struct net_device *outdev, struct sock *sk, struct net *net, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { p->hook = hook; p->thresh = thresh; @@ -175,7 +175,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sock *, struct sk_buff *), + int (*okfn)(struct net *, struct sock *, struct sk_buff *), int thresh) { struct list_head *hook_list = &net->nf.hooks[pf][hook]; @@ -193,7 +193,7 @@ static inline int nf_hook_thresh(u_int8_t pf, unsigned int hook, static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return nf_hook_thresh(pf, hook, net, sk, skb, indev, outdev, okfn, INT_MIN); } @@ -219,31 +219,33 @@ static inline int NF_HOOK_THRESH(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sock *, struct sk_buff *), int thresh) + int (*okfn)(struct net *, struct sock *, struct sk_buff *), + int thresh) { int ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, thresh); if (ret == 1) - ret = okfn(sk, skb); + ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sock *, struct sk_buff *), bool cond) + int (*okfn)(struct net *, struct sock *, struct sk_buff *), + bool cond) { int ret; if (!cond || ((ret = nf_hook_thresh(pf, hook, net, sk, skb, in, out, okfn, INT_MIN)) == 1)) - ret = okfn(sk, skb); + ret = okfn(net, sk, skb); return ret; } static inline int NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *in, struct net_device *out, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return NF_HOOK_THRESH(pf, hook, net, sk, skb, in, out, okfn, INT_MIN); } @@ -345,12 +347,12 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(sk, skb) -#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(sk, skb) +#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(net, sk, skb) +#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(net, sk, skb) static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, - int (*okfn)(struct sock *, struct sk_buff *)) + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) { return 1; } diff --git a/include/linux/netfilter_bridge.h b/include/linux/netfilter_bridge.h index 2437b8a5d7a9..2ed40c402b5e 100644 --- a/include/linux/netfilter_bridge.h +++ b/include/linux/netfilter_bridge.h @@ -17,7 +17,7 @@ enum nf_br_hook_priorities { #if IS_ENABLED(CONFIG_BRIDGE_NETFILTER) -int br_handle_frame_finish(struct sock *sk, struct sk_buff *skb); +int br_handle_frame_finish(struct net *net, struct sock *sk, struct sk_buff *skb); static inline void br_drop_fake_rtable(struct sk_buff *skb) { -- cgit v1.2.3 From 58d607d3e52f2b15902f58a1161da9fb3b0f6d47 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 15 Sep 2015 15:24:20 -0700 Subject: tcp: provide skb->hash to synack packets In commit b73c3d0e4f0e ("net: Save TX flow hash in sock and set in skbuf on xmit"), Tom provided a l4 hash to most outgoing TCP packets. We'd like to provide one as well for SYNACK packets, so that all packets of a given flow share same txhash, to later enable bonding driver to also use skb->hash to perform slave selection. Note that a SYNACK retransmit shuffles the tx hash, as Tom did in commit 265f94ff54d62 ("net: Recompute sk_txhash on negative routing advice") for established sockets. This has nice effect making TCP flows resilient to some kind of black holes, even at connection establish phase. Signed-off-by: Eric Dumazet Cc: Tom Herbert Cc: Mahesh Bandewar Acked-by: Tom Herbert Signed-off-by: David S. Miller --- include/linux/tcp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 48c3696e8645..937b97893d5f 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -113,6 +113,7 @@ struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; bool tfo_listener; + u32 txhash; u32 rcv_isn; u32 snt_isn; u32 snt_synack; /* synack sent time */ -- cgit v1.2.3 From d6157e4f18173ad24441aa9ca04e7e9121a9b4c7 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Aug 2015 16:41:45 +0200 Subject: ARM: at91: remove useless includes in platform_data/atmel.h include/linux/platform_data/atmel.h has a lot of useless includes, remove them. Signed-off-by: Alexandre Belloni Signed-off-by: Nicolas Ferre --- include/linux/platform_data/atmel.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..4d67a5e82c83 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -9,15 +9,7 @@ #include #include -#include -#include -#include -#include -#include -#include -#include #include -#include /* * at91: 6 USARTs and one DBGU port (SAM9260) -- cgit v1.2.3 From fa128fd735bd236b6b04d3fedfed7a784137c185 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: jump_label: make static_key_enabled() work on static_key_true/false types too static_key_enabled() can be used on struct static_key but not on its wrapper types static_key_true and static_key_false. The function is useful for debugging and management of static keys. Update it so that it can be used for the wrapper types too. Signed-off-by: Tejun Heo Acked-by: Peter Zijlstra (Intel) Cc: Andrew Morton --- include/linux/jump_label.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jump_label.h b/include/linux/jump_label.h index 7f653e8f6690..c9ca050de846 100644 --- a/include/linux/jump_label.h +++ b/include/linux/jump_label.h @@ -216,11 +216,6 @@ static inline int jump_label_apply_nops(struct module *mod) #define STATIC_KEY_INIT STATIC_KEY_INIT_FALSE #define jump_label_enabled static_key_enabled -static inline bool static_key_enabled(struct static_key *key) -{ - return static_key_count(key) > 0; -} - static inline void static_key_enable(struct static_key *key) { int count = static_key_count(key); @@ -267,6 +262,17 @@ struct static_key_false { #define DEFINE_STATIC_KEY_FALSE(name) \ struct static_key_false name = STATIC_KEY_FALSE_INIT +extern bool ____wrong_branch_error(void); + +#define static_key_enabled(x) \ +({ \ + if (!__builtin_types_compatible_p(typeof(*x), struct static_key) && \ + !__builtin_types_compatible_p(typeof(*x), struct static_key_true) &&\ + !__builtin_types_compatible_p(typeof(*x), struct static_key_false)) \ + ____wrong_branch_error(); \ + static_key_count((struct static_key *)x) > 0; \ +}) + #ifdef HAVE_JUMP_LABEL /* @@ -325,8 +331,6 @@ struct static_key_false { * See jump_label_type() / jump_label_init_type(). */ -extern bool ____wrong_branch_error(void); - #define static_branch_likely(x) \ ({ \ bool branch; \ -- cgit v1.2.3 From 49d1dc4b81797f88270832b11e9f73809e7e7209 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: cgroup: implement static_key based cgroup_subsys_enabled() and cgroup_subsys_on_dfl() Whether a subsys is enabled and attached to the default hierarchy seldom changes and may be tested in the hot paths. This patch implements static_key based cgroup_subsys_enabled() and cgroup_subsys_on_dfl() tests. The following patches will update the users and remove duplicate mechanisms. Signed-off-by: Tejun Heo Acked-by: Zefan Li --- include/linux/cgroup.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index eb7ca55f72ef..c3a9f1eb4097 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -17,6 +17,7 @@ #include #include #include +#include #include @@ -50,6 +51,26 @@ extern struct css_set init_css_set; #include #undef SUBSYS +#define SUBSYS(_x) \ + extern struct static_key_true _x ## _cgrp_subsys_enabled_key; \ + extern struct static_key_true _x ## _cgrp_subsys_on_dfl_key; +#include +#undef SUBSYS + +/** + * cgroup_subsys_enabled - fast test on whether a subsys is enabled + * @ss: subsystem in question + */ +#define cgroup_subsys_enabled(ss) \ + static_branch_likely(&ss ## _enabled_key) + +/** + * cgroup_subsys_on_dfl - fast test on whether a subsys is on default hierarchy + * @ss: subsystem in question + */ +#define cgroup_subsys_on_dfl(ss) \ + static_branch_likely(&ss ## _on_dfl_key) + bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, -- cgit v1.2.3 From fc5ed1e95410ad73b2ab8f33cd90eb3bcf6c98a1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: cgroup: replace cgroup_subsys->disabled tests with cgroup_subsys_enabled() Replace cgroup_subsys->disabled tests in controllers with cgroup_subsys_enabled(). cgroup_subsys_enabled() requires literal subsys name as its parameter and thus can't be used for cgroup core which iterates through controllers. For cgroup core, introduce and use cgroup_ssid_enabled() which uses slower static_key_enabled() test and can be indexed by subsys ID. This leaves cgroup_subsys->disabled unused. Removed. Signed-off-by: Tejun Heo Acked-by: Zefan Li Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup-defs.h | 1 - include/linux/hugetlb_cgroup.h | 4 +--- include/linux/memcontrol.h | 4 +--- 3 files changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 4d8fcf2187dc..c5d41c3c1f00 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -419,7 +419,6 @@ struct cgroup_subsys { struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); - int disabled; int early_init; /* diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index bcc853eccc85..7edd30515298 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -48,9 +48,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) static inline bool hugetlb_cgroup_disabled(void) { - if (hugetlb_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(hugetlb_cgrp_subsys); } extern int hugetlb_cgroup_charge_cgroup(int idx, unsigned long nr_pages, diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..9aa7820c2177 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -347,9 +347,7 @@ ino_t page_cgroup_ino(struct page *page); static inline bool mem_cgroup_disabled(void) { - if (memory_cgrp_subsys.disabled) - return true; - return false; + return !cgroup_subsys_enabled(memory_cgrp_subsys); } /* -- cgit v1.2.3 From 9e10a130d9b62af976d17d120c95f3650769312c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 11:56:28 -0400 Subject: cgroup: replace cgroup_on_dfl() tests in controllers with cgroup_subsys_on_dfl() cgroup_on_dfl() tests whether the cgroup's root is the default hierarchy; however, an individual controller is only interested in whether the controller is attached to the default hierarchy and never tests a cgroup which doesn't belong to the hierarchy that the controller is attached to. This patch replaces cgroup_on_dfl() tests in controllers with faster static_key based cgroup_subsys_on_dfl(). This leaves cgroup core as the only user of cgroup_on_dfl() and the function is moved from the header file to cgroup.c. Signed-off-by: Tejun Heo Acked-by: Zefan Li Cc: Vivek Goyal Cc: Jens Axboe Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup.h | 58 -------------------------------------------------- 1 file changed, 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index c3a9f1eb4097..355bf2ed8867 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -433,64 +433,6 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, return task_css(task, subsys_id)->cgroup; } -/** - * cgroup_on_dfl - test whether a cgroup is on the default hierarchy - * @cgrp: the cgroup of interest - * - * The default hierarchy is the v2 interface of cgroup and this function - * can be used to test whether a cgroup is on the default hierarchy for - * cases where a subsystem should behave differnetly depending on the - * interface version. - * - * The set of behaviors which change on the default hierarchy are still - * being determined and the mount option is prefixed with __DEVEL__. - * - * List of changed behaviors: - * - * - Mount options "noprefix", "xattr", "clone_children", "release_agent" - * and "name" are disallowed. - * - * - When mounting an existing superblock, mount options should match. - * - * - Remount is disallowed. - * - * - rename(2) is disallowed. - * - * - "tasks" is removed. Everything should be at process granularity. Use - * "cgroup.procs" instead. - * - * - "cgroup.procs" is not sorted. pids will be unique unless they got - * recycled inbetween reads. - * - * - "release_agent" and "notify_on_release" are removed. Replacement - * notification mechanism will be implemented. - * - * - "cgroup.clone_children" is removed. - * - * - "cgroup.subtree_populated" is available. Its value is 0 if the cgroup - * and its descendants contain no task; otherwise, 1. The file also - * generates kernfs notification which can be monitored through poll and - * [di]notify when the value of the file changes. - * - * - cpuset: tasks will be kept in empty cpusets when hotplug happens and - * take masks of ancestors with non-empty cpus/mems, instead of being - * moved to an ancestor. - * - * - cpuset: a task can be moved into an empty cpuset, and again it takes - * masks of ancestors. - * - * - memcg: use_hierarchy is on by default and the cgroup file for the flag - * is not created. - * - * - blkcg: blk-throttle becomes properly hierarchical. - * - * - debug: disallowed on the default hierarchy. - */ -static inline bool cgroup_on_dfl(const struct cgroup *cgrp) -{ - return cgrp->root == &cgrp_dfl_root; -} - /* no synchronization, the result can only be used as a hint */ static inline bool cgroup_has_tasks(struct cgroup *cgrp) { -- cgit v1.2.3 From 339e5b44eda2150baad183def6b7030fad5ec44e Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 18 Sep 2015 13:58:34 -0500 Subject: PCI: Add msi_controller setup_irqs() method for special multivector setup Add a msi_controller setup_irqs() method so MSI chip providers can implement their own multivector MSI setup. [bhelgaas: changelog] Signed-off-by: Lucas Stach Signed-off-by: Bjorn Helgaas Reviewed-by: Pratyush Anand --- include/linux/msi.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index ad939d0ba816..0be5db110bfa 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -163,6 +163,8 @@ struct msi_controller { int (*setup_irq)(struct msi_controller *chip, struct pci_dev *dev, struct msi_desc *desc); + int (*setup_irqs)(struct msi_controller *chip, struct pci_dev *dev, + int nvec, int type); void (*teardown_irq)(struct msi_controller *chip, unsigned int irq); }; -- cgit v1.2.3 From 97b59c3a91d5ee4777658ff2136d1fdf13bd23d0 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:54 -0500 Subject: netfilter: ebtables: Simplify the arguments to ebt_do_table Nearly everything thing of interest to ebt_do_table is already present in nf_hook_state. Simplify ebt_do_table by just passing in the skb, nf_hook_state, and the table. This make the code easier to read and maintenance easier. To support this create an nf_hook_state on the stack in ebt_broute (the only caller without a nf_hook_state already available). This new nf_hook_state adds no new computations to ebt_broute, but does use a few more bytes of stack. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_bridge/ebtables.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_bridge/ebtables.h b/include/linux/netfilter_bridge/ebtables.h index 8ca6d6464ea3..2ea517c7c6b9 100644 --- a/include/linux/netfilter_bridge/ebtables.h +++ b/include/linux/netfilter_bridge/ebtables.h @@ -111,9 +111,9 @@ struct ebt_table { extern struct ebt_table *ebt_register_table(struct net *net, const struct ebt_table *table); extern void ebt_unregister_table(struct net *net, struct ebt_table *table); -extern unsigned int ebt_do_table(unsigned int hook, struct sk_buff *skb, - const struct net_device *in, const struct net_device *out, - struct ebt_table *table); +extern unsigned int ebt_do_table(struct sk_buff *skb, + const struct nf_hook_state *state, + struct ebt_table *table); /* Used in the kernel match() functions */ #define FWINV(bool,invflg) ((bool) ^ !!(info->invflags & invflg)) -- cgit v1.2.3 From 6cb8ff3f1a535b1d8eb5ea318932513d08eb3da7 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:55 -0500 Subject: inet netfilter: Remove hook from ip6t_do_table, arp_do_table, ipt_do_table The values of ops->hooknum and state->hook are guaraneted to be equal making the hook argument to ip6t_do_table, arp_do_table, and ipt_do_table is unnecessary. Remove the unnecessary hook argument. In the callers use state->hook instead of ops->hooknum for clarity and to reduce the number of cachelines the callers touch. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_arp/arp_tables.h | 1 - include/linux/netfilter_ipv4/ip_tables.h | 1 - include/linux/netfilter_ipv6/ip6_tables.h | 1 - 3 files changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_arp/arp_tables.h b/include/linux/netfilter_arp/arp_tables.h index c22a7fb8d0df..6f074db2f23d 100644 --- a/include/linux/netfilter_arp/arp_tables.h +++ b/include/linux/netfilter_arp/arp_tables.h @@ -53,7 +53,6 @@ extern struct xt_table *arpt_register_table(struct net *net, const struct arpt_replace *repl); extern void arpt_unregister_table(struct xt_table *table); extern unsigned int arpt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv4/ip_tables.h b/include/linux/netfilter_ipv4/ip_tables.h index 4073510da485..aa598f942c01 100644 --- a/include/linux/netfilter_ipv4/ip_tables.h +++ b/include/linux/netfilter_ipv4/ip_tables.h @@ -64,7 +64,6 @@ struct ipt_error { extern void *ipt_alloc_initial_table(const struct xt_table *); extern unsigned int ipt_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); diff --git a/include/linux/netfilter_ipv6/ip6_tables.h b/include/linux/netfilter_ipv6/ip6_tables.h index b40d2b635778..0f76e5c674f9 100644 --- a/include/linux/netfilter_ipv6/ip6_tables.h +++ b/include/linux/netfilter_ipv6/ip6_tables.h @@ -30,7 +30,6 @@ extern struct xt_table *ip6t_register_table(struct net *net, const struct ip6t_replace *repl); extern void ip6t_unregister_table(struct net *net, struct xt_table *table); extern unsigned int ip6t_do_table(struct sk_buff *skb, - unsigned int hook, const struct nf_hook_state *state, struct xt_table *table); -- cgit v1.2.3 From 156c196f6038610770588a708b9e0f7df2ead74a Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:32:58 -0500 Subject: netfilter: x_tables: Pass struct net in xt_action_param As xt_action_param lives on the stack this does not bloat any persistent data structures. This is a first step in making netfilter code that needs to know which network namespace it is executing in simpler. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/x_tables.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/x_tables.h b/include/linux/netfilter/x_tables.h index b006b719183f..c5577410c25d 100644 --- a/include/linux/netfilter/x_tables.h +++ b/include/linux/netfilter/x_tables.h @@ -13,6 +13,7 @@ * @target: the target extension * @matchinfo: per-match data * @targetinfo: per-target data + * @net network namespace through which the action was invoked * @in: input netdevice * @out: output netdevice * @fragoff: packet is a fragment, this is the data offset @@ -24,7 +25,6 @@ * Fields written to by extensions: * * @hotdrop: drop packet if we had inspection problems - * Network namespace obtainable using dev_net(in/out) */ struct xt_action_param { union { @@ -34,6 +34,7 @@ struct xt_action_param { union { const void *matchinfo, *targinfo; }; + struct net *net; const struct net_device *in, *out; int fragoff; unsigned int thoff; -- cgit v1.2.3 From 06198b34a3e09e06d9aecaa3727e0d37206cea77 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 18 Sep 2015 14:33:06 -0500 Subject: netfilter: Pass priv instead of nf_hook_ops to netfilter hooks Only pass the void *priv parameter out of the nf_hook_ops. That is all any of the functions are interested now, and by limiting what is passed it becomes simpler to change implementation details. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 0b4d4560f33d..987c74cd523c 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -80,7 +80,7 @@ static inline void nf_hook_state_init(struct nf_hook_state *p, p->okfn = okfn; } -typedef unsigned int nf_hookfn(const struct nf_hook_ops *ops, +typedef unsigned int nf_hookfn(void *priv, struct sk_buff *skb, const struct nf_hook_state *state); -- cgit v1.2.3 From 4a07c222d3afb00e1113834fee38d23a8e5d71dc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 17:54:22 -0400 Subject: cgroup: replace "cgroup.populated" with "cgroup.events" memcg already uses "memory.events" for event reporting and other controllers may need event reporting too. Let's standardize on "$SUBSYS.events" interface file for reporting events which don't happen too frequently and thus can share event notification. "cgroup.populated" is replaced with "populated" field in "cgroup.events" and documentation is updated accordingly. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index c5d41c3c1f00..d95cc88e9dc2 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -226,7 +226,7 @@ struct cgroup { struct kernfs_node *kn; /* cgroup kernfs entry */ struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ - struct kernfs_node *populated_kn; /* kn for "cgroup.subtree_populated" */ + struct kernfs_node *events_kn; /* kn for "cgroup.events" */ /* * The bitmask of subsystems enabled on the child cgroups. -- cgit v1.2.3 From 7dbdb199d3bf88f043ea17e97113eb28d5b100bc Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 17:54:23 -0400 Subject: cgroup: replace cftype->mode with CFTYPE_WORLD_WRITABLE cftype->mode allows controllers to give arbitrary permissions to interface knobs. Except for "cgroup.event_control", the existing uses are spurious. * Some explicitly specify S_IRUGO | S_IWUSR even though that's the default. * "cpuset.memory_pressure" specifies S_IRUGO while also setting a write callback which returns -EACCES. All it needs to do is simply not setting a write callback. "cgroup.event_control" uses cftype->mode to make the file world-writable. It's a misdesigned interface and we don't want controllers to be tweaking interface file permissions in general. This patch removes cftype->mode and all its spurious uses and implements CFTYPE_WORLD_WRITABLE for "cgroup.event_control" which is marked as compatibility-only. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index d95cc88e9dc2..10d814bcd487 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -76,6 +76,7 @@ enum { CFTYPE_ONLY_ON_ROOT = (1 << 0), /* only create on root cgrp */ CFTYPE_NOT_ON_ROOT = (1 << 1), /* don't create on root cgrp */ CFTYPE_NO_PREFIX = (1 << 3), /* (DON'T USE FOR NEW FILES) no subsys prefix */ + CFTYPE_WORLD_WRITABLE = (1 << 4), /* (DON'T USE FOR NEW FILES) S_IWUGO */ /* internal flags, do not use outside cgroup core proper */ __CFTYPE_ONLY_ON_DFL = (1 << 16), /* only on default hierarchy */ @@ -324,11 +325,6 @@ struct cftype { */ char name[MAX_CFTYPE_NAME]; unsigned long private; - /* - * If not 0, file mode is set to this value, otherwise it will - * be figured out automatically - */ - umode_t mode; /* * The maximum length of string, excluding trailing nul, that can -- cgit v1.2.3 From 6f60eade2433cb3a38687d5f8a4f44b92c6c51bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 17:54:23 -0400 Subject: cgroup: generalize obtaining the handles of and notifying cgroup files cgroup core handles creations and removals of cgroup interface files as described by cftypes. There are cases where the handle for a given file instance is necessary, for example, to generate a file modified event. Currently, this is handled by explicitly matching the callback method pointer and storing the file handle manually in cgroup_add_file(). While this simple approach works for cgroup core files, it can't for controller interface files. This patch generalizes cgroup interface file handle handling. struct cgroup_file is defined and each cftype can optionally tell cgroup core to store the file handle by setting ->file_offset. A file handle remains accessible as long as the containing css is accessible. Both "cgroup.procs" and "cgroup.events" are converted to use the new generic mechanism instead of hooking directly into cgroup_add_file(). Also, cgroup_file_notify() which takes a struct cgroup_file and generates a file modified event on it is added and replaces explicit kernfs_notify() invocations. This generalizes cgroup file handle handling and allows controllers to generate file modified notifications. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner --- include/linux/cgroup-defs.h | 26 ++++++++++++++++++++++++-- include/linux/cgroup.h | 13 +++++++++++++ 2 files changed, 37 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 10d814bcd487..df589a097539 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -83,6 +83,17 @@ enum { __CFTYPE_NOT_ON_DFL = (1 << 17), /* not on default hierarchy */ }; +/* + * cgroup_file is the handle for a file instance created in a cgroup which + * is used, for example, to generate file changed notifications. This can + * be obtained by setting cftype->file_offset. + */ +struct cgroup_file { + /* do not access any fields from outside cgroup core */ + struct list_head node; /* anchored at css->files */ + struct kernfs_node *kn; +}; + /* * Per-subsystem/per-cgroup state maintained by the system. This is the * fundamental structural building block that controllers deal with. @@ -123,6 +134,9 @@ struct cgroup_subsys_state { */ u64 serial_nr; + /* all cgroup_files associated with this css */ + struct list_head files; + /* percpu_ref killing and RCU release */ struct rcu_head rcu_head; struct work_struct destroy_work; @@ -226,8 +240,8 @@ struct cgroup { int populated_cnt; struct kernfs_node *kn; /* cgroup kernfs entry */ - struct kernfs_node *procs_kn; /* kn for "cgroup.procs" */ - struct kernfs_node *events_kn; /* kn for "cgroup.events" */ + struct cgroup_file procs_file; /* handle for "cgroup.procs" */ + struct cgroup_file events_file; /* handle for "cgroup.events" */ /* * The bitmask of subsystems enabled on the child cgroups. @@ -335,6 +349,14 @@ struct cftype { /* CFTYPE_* flags */ unsigned int flags; + /* + * If non-zero, should contain the offset from the start of css to + * a struct cgroup_file field. cgroup will record the handle of + * the created file into it. The recorded handle can be used as + * long as the containing css remains accessible. + */ + unsigned int file_offset; + /* * Fields used for internal bookkeeping. Initialized automatically * during registration. diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 355bf2ed8867..fb717f2cba5b 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -490,6 +490,19 @@ static inline void pr_cont_cgroup_path(struct cgroup *cgrp) pr_cont_kernfs_path(cgrp->kn); } +/** + * cgroup_file_notify - generate a file modified event for a cgroup_file + * @cfile: target cgroup_file + * + * @cfile must have been obtained by setting cftype->file_offset. + */ +static inline void cgroup_file_notify(struct cgroup_file *cfile) +{ + /* might not have been created due to one of the CFTYPE selector flags */ + if (cfile->kn) + kernfs_notify(cfile->kn); +} + #else /* !CONFIG_CGROUPS */ struct cgroup_subsys_state; -- cgit v1.2.3 From 6b7bc0618ff1a333d2265131b124e966335d5dee Mon Sep 17 00:00:00 2001 From: Martin Sperl Date: Mon, 22 Jun 2015 13:02:04 +0000 Subject: spi: add transfer histogram statistics via sysfs report transfer sizes as a histogram via the following files: /sys/class/spi_master/spi*/statistics/transfer_bytes_histo_* /sys/class/spi_master/spi*/spi*.*/statistics/transfer_bytes_histo_* Signed-off-by: Martin Sperl Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8afd3e2a..5b6fdc48eba7 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -51,6 +51,8 @@ extern struct bus_type spi_bus_type; * @bytes_tx: number of bytes sent to device * @bytes_rx: number of bytes received from device * + * @transfer_bytes_histo: + * transfer bytes histogramm */ struct spi_statistics { spinlock_t lock; /* lock for the whole structure */ @@ -68,6 +70,8 @@ struct spi_statistics { unsigned long long bytes_rx; unsigned long long bytes_tx; +#define SPI_STATISTICS_HISTO_SIZE 17 + unsigned long transfer_bytes_histo[SPI_STATISTICS_HISTO_SIZE]; }; void spi_statistics_add_transfer_stats(struct spi_statistics *stats, -- cgit v1.2.3 From 5cfc5220a63b1008e7198fb4f91c3ef763e46657 Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Fri, 18 Sep 2015 10:14:41 +0300 Subject: ARM: pxa: Remove unused clock_enable field from struct pxa2xx_spi_master Use for struct pxa2xx_spi_master clock_enable field was removed years ago from the pxa2xx-spi driver by the commit 2f1a74e5a2de ("[ARM] pxa: make pxa2xx_spi driver use ssp_request()/ssp_free()"). Therefore remove it from structure definition, documentation and from couple affected board files. Signed-off-by: Jarkko Nikula Acked-by: Mark Brown Signed-off-by: Robert Jarzmik --- include/linux/spi/pxa2xx_spi.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/pxa2xx_spi.h b/include/linux/spi/pxa2xx_spi.h index 6d36dacec4ba..9ec4c147abbc 100644 --- a/include/linux/spi/pxa2xx_spi.h +++ b/include/linux/spi/pxa2xx_spi.h @@ -23,7 +23,6 @@ struct dma_chan; /* device.platform_data for SSP controller devices */ struct pxa2xx_spi_master { - u32 clock_enable; u16 num_chipselect; u8 enable_dma; -- cgit v1.2.3 From 59796edcf21c7c19d58a223001f9ed13746c51c2 Mon Sep 17 00:00:00 2001 From: Prarit Bhargava Date: Thu, 10 Sep 2015 10:17:59 +0300 Subject: mei: make modules.alias UUID information easier to read scripts/mod/file2alias.c:add_uuid() convert UUID into a single string which does not conform to the standard little endian UUID formatting. This patch changes add_uuid() to output same format as %pUL and modifies the mei driver to match the change. Signed-off-by: Prarit Bhargava Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 688997a24aad..5e8a0ad22cbc 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -601,10 +601,6 @@ struct ipack_device_id { #define MEI_CL_MODULE_PREFIX "mei:" #define MEI_CL_NAME_SIZE 32 -#define MEI_CL_UUID_FMT "%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x%02x" -#define MEI_CL_UUID_ARGS(_u) \ - _u[0], _u[1], _u[2], _u[3], _u[4], _u[5], _u[6], _u[7], \ - _u[8], _u[9], _u[10], _u[11], _u[12], _u[13], _u[14], _u[15] /** * struct mei_cl_device_id - MEI client device identifier -- cgit v1.2.3 From b26864cad1c9f66f4966726ba7bc81d2b9b8f990 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:01 +0300 Subject: mei: bus: add client protocol version to the device alias The device alias now looks like mei:S:uuid:N:* In that way we can bind different drivers to clients with different protocol versions if required. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 5e8a0ad22cbc..6975cbf1435b 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -601,11 +601,13 @@ struct ipack_device_id { #define MEI_CL_MODULE_PREFIX "mei:" #define MEI_CL_NAME_SIZE 32 +#define MEI_CL_VERSION_ANY 0xff /** * struct mei_cl_device_id - MEI client device identifier * @name: helper name * @uuid: client uuid + * @version: client protocol version * @driver_info: information used by the driver. * * identifies mei client device by uuid and name @@ -613,6 +615,7 @@ struct ipack_device_id { struct mei_cl_device_id { char name[MEI_CL_NAME_SIZE]; uuid_le uuid; + __u8 version; kernel_ulong_t driver_info; }; -- cgit v1.2.3 From baeacd0376975bee0fdf6378e1c24587ab0b6ad4 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:02 +0300 Subject: mei: bus: export uuid and protocol version to mei_cl bus drivers Export the uuid and the protocol version of the underlying me client for me client bus drivers usage. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 0962b2ca628a..9834f832b098 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -85,6 +85,9 @@ int mei_cl_register_event_cb(struct mei_cl_device *device, #define MEI_CL_EVENT_TX 1 #define MEI_CL_EVENT_NOTIF 2 +const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); +u8 mei_cldev_ver(const struct mei_cl_device *cldev); + void *mei_cl_get_drvdata(const struct mei_cl_device *device); void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); -- cgit v1.2.3 From 01a14edeaf0456c28e2b9af3afdc0807ec6c20bd Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:03 +0300 Subject: mei: bus: export mei_cldev_enabled function Let me client device driver query of the device is connected and hence enabled. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 9834f832b098..30d1c8b94a27 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -93,5 +93,6 @@ void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); int mei_cl_enable_device(struct mei_cl_device *device); int mei_cl_disable_device(struct mei_cl_device *device); +bool mei_cldev_enabled(struct mei_cl_device *cldev); #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From 893913822e829f7a37824f6041ff964076374191 Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:04 +0300 Subject: mei: bus: complete variable rename of type struct mei_cl_device In the commit 5c079ae11921 ("mei: bus: fix drivers and devices names confusion") we set the variables of type struct mei_cl_device to 'cldev' but few places were left out, namely mei_cl_bus.h header and the mei nfc drivers. Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 29 ++++++++++++++--------------- 1 file changed, 14 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index 30d1c8b94a27..c364df750405 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -8,7 +8,7 @@ struct mei_cl_device; struct mei_device; -typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *device, +typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *cldev, u32 events, void *context); /** @@ -62,22 +62,21 @@ struct mei_cl_driver { const struct mei_cl_device_id *id_table; - int (*probe)(struct mei_cl_device *dev, + int (*probe)(struct mei_cl_device *cldev, const struct mei_cl_device_id *id); - int (*remove)(struct mei_cl_device *dev); + int (*remove)(struct mei_cl_device *cldev); }; -int __mei_cl_driver_register(struct mei_cl_driver *driver, - struct module *owner); -#define mei_cl_driver_register(driver) \ - __mei_cl_driver_register(driver, THIS_MODULE) +int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner); +#define mei_cl_driver_register(cldrv) \ + __mei_cl_driver_register(cldrv, THIS_MODULE) -void mei_cl_driver_unregister(struct mei_cl_driver *driver); +void mei_cl_driver_unregister(struct mei_cl_driver *cldrv); -ssize_t mei_cl_send(struct mei_cl_device *device, u8 *buf, size_t length); -ssize_t mei_cl_recv(struct mei_cl_device *device, u8 *buf, size_t length); +ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -int mei_cl_register_event_cb(struct mei_cl_device *device, +int mei_cl_register_event_cb(struct mei_cl_device *cldev, unsigned long event_mask, mei_cl_event_cb_t read_cb, void *context); @@ -88,11 +87,11 @@ int mei_cl_register_event_cb(struct mei_cl_device *device, const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); -void *mei_cl_get_drvdata(const struct mei_cl_device *device); -void mei_cl_set_drvdata(struct mei_cl_device *device, void *data); +void *mei_cl_get_drvdata(const struct mei_cl_device *cldev); +void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data); -int mei_cl_enable_device(struct mei_cl_device *device); -int mei_cl_disable_device(struct mei_cl_device *device); +int mei_cl_enable_device(struct mei_cl_device *cldev); +int mei_cl_disable_device(struct mei_cl_device *cldev); bool mei_cldev_enabled(struct mei_cl_device *cldev); #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From d49dc5e76fc917e5dfef76cb56fe3b3868deed5d Mon Sep 17 00:00:00 2001 From: Tomas Winkler Date: Thu, 10 Sep 2015 10:18:05 +0300 Subject: mei: bus: use mei_cldev_ prefix for the API functions Use mei_cldev_ prefix for all mei client bus api functions in order to resolve prefix conflict with functions that handle client function and are defined in client.c Signed-off-by: Tomas Winkler Signed-off-by: Greg Kroah-Hartman --- include/linux/mei_cl_bus.h | 33 +++++++++++++++++---------------- 1 file changed, 17 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mei_cl_bus.h b/include/linux/mei_cl_bus.h index c364df750405..e746919530f5 100644 --- a/include/linux/mei_cl_bus.h +++ b/include/linux/mei_cl_bus.h @@ -8,8 +8,8 @@ struct mei_cl_device; struct mei_device; -typedef void (*mei_cl_event_cb_t)(struct mei_cl_device *cldev, - u32 events, void *context); +typedef void (*mei_cldev_event_cb_t)(struct mei_cl_device *cldev, + u32 events, void *context); /** * struct mei_cl_device - MEI device handle @@ -45,7 +45,7 @@ struct mei_cl_device { char name[MEI_CL_NAME_SIZE]; struct work_struct event_work; - mei_cl_event_cb_t event_cb; + mei_cldev_event_cb_t event_cb; void *event_context; unsigned long events_mask; unsigned long events; @@ -67,18 +67,19 @@ struct mei_cl_driver { int (*remove)(struct mei_cl_device *cldev); }; -int __mei_cl_driver_register(struct mei_cl_driver *cldrv, struct module *owner); -#define mei_cl_driver_register(cldrv) \ - __mei_cl_driver_register(cldrv, THIS_MODULE) +int __mei_cldev_driver_register(struct mei_cl_driver *cldrv, + struct module *owner); +#define mei_cldev_driver_register(cldrv) \ + __mei_cldev_driver_register(cldrv, THIS_MODULE) -void mei_cl_driver_unregister(struct mei_cl_driver *cldrv); +void mei_cldev_driver_unregister(struct mei_cl_driver *cldrv); -ssize_t mei_cl_send(struct mei_cl_device *cldev, u8 *buf, size_t length); -ssize_t mei_cl_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_send(struct mei_cl_device *cldev, u8 *buf, size_t length); +ssize_t mei_cldev_recv(struct mei_cl_device *cldev, u8 *buf, size_t length); -int mei_cl_register_event_cb(struct mei_cl_device *cldev, - unsigned long event_mask, - mei_cl_event_cb_t read_cb, void *context); +int mei_cldev_register_event_cb(struct mei_cl_device *cldev, + unsigned long event_mask, + mei_cldev_event_cb_t read_cb, void *context); #define MEI_CL_EVENT_RX 0 #define MEI_CL_EVENT_TX 1 @@ -87,11 +88,11 @@ int mei_cl_register_event_cb(struct mei_cl_device *cldev, const uuid_le *mei_cldev_uuid(const struct mei_cl_device *cldev); u8 mei_cldev_ver(const struct mei_cl_device *cldev); -void *mei_cl_get_drvdata(const struct mei_cl_device *cldev); -void mei_cl_set_drvdata(struct mei_cl_device *cldev, void *data); +void *mei_cldev_get_drvdata(const struct mei_cl_device *cldev); +void mei_cldev_set_drvdata(struct mei_cl_device *cldev, void *data); -int mei_cl_enable_device(struct mei_cl_device *cldev); -int mei_cl_disable_device(struct mei_cl_device *cldev); +int mei_cldev_enable(struct mei_cl_device *cldev); +int mei_cldev_disable(struct mei_cl_device *cldev); bool mei_cldev_enabled(struct mei_cl_device *cldev); #endif /* _LINUX_MEI_CL_BUS_H */ -- cgit v1.2.3 From ff6f46483f0e14b829cd5a71bafcbb1c136a84b6 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 10 Sep 2015 15:06:20 -0700 Subject: spmi: Auto-populate driver.owner in spmi_driver_register() Populate the owner field of the spmi driver when spmi_driver_register() is called in a similar fashion to how other *_driver_register() functions do it. This saves driver writers from having to do this themselves. Cc: Andy Gross Cc: Gilad Avidov Signed-off-by: Stephen Boyd Signed-off-by: Greg Kroah-Hartman --- include/linux/spmi.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spmi.h b/include/linux/spmi.h index f84212cd3b7d..1396a255d2a2 100644 --- a/include/linux/spmi.h +++ b/include/linux/spmi.h @@ -153,7 +153,9 @@ static inline struct spmi_driver *to_spmi_driver(struct device_driver *d) return container_of(d, struct spmi_driver, driver); } -int spmi_driver_register(struct spmi_driver *sdrv); +#define spmi_driver_register(sdrv) \ + __spmi_driver_register(sdrv, THIS_MODULE) +int __spmi_driver_register(struct spmi_driver *sdrv, struct module *owner); /** * spmi_driver_unregister() - unregister an SPMI client driver -- cgit v1.2.3 From 66e8c57da6bf6b847a48a5a6fda59512f733ed78 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 25 Aug 2015 20:45:18 +0200 Subject: rcu: Change _wait_rcu_gp() to work around GCC bug 67055 Code like this in inline functions confuses some recent versions of gcc: const int n = const-expr; whatever_t array[n]; For more details, see: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67055#c13 This compiler bug results in the following failure after 114b7fd4b (rcu: Create rcu_sync infrastructure): In file included from include/linux/rcupdate.h:429:0, from include/linux/rcu_sync.h:5, from kernel/rcu/sync.c:1: include/linux/rcutiny.h: In function 'rcu_barrier_sched': include/linux/rcutiny.h:55:20: internal compiler error: Segmentation fault static inline void rcu_barrier_sched(void) This commit therefore eliminates the constant local variable in favor of direct use of the expression. Reported-and-tested-by: Mark Salter Reported-by: Guenter Roeck Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index ff476515f716..581abf848566 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -230,12 +230,11 @@ void __wait_rcu_gp(bool checktiny, int n, call_rcu_func_t *crcu_array, struct rcu_synchronize *rs_array); #define _wait_rcu_gp(checktiny, ...) \ -do { \ - call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ - const int __n = ARRAY_SIZE(__crcu_array); \ - struct rcu_synchronize __rs_array[__n]; \ - \ - __wait_rcu_gp(checktiny, __n, __crcu_array, __rs_array); \ +do { \ + call_rcu_func_t __crcu_array[] = { __VA_ARGS__ }; \ + struct rcu_synchronize __rs_array[ARRAY_SIZE(__crcu_array)]; \ + __wait_rcu_gp(checktiny, ARRAY_SIZE(__crcu_array), \ + __crcu_array, __rs_array); \ } while (0) #define wait_rcu_gp(...) _wait_rcu_gp(false, __VA_ARGS__) -- cgit v1.2.3 From 8203d6d0ee784cfb2ebf89053f7fe399abc867d7 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Sun, 2 Aug 2015 13:53:17 -0700 Subject: rcu: Use single-stage IPI algorithm for RCU expedited grace period The current preemptible-RCU expedited grace-period algorithm invokes synchronize_sched_expedited() to enqueue all tasks currently running in a preemptible-RCU read-side critical section, then waits for all the ->blkd_tasks lists to drain. This works, but results in both an IPI and a double context switch even on CPUs that do not happen to be running in a preemptible RCU read-side critical section. This commit implements a new algorithm that causes less OS jitter. This new algorithm IPIs all online CPUs that are not idle (from an RCU perspective), but refrains from self-IPIs. If a CPU receiving this IPI is not in a preemptible RCU read-side critical section (or is just now exiting one), it pushes quiescence up the rcu_node tree, otherwise, it sets a flag that will be handled by the upcoming outermost rcu_read_unlock(), which will then push quiescence up the tree. The expedited grace period must of course wait on any pre-existing blocked readers, and newly blocked readers must be queued carefully based on the state of both the normal and the expedited grace periods. This new queueing approach also avoids the need to update boost state, courtesy of the fact that blocked tasks are no longer ever migrated to the root rcu_node structure. Signed-off-by: Paul E. McKenney --- include/linux/sched.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index a4ab9daa387c..7fa8c4d372e7 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1330,10 +1330,12 @@ struct sched_dl_entity { union rcu_special { struct { - bool blocked; - bool need_qs; - } b; - short s; + u8 blocked; + u8 need_qs; + u8 exp_need_qs; + u8 pad; /* Otherwise the compiler can store garbage here. */ + } b; /* Bits. */ + u32 s; /* Set of bits. */ }; struct rcu_node; -- cgit v1.2.3 From b05b7c7cc0324524dcda7fa7c2be1255290ee416 Mon Sep 17 00:00:00 2001 From: Muhammad Hamza Farooq Date: Fri, 11 Sep 2015 16:42:38 +0200 Subject: ti-st: use worker instead of calling st_int_write in wake up The wake up method is called with the port lock held. The st_int_write method calls port->ops->write with tries to acquire the lock again, causing CPU to wait infinitely. Right way to do is to write data to port in worker thread. Signed-off-by: Muhammad Hamza Farooq Signed-off-by: Jacob Siverskog Signed-off-by: Greg Kroah-Hartman --- include/linux/ti_wilink_st.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ti_wilink_st.h b/include/linux/ti_wilink_st.h index d4217eff489f..0a0d56834c8e 100644 --- a/include/linux/ti_wilink_st.h +++ b/include/linux/ti_wilink_st.h @@ -158,6 +158,7 @@ struct st_data_s { unsigned long ll_state; void *kim_data; struct tty_struct *tty; + struct work_struct work_write_wakeup; }; /* -- cgit v1.2.3 From 2785968cd122b22b289db565b7438f2200984044 Mon Sep 17 00:00:00 2001 From: Marc Kleine-Budde Date: Sat, 9 May 2015 17:47:52 +0200 Subject: can: headers: make header files self contained This patch adds the missing #include-s to the dev.h and led.h, so that they can be used without including further header files. Signed-off-by: Marc Kleine-Budde --- include/linux/can/dev.h | 3 ++- include/linux/can/led.h | 1 + 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index c3a9c8fc60fa..56dcadd83716 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -14,9 +14,10 @@ #define _CAN_DEV_H #include -#include #include #include +#include +#include /* * CAN mode diff --git a/include/linux/can/led.h b/include/linux/can/led.h index 146de4506d21..2746f7c2f87d 100644 --- a/include/linux/can/led.h +++ b/include/linux/can/led.h @@ -11,6 +11,7 @@ #include #include +#include enum can_led_event { CAN_LED_EVENT_OPEN, -- cgit v1.2.3 From 472912a2b5e2027efd58aa47f78acb2373675187 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 18 Sep 2015 18:01:59 -0400 Subject: memcg: generate file modified notifications on "memory.events" cgroup core only recently grew generic notification support. Wire up "memory.events" so that it triggers a file modified event whenever its content changes. v2: Refreshed on top of mem_cgroup relocation. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Li Zefan --- include/linux/memcontrol.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 9aa7820c2177..c83c699a6605 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -213,6 +213,9 @@ struct mem_cgroup { /* OOM-Killer disable */ int oom_kill_disable; + /* handle for "memory.events" */ + struct cgroup_file events_file; + /* protect arrays of thresholds */ struct mutex thresholds_lock; @@ -286,6 +289,7 @@ static inline void mem_cgroup_events(struct mem_cgroup *memcg, unsigned int nr) { this_cpu_add(memcg->stat->events[idx], nr); + cgroup_file_notify(&memcg->events_file); } bool mem_cgroup_low(struct mem_cgroup *root, struct mem_cgroup *memcg); -- cgit v1.2.3 From 730a43fbc135e593cc3de3b1b895e49c05c8e2dc Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 3 Sep 2015 18:03:38 +0200 Subject: mtd: nand: add nand_check_erased helper functions Add two helper functions to help NAND controller drivers test whether a specific NAND region is erased or not. Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 3140b3d94838..c4d8e308f453 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -1030,4 +1030,9 @@ struct nand_sdr_timings { /* get timing characteristics from ONFI timing mode. */ const struct nand_sdr_timings *onfi_async_timing_mode_to_sdr_timings(int mode); + +int nand_check_erased_ecc_chunk(void *data, int datalen, + void *ecc, int ecclen, + void *extraoob, int extraooblen, + int threshold); #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 0f1c28ae74bb1a34d36fca2db5161611d58b3148 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 18 Sep 2015 11:36:14 -0700 Subject: tcp: usec resolution SYN/ACK RTT Currently SYN/ACK RTT is measured in jiffies. For LAN the SYN/ACK RTT is often measured as 0ms or sometimes 1ms, which would affect RTT estimation and min RTT samping used by some congestion control. This patch improves SYN/ACK RTT to be usec resolution if platform supports it. While the timestamping of SYN/ACK is done in request sock, the RTT measurement is carefully arranged to avoid storing another u64 timestamp in tcp_sock. For regular handshake w/o SYNACK retransmission, the RTT is sampled right after the child socket is created and right before the request sock is released (tcp_check_req() in tcp_minisocks.c) For Fast Open the child socket is already created when SYN/ACK was sent, the RTT is sampled in tcp_rcv_state_process() after processing the final ACK an right before the request socket is released. If the SYN/ACK was retransmistted or SYN-cookie was used, we rely on TCP timestamps to measure the RTT. The sample is taken at the same place in tcp_rcv_state_process() after the timestamp values are validated in tcp_validate_incoming(). Note that we do not store TS echo value in request_sock for SYN-cookies, because the value is already stored in tp->rx_opt used by tcp_ack_update_rtt(). One side benefit is that the RTT measurement now happens before initializing congestion control (of the passive side). Therefore the congestion control can use the SYN/ACK RTT. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 937b97893d5f..fcb573be75d9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -112,11 +112,11 @@ struct tcp_request_sock_ops; struct tcp_request_sock { struct inet_request_sock req; const struct tcp_request_sock_ops *af_specific; + struct skb_mstamp snt_synack; /* first SYNACK sent time */ bool tfo_listener; u32 txhash; u32 rcv_isn; u32 snt_isn; - u32 snt_synack; /* synack sent time */ u32 last_oow_ack_time; /* last SYNACK */ u32 rcv_nxt; /* the ack # by SYNACK. For * FastOpen it's the seq# -- cgit v1.2.3 From 79c452adb159dc9abc507ea13faec8d115a78758 Mon Sep 17 00:00:00 2001 From: Cyrille Pitchen Date: Fri, 18 Sep 2015 17:49:25 +0200 Subject: mtd: spi-nor: remove unused read_xfer/write_xfer hooks struct spi_nor_xfer_cfg and read_xfer/write_xfer hooks were never used by any driver. Do some cleanup by removing them. Signed-off-by: Cyrille Pitchen Reviewed-by: Marek Vasut Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index e9c912d73141..672595a381c5 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -87,33 +87,6 @@ enum read_mode { SPI_NOR_QUAD, }; -/** - * struct spi_nor_xfer_cfg - Structure for defining a Serial Flash transfer - * @wren: command for "Write Enable", or 0x00 for not required - * @cmd: command for operation - * @cmd_pins: number of pins to send @cmd (1, 2, 4) - * @addr: address for operation - * @addr_pins: number of pins to send @addr (1, 2, 4) - * @addr_width: number of address bytes - * (3,4, or 0 for address not required) - * @mode: mode data - * @mode_pins: number of pins to send @mode (1, 2, 4) - * @mode_cycles: number of mode cycles (0 for mode not required) - * @dummy_cycles: number of dummy cycles (0 for dummy not required) - */ -struct spi_nor_xfer_cfg { - u8 wren; - u8 cmd; - u8 cmd_pins; - u32 addr; - u8 addr_pins; - u8 addr_width; - u8 mode; - u8 mode_pins; - u8 mode_cycles; - u8 dummy_cycles; -}; - #define SPI_NOR_MAX_CMD_SIZE 8 enum spi_nor_ops { SPI_NOR_OPS_READ = 0, @@ -144,14 +117,11 @@ struct mtd_info; * @flash_read: the mode of the read * @sst_write_second: used by the SST write operation * @flags: flag options for the current SPI-NOR (SNOR_F_*) - * @cfg: used by the read_xfer/write_xfer * @cmd_buf: used by the write_reg * @prepare: [OPTIONAL] do some preparations for the * read/write/erase/lock/unlock operations * @unprepare: [OPTIONAL] do some post work after the * read/write/erase/lock/unlock operations - * @read_xfer: [OPTIONAL] the read fundamental primitive - * @write_xfer: [OPTIONAL] the writefundamental primitive * @read_reg: [DRIVER-SPECIFIC] read out the register * @write_reg: [DRIVER-SPECIFIC] write data to the register * @read: [DRIVER-SPECIFIC] read data from the SPI NOR @@ -176,15 +146,10 @@ struct spi_nor { enum read_mode flash_read; bool sst_write_second; u32 flags; - struct spi_nor_xfer_cfg cfg; u8 cmd_buf[SPI_NOR_MAX_CMD_SIZE]; int (*prepare)(struct spi_nor *nor, enum spi_nor_ops ops); void (*unprepare)(struct spi_nor *nor, enum spi_nor_ops ops); - int (*read_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, - u8 *buf, size_t len); - int (*write_xfer)(struct spi_nor *nor, struct spi_nor_xfer_cfg *cfg, - u8 *buf, size_t len); int (*read_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); int (*write_reg)(struct spi_nor *nor, u8 opcode, u8 *buf, int len); -- cgit v1.2.3 From e76e397a7f99bffbd91fb21f96370cbb165b5bcd Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Sep 2015 10:42:13 +0100 Subject: mfd: arizona: Add registers for ADC microphone detection The newer devices support using a software comparison to determine whether a 3/4 pole jack is present. Add the registers necessary for this. Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/arizona/pdata.h | 3 +++ include/linux/mfd/arizona/registers.h | 6 +++--- 2 files changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 1dc385850ba2..92f31ffaf866 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -124,6 +124,9 @@ struct arizona_pdata { /** Channel to use for headphone detection */ unsigned int hpdet_channel; + /** Use software comparison to determine mic presence */ + bool micd_software_compare; + /** Extra debounce timeout used during initial mic detection (ms) */ unsigned int micd_detect_debounce; diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index fdd70b3c7418..ab219c9b0b08 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -2359,9 +2359,9 @@ #define ARIZONA_ACCDET_SRC_MASK 0x2000 /* ACCDET_SRC */ #define ARIZONA_ACCDET_SRC_SHIFT 13 /* ACCDET_SRC */ #define ARIZONA_ACCDET_SRC_WIDTH 1 /* ACCDET_SRC */ -#define ARIZONA_ACCDET_MODE_MASK 0x0003 /* ACCDET_MODE - [1:0] */ -#define ARIZONA_ACCDET_MODE_SHIFT 0 /* ACCDET_MODE - [1:0] */ -#define ARIZONA_ACCDET_MODE_WIDTH 2 /* ACCDET_MODE - [1:0] */ +#define ARIZONA_ACCDET_MODE_MASK 0x0007 /* ACCDET_MODE - [2:0] */ +#define ARIZONA_ACCDET_MODE_SHIFT 0 /* ACCDET_MODE - [2:0] */ +#define ARIZONA_ACCDET_MODE_WIDTH 3 /* ACCDET_MODE - [2:0] */ /* * R667 (0x29B) - Headphone Detect 1 -- cgit v1.2.3 From 1ce376897e02e623ba357594604ca655df61053b Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Sep 2015 10:42:14 +0100 Subject: mfd: arizona: Add register bits for general purpose switch The switch is typically used in conjunction with the MICDET clamp in order to suppress pops and clicks associated with jack insertion. Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/arizona/pdata.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/pdata.h b/include/linux/mfd/arizona/pdata.h index 92f31ffaf866..57b45caaea80 100644 --- a/include/linux/mfd/arizona/pdata.h +++ b/include/linux/mfd/arizona/pdata.h @@ -184,6 +184,9 @@ struct arizona_pdata { /** GPIO for primary IRQ (used for edge triggered emulation) */ int irq_gpio; + + /** General purpose switch control */ + unsigned int gpsw; }; #endif -- cgit v1.2.3 From b8ba9edb911102b452c815879f520bfe2713fab6 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 16 Sep 2015 10:42:15 +0100 Subject: mfd: arizona: Add TST_CAP bits for headphone detection On wm5110/8280 some additional settings are required to get accurate measurements at the top end of the headphone detection range. This patch adds the bits required for this. Signed-off-by: Charles Keepax Acked-by: Lee Jones Signed-off-by: Chanwoo Choi --- include/linux/mfd/arizona/registers.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index ab219c9b0b08..c7c11c900196 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -242,6 +242,7 @@ #define ARIZONA_HP1_SHORT_CIRCUIT_CTRL 0x4A0 #define ARIZONA_HP2_SHORT_CIRCUIT_CTRL 0x4A1 #define ARIZONA_HP3_SHORT_CIRCUIT_CTRL 0x4A2 +#define ARIZONA_HP_TEST_CTRL_1 0x4A4 #define ARIZONA_SPK_CTRL_2 0x4B5 #define ARIZONA_SPK_CTRL_3 0x4B6 #define ARIZONA_DAC_COMP_1 0x4DC @@ -3701,6 +3702,13 @@ #define ARIZONA_HP3_SC_ENA_SHIFT 12 /* HP3_SC_ENA */ #define ARIZONA_HP3_SC_ENA_WIDTH 1 /* HP3_SC_ENA */ +/* + * R1188 (0x4A4) HP Test Ctrl 1 + */ +#define ARIZONA_HP1_TST_CAP_SEL_MASK 0x0003 /* HP1_TST_CAP_SEL - [1:0] */ +#define ARIZONA_HP1_TST_CAP_SEL_SHIFT 0 /* HP1_TST_CAP_SEL - [1:0] */ +#define ARIZONA_HP1_TST_CAP_SEL_WIDTH 2 /* HP1_TST_CAP_SEL - [1:0] */ + /* * R1244 (0x4DC) - DAC comp 1 */ -- cgit v1.2.3 From 87a93e4eceb495f93e3f37b100334d2641765b6c Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Fri, 18 Sep 2015 11:30:43 +0200 Subject: ieee802154: change needed headroom/tailroom This patch cleanups needed_headroom, needed_tailroom and hard_header_len fields for wpan and lowpan interfaces. For wpan interfaces the worst case mac header len should be part of needed_headroom, currently this is set as hard_header_len, but hard_header_len should be set to the minimum header length which xmit call assumes and this is the minimum frame length of 802.15.4. The hard_header_len value will check inside send callbacl of AF_PACKET raw sockets. For lowpan interfaces, if fragmentation isn't needed the skb will call dev_hard_header for 802154 layer and queue it afterwards. This happens without new skb allocation, so we need the same headroom and tailroom lengths like 802154 inside 802154 6lowpan layer. At least we assume as minimum header length an ipv6 header size. Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index db01492814d3..205ce4e1ac32 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -31,6 +31,17 @@ #define IEEE802154_ACK_PSDU_LEN 5 #define IEEE802154_MIN_PSDU_LEN 9 #define IEEE802154_FCS_LEN 2 +#define IEEE802154_MAX_AUTH_TAG_LEN 16 + +/* General MAC frame format: + * 2 bytes: Frame Control + * 1 byte: Sequence Number + * 20 bytes: Addressing fields + * 14 bytes: Auxiliary Security Header + */ +#define IEEE802154_MAX_HEADER_LEN (2 + 1 + 20 + 14) +#define IEEE802154_MIN_HEADER_LEN (IEEE802154_ACK_PSDU_LEN - \ + IEEE802154_FCS_LEN) #define IEEE802154_PAN_ID_BROADCAST 0xffff #define IEEE802154_ADDR_SHORT_BROADCAST 0xffff -- cgit v1.2.3 From 79750ac4257763ff595a8b2cdc7ba580f0b0c8e0 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Mon, 21 Sep 2015 11:24:33 +0200 Subject: ieee802154: add helpers for frame control checks This patch introduce two static inline functions. The first to get the frame control field from an sk_buff. The second is for checking on the acknowledgment request bit on the frame control field. Later we can introduce more functions to check on the frame control fields. These will deprecate the current behaviour which requires a host-byteorder conversion and manually bit handling. Reviewed-by: Stefan Schmidt Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index 205ce4e1ac32..aca228b81464 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -25,6 +25,8 @@ #include #include +#include +#include #include #define IEEE802154_MTU 127 @@ -218,6 +220,7 @@ enum { /* frame control handling */ #define IEEE802154_FCTL_FTYPE 0x0003 +#define IEEE802154_FCTL_ACKREQ 0x0020 #define IEEE802154_FCTL_INTRA_PAN 0x0040 #define IEEE802154_FTYPE_DATA 0x0001 @@ -232,6 +235,15 @@ static inline int ieee802154_is_data(__le16 fc) cpu_to_le16(IEEE802154_FTYPE_DATA); } +/** + * ieee802154_is_ackreq - check if acknowledgment request bit is set + * @fc: frame control bytes in little-endian byteorder + */ +static inline bool ieee802154_is_ackreq(__le16 fc) +{ + return fc & cpu_to_le16(IEEE802154_FCTL_ACKREQ); +} + /** * ieee802154_is_intra_pan - check if intra pan id communication * @fc: frame control bytes in little-endian byteorder -- cgit v1.2.3 From 7ac8bf9bc9ba82aea763ef30671a34c6a2a39922 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Tue, 22 Sep 2015 11:56:04 +0200 Subject: EDAC: Carve out debugfs functionality ... into a separate compilation unit and drop a couple of CONFIG_EDAC_DEBUG ifdefferies. Rename edac_create_debug_nodes() to edac_create_debugfs_nodes(), while at it. No functionality change. Cc: Signed-off-by: Borislav Petkov --- include/linux/edac.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/edac.h b/include/linux/edac.h index da3b72e95db3..b3d87e5822f8 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -769,12 +769,10 @@ struct mem_ctl_info { /* the internal state of this controller instance */ int op_state; -#ifdef CONFIG_EDAC_DEBUG struct dentry *debugfs; u8 fake_inject_layer[EDAC_MAX_LAYERS]; u32 fake_inject_ue; u16 fake_inject_count; -#endif }; /* -- cgit v1.2.3 From 2a1d3ab8986d1b2f598ffc42351d94166fa0f022 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Mon, 21 Sep 2015 11:01:10 +0200 Subject: genirq: Handle force threading of irqs with primary and thread handler Force threading of interrupts does not really deal with interrupts which are requested with a primary and a threaded handler. The current policy is to leave them alone and let the primary handler run in interrupt context, but we set the ONESHOT flag for those interrupts as well. Kohji Okuno debugged a problem with the SDHCI driver where the interrupt thread waits for a hardware interrupt to trigger, which can't work well because the hardware interrupt is masked due to the ONESHOT flag being set. He proposed to set the ONESHOT flag only if the interrupt does not provide a thread handler. Though that does not work either because these interrupts can be shared. So the other interrupt would rightfully get the ONESHOT flag set and therefor the same situation would happen again. To deal with this proper, we need to force thread the primary handler of such interrupts as well. That means that the primary interrupt handler is treated as any other primary interrupt handler which is not marked IRQF_NO_THREAD. The threaded handler becomes a separate thread so the SDHCI flow logic can be handled gracefully. The same issue was reported against 4.1-rt. Reported-and-tested-by: Kohji Okuno Reported-By: Michal Smucr Reported-and-tested-by: Nathan Sullivan Signed-off-by: Thomas Gleixner Cc: Sebastian Andrzej Siewior Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1509211058080.5606@nanos Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index be7e75c945e9..ad16809c8596 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -102,6 +102,7 @@ typedef irqreturn_t (*irq_handler_t)(int, void *); * @flags: flags (see IRQF_* above) * @thread_fn: interrupt handler function for threaded interrupts * @thread: thread pointer for threaded interrupts + * @secondary: pointer to secondary irqaction (force threading) * @thread_flags: flags related to @thread * @thread_mask: bitmask for keeping track of @thread activity * @dir: pointer to the proc/irq/NN/name entry @@ -113,6 +114,7 @@ struct irqaction { struct irqaction *next; irq_handler_t thread_fn; struct task_struct *thread; + struct irqaction *secondary; unsigned int irq; unsigned int flags; unsigned long thread_flags; -- cgit v1.2.3 From 69fb4dcada7704beeba86e3f401a66c726aa8504 Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Sat, 1 Aug 2015 10:39:38 +0200 Subject: power: Add an axp20x-usb-power driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This adds a driver for the usb power_supply bits of the axp20x PMICs. I initially started writing my own driver, before coming aware of Bruno Prémont's excellent earlier RFC with a driver for this. My driver was lacking CURRENT_MAX and VOLTAGE_MIN support Bruno's drvier has, so I've copied the code for those from his driver. Note that the AC-power-supply and battery charger bits will need separate drivers. Each one needs its own devictree child-node so that other devicetree nodes can reference the right power-supply, and thus each one will get its own mfd-cell / platform_device and platform-driver. Cc: Bruno Prémont Acked-by: Lee Jones Signed-off-by: Bruno Prémont Signed-off-by: Hans de Goede Signed-off-by: Sebastian Reichel --- include/linux/mfd/axp20x.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/axp20x.h b/include/linux/mfd/axp20x.h index cc8ad1e1a307..b24c771cebd5 100644 --- a/include/linux/mfd/axp20x.h +++ b/include/linux/mfd/axp20x.h @@ -11,6 +11,8 @@ #ifndef __LINUX_MFD_AXP20X_H #define __LINUX_MFD_AXP20X_H +#include + enum { AXP152_ID = 0, AXP202_ID, @@ -438,4 +440,26 @@ struct axp288_extcon_pdata { struct gpio_desc *gpio_mux_cntl; }; +/* generic helper function for reading 9-16 bit wide regs */ +static inline int axp20x_read_variable_width(struct regmap *regmap, + unsigned int reg, unsigned int width) +{ + unsigned int reg_val, result; + int err; + + err = regmap_read(regmap, reg, ®_val); + if (err) + return err; + + result = reg_val << (width - 8); + + err = regmap_read(regmap, reg + 1, ®_val); + if (err) + return err; + + result |= reg_val; + + return result; +} + #endif /* __LINUX_MFD_AXP20X_H */ -- cgit v1.2.3 From e3abc8ff0fc18b3925fd5d5c5fbd1613856f4e7c Mon Sep 17 00:00:00 2001 From: Emmanuel Grumbach Date: Sun, 16 Aug 2015 11:13:22 +0300 Subject: mac80211: allow to transmit A-MSDU within A-MPDU Advertise the capability to send A-MSDU within A-MPDU in the AddBA request sent by mac80211. Let the driver know about the peer's capabilities. Signed-off-by: Emmanuel Grumbach Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index cfa906f28b7a..19eb9ecd6cf3 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1379,6 +1379,7 @@ struct ieee80211_ht_operation { /* block-ack parameters */ +#define IEEE80211_ADDBA_PARAM_AMSDU_MASK 0x0001 #define IEEE80211_ADDBA_PARAM_POLICY_MASK 0x0002 #define IEEE80211_ADDBA_PARAM_TID_MASK 0x003C #define IEEE80211_ADDBA_PARAM_BUF_SIZE_MASK 0xFFC0 -- cgit v1.2.3 From 0edd5faeb07bfd3ec5402f9467e4c169dcd131e8 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Fri, 28 Aug 2015 14:31:48 +0200 Subject: wireless: mark element IDs 8 and 9 reserved These were never used in the tree, and are marked as reserved in the IEEE 802.11 documentation (ANA). Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 19eb9ecd6cf3..f79a02a69d26 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1746,8 +1746,7 @@ enum ieee80211_eid { WLAN_EID_TIM = 5, WLAN_EID_IBSS_PARAMS = 6, WLAN_EID_COUNTRY = 7, - WLAN_EID_HP_PARAMS = 8, - WLAN_EID_HP_TABLE = 9, + /* 8, 9 reserved */ WLAN_EID_REQUEST = 10, WLAN_EID_QBSS_LOAD = 11, WLAN_EID_EDCA_PARAM_SET = 12, -- cgit v1.2.3 From 75ea8ca844fd35ce365e35e1617b239892d31f72 Mon Sep 17 00:00:00 2001 From: Marcel Ziswiler Date: Sun, 20 Sep 2015 00:13:37 +0200 Subject: power: charger-manager: comment spelling fixes By accident I stumbled over a few misspelled words in the charger-manager header file which this patch fixes. Namely: - Extcon rather than Exton - constraint rather than constratint - existence rather than existance - difference rather than diffential While at it also add a missing space before a closing comment star forward-slash. Signed-off-by: Marcel Ziswiler Signed-off-by: Sebastian Reichel --- include/linux/power/charger-manager.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/power/charger-manager.h b/include/linux/power/charger-manager.h index eadf28cb2fc9..c4fa907c8f14 100644 --- a/include/linux/power/charger-manager.h +++ b/include/linux/power/charger-manager.h @@ -65,7 +65,7 @@ struct charger_cable { const char *extcon_name; const char *name; - /* The charger-manager use Exton framework*/ + /* The charger-manager use Extcon framework */ struct extcon_specific_cable_nb extcon_dev; struct work_struct wq; struct notifier_block nb; @@ -94,7 +94,7 @@ struct charger_cable { * the charger will be maintained with disabled state. * @cables: * the array of charger cables to enable/disable charger - * and set current limit according to constratint data of + * and set current limit according to constraint data of * struct charger_cable if only charger cable included * in the array of charger cables is attached/detached. * @num_cables: the number of charger cables. @@ -148,7 +148,7 @@ struct charger_regulator { * @polling_interval_ms: interval in millisecond at which * charger manager will monitor battery health * @battery_present: - * Specify where information for existance of battery can be obtained + * Specify where information for existence of battery can be obtained * @psy_charger_stat: the names of power-supply for chargers * @num_charger_regulator: the number of entries in charger_regulators * @charger_regulators: array of charger regulators @@ -156,7 +156,7 @@ struct charger_regulator { * @thermal_zone : the name of thermal zone for battery * @temp_min : Minimum battery temperature for charging. * @temp_max : Maximum battery temperature for charging. - * @temp_diff : Temperature diffential to restart charging. + * @temp_diff : Temperature difference to restart charging. * @measure_battery_temp: * true: measure battery temperature * false: measure ambient temperature -- cgit v1.2.3 From 4530eddb59494b89650d6bcd980fc7f7717ad80c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 11 Sep 2015 15:00:19 -0400 Subject: cgroup, memcg, cpuset: implement cgroup_taskset_for_each_leader() It wasn't explicitly documented but, when a process is being migrated, cpuset and memcg depend on cgroup_taskset_first() returning the threadgroup leader; however, this approach is somewhat ghetto and would no longer work for the planned multi-process migration. This patch introduces explicit cgroup_taskset_for_each_leader() which iterates over only the threadgroup leaders and replaces cgroup_taskset_first() usages for accessing the leader with it. This prepares both memcg and cpuset for multi-process migration. This patch also updates the documentation for cgroup_taskset_for_each() to clarify the iteration rules and removes comments mentioning task ordering in tasksets. v2: A previous patch which added threadgroup leader test was dropped. Patch updated accordingly. Signed-off-by: Tejun Heo Acked-by: Zefan Li Acked-by: Michal Hocko Cc: Johannes Weiner --- include/linux/cgroup.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index fb717f2cba5b..e9c3eac074e2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -232,11 +232,33 @@ void css_task_iter_end(struct css_task_iter *it); * cgroup_taskset_for_each - iterate cgroup_taskset * @task: the loop cursor * @tset: taskset to iterate + * + * @tset may contain multiple tasks and they may belong to multiple + * processes. When there are multiple tasks in @tset, if a task of a + * process is in @tset, all tasks of the process are in @tset. Also, all + * are guaranteed to share the same source and destination csses. + * + * Iteration is not in any specific order. */ #define cgroup_taskset_for_each(task, tset) \ for ((task) = cgroup_taskset_first((tset)); (task); \ (task) = cgroup_taskset_next((tset))) +/** + * cgroup_taskset_for_each_leader - iterate group leaders in a cgroup_taskset + * @leader: the loop cursor + * @tset: takset to iterate + * + * Iterate threadgroup leaders of @tset. For single-task migrations, @tset + * may not contain any. + */ +#define cgroup_taskset_for_each_leader(leader, tset) \ + for ((leader) = cgroup_taskset_first((tset)); (leader); \ + (leader) = cgroup_taskset_next((tset))) \ + if ((leader) != (leader)->group_leader) \ + ; \ + else + /* * Inline functions. */ -- cgit v1.2.3 From 4ad2ddce1a096dea71d42969515d3a64f0d9246f Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Tue, 25 Aug 2015 21:10:05 +0200 Subject: usb: interface authorization: Declare authorized attribute The attribute authorized shows the authorization state for an interface. Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 447fe29b55b4..3deccab2ce0a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -178,6 +178,7 @@ struct usb_interface { unsigned needs_altsetting0:1; /* switch to altsetting 0 is pending */ unsigned needs_binding:1; /* needs delayed unbind/rebind */ unsigned resetting_device:1; /* true: bandwidth alloc after reset */ + unsigned authorized:1; /* used for interface authorization */ struct device dev; /* interface specific device info */ struct device *usb_dev; -- cgit v1.2.3 From 6b2bd3c8c69c4817a9a2feb4597021d486c105f4 Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Tue, 25 Aug 2015 21:10:06 +0200 Subject: usb: interface authorization: Introduces the default interface authorization Interfaces are allowed per default. This can disabled or enabled (again) by writing 0 or 1 to /sys/bus/usb/devices/usbX/interface_authorized_default Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index d2784c10bfe2..36ce3d215cad 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -120,6 +120,7 @@ struct usb_hcd { #define HCD_FLAG_WAKEUP_PENDING 4 /* root hub is resuming? */ #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ +#define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -131,6 +132,14 @@ struct usb_hcd { #define HCD_RH_RUNNING(hcd) ((hcd)->flags & (1U << HCD_FLAG_RH_RUNNING)) #define HCD_DEAD(hcd) ((hcd)->flags & (1U << HCD_FLAG_DEAD)) + /* + * Specifies if interfaces are authorized by default + * or they require explicit user space authorization; this bit is + * settable through /sys/class/usb_host/X/interface_authorized_default + */ +#define HCD_INTF_AUTHORIZED(hcd) \ + ((hcd)->flags & (1U << HCD_FLAG_INTF_AUTHORIZED)) + /* Flags that get set only during HCD registration or removal. */ unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ -- cgit v1.2.3 From ff8e2c560eca32043ed097099debac488a4bd99f Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Tue, 25 Aug 2015 21:10:11 +0200 Subject: usb: interface authorization: Use a flag for the default device authorization With this patch a flag instead of a variable is used for the default device authorization. Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 36ce3d215cad..4d147277b30d 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -58,12 +58,6 @@ * * Since "struct usb_bus" is so thin, you can't share much code in it. * This framework is a layer over that, and should be more sharable. - * - * @authorized_default: Specifies if new devices are authorized to - * connect by default or they require explicit - * user space authorization; this bit is settable - * through /sys/class/usb_host/X/authorized_default. - * For the rest is RO, so we don't lock to r/w it. */ /*-------------------------------------------------------------------------*/ @@ -121,6 +115,7 @@ struct usb_hcd { #define HCD_FLAG_RH_RUNNING 5 /* root hub is running? */ #define HCD_FLAG_DEAD 6 /* controller has died? */ #define HCD_FLAG_INTF_AUTHORIZED 7 /* authorize interfaces? */ +#define HCD_FLAG_DEV_AUTHORIZED 8 /* authorize devices? */ /* The flags can be tested using these macros; they are likely to * be slightly faster than test_bit(). @@ -140,6 +135,14 @@ struct usb_hcd { #define HCD_INTF_AUTHORIZED(hcd) \ ((hcd)->flags & (1U << HCD_FLAG_INTF_AUTHORIZED)) + /* + * Specifies if devices are authorized by default + * or they require explicit user space authorization; this bit is + * settable through /sys/class/usb_host/X/authorized_default + */ +#define HCD_DEV_AUTHORIZED(hcd) \ + ((hcd)->flags & (1U << HCD_FLAG_DEV_AUTHORIZED)) + /* Flags that get set only during HCD registration or removal. */ unsigned rh_registered:1;/* is root hub registered? */ unsigned rh_pollable:1; /* may we poll the root hub? */ @@ -150,7 +153,6 @@ struct usb_hcd { * support the new root-hub polling mechanism. */ unsigned uses_new_polling:1; unsigned wireless:1; /* Wireless USB HCD */ - unsigned authorized_default:1; unsigned has_tt:1; /* Integrated TT in root hub */ unsigned amd_resume_bug:1; /* AMD remote wakeup quirk */ unsigned can_do_streams:1; /* HC supports streams */ -- cgit v1.2.3 From 7dc87ff8815ef43717c936faea79013855e3dbef Mon Sep 17 00:00:00 2001 From: Fugang Duan Date: Mon, 7 Sep 2015 10:54:59 +0800 Subject: ARM: imx7d: add imx7d iomux-gpr field define Add imx7d iomux-gpr field define. Signed-off-by: Fugang Duan Signed-off-by: Shawn Guo --- include/linux/mfd/syscon/imx7-iomuxc-gpr.h | 47 ++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) create mode 100644 include/linux/mfd/syscon/imx7-iomuxc-gpr.h (limited to 'include/linux') diff --git a/include/linux/mfd/syscon/imx7-iomuxc-gpr.h b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h new file mode 100644 index 000000000000..4585d6105d68 --- /dev/null +++ b/include/linux/mfd/syscon/imx7-iomuxc-gpr.h @@ -0,0 +1,47 @@ +/* + * Copyright (C) 2015 Freescale Semiconductor, Inc. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#ifndef __LINUX_IMX7_IOMUXC_GPR_H +#define __LINUX_IMX7_IOMUXC_GPR_H + +#define IOMUXC_GPR0 0x00 +#define IOMUXC_GPR1 0x04 +#define IOMUXC_GPR2 0x08 +#define IOMUXC_GPR3 0x0c +#define IOMUXC_GPR4 0x10 +#define IOMUXC_GPR5 0x14 +#define IOMUXC_GPR6 0x18 +#define IOMUXC_GPR7 0x1c +#define IOMUXC_GPR8 0x20 +#define IOMUXC_GPR9 0x24 +#define IOMUXC_GPR10 0x28 +#define IOMUXC_GPR11 0x2c +#define IOMUXC_GPR12 0x30 +#define IOMUXC_GPR13 0x34 +#define IOMUXC_GPR14 0x38 +#define IOMUXC_GPR15 0x3c +#define IOMUXC_GPR16 0x40 +#define IOMUXC_GPR17 0x44 +#define IOMUXC_GPR18 0x48 +#define IOMUXC_GPR19 0x4c +#define IOMUXC_GPR20 0x50 +#define IOMUXC_GPR21 0x54 +#define IOMUXC_GPR22 0x58 + +/* For imx7d iomux gpr register field define */ +#define IMX7D_GPR1_IRQ_MASK (0x1 << 12) +#define IMX7D_GPR1_ENET1_TX_CLK_SEL_MASK (0x1 << 13) +#define IMX7D_GPR1_ENET2_TX_CLK_SEL_MASK (0x1 << 14) +#define IMX7D_GPR1_ENET_TX_CLK_SEL_MASK (0x3 << 13) +#define IMX7D_GPR1_ENET1_CLK_DIR_MASK (0x1 << 17) +#define IMX7D_GPR1_ENET2_CLK_DIR_MASK (0x1 << 18) +#define IMX7D_GPR1_ENET_CLK_DIR_MASK (0x3 << 17) + +#define IMX7D_GPR5_CSI_MUX_CONTROL_MIPI (0x1 << 4) + +#endif /* __LINUX_IMX7_IOMUXC_GPR_H */ -- cgit v1.2.3 From cb334648a10c7fa6f0f163c22602f4dc1c6d56b4 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:47 -0700 Subject: arcnet: Use normal kernel spacing style Standardized spacing is easier to read. git diff -w shows no differences. objdiff shows no differences. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 90 +++++++++++++++++++++++------------------------ 1 file changed, 45 insertions(+), 45 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index df0356220730..ccfd1d2f984b 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -51,7 +51,7 @@ /* * Debugging bitflags: each option can be enabled individually. - * + * * Note: only debug flags included in the ARCNET_DEBUG_MAX define will * actually be available. GCC will (at least, GCC 2.7.0 will) notice * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize @@ -77,33 +77,33 @@ #endif #ifndef ARCNET_DEBUG -#define ARCNET_DEBUG (D_NORMAL|D_EXTRA) +#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) #endif extern int arcnet_debug; /* macros to simplify debug checking */ -#define BUGLVL(x) if ((ARCNET_DEBUG_MAX)&arcnet_debug&(x)) -#define BUGMSG2(x,msg,args...) do { BUGLVL(x) printk(msg, ## args); } while (0) -#define BUGMSG(x,msg,args...) \ - BUGMSG2(x, "%s%6s: " msg, \ - x==D_NORMAL ? KERN_WARNING \ - : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name , ## args) +#define BUGLVL(x) if ((ARCNET_DEBUG_MAX) & arcnet_debug & (x)) +#define BUGMSG2(x, msg, args...) do { BUGLVL(x) printk(msg, ## args); } while (0) +#define BUGMSG(x, msg, args...) \ + BUGMSG2(x, "%s%6s: " msg, \ + x == D_NORMAL ? KERN_WARNING \ + : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ + dev->name, ## args) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ - unsigned long _x, _y; \ - _x = get_cycles(); \ - call; \ - _y = get_cycles(); \ - BUGMSG(D_TIMING, \ - "%s: %d bytes in %lu cycles == " \ - "%lu Kbytes/100Mcycle\n",\ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1));\ - } \ - else { \ - call;\ +#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ + unsigned long _x, _y; \ + _x = get_cycles(); \ + call; \ + _y = get_cycles(); \ + BUGMSG(D_TIMING, \ + "%s: %d bytes in %lu cycles == " \ + "%lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ + } \ + else { \ + call; \ } @@ -189,16 +189,16 @@ struct ArcProto { int mtu; /* largest possible packet */ int is_ip; /* This is a ip plugin - not a raw thing */ - void (*rx) (struct net_device * dev, int bufnum, - struct archdr * pkthdr, int length); - int (*build_header) (struct sk_buff * skb, struct net_device *dev, - unsigned short ethproto, uint8_t daddr); + void (*rx)(struct net_device *dev, int bufnum, + struct archdr *pkthdr, int length); + int (*build_header)(struct sk_buff *skb, struct net_device *dev, + unsigned short ethproto, uint8_t daddr); /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx) (struct net_device * dev, struct archdr * pkt, int length, - int bufnum); - int (*continue_tx) (struct net_device * dev, int bufnum); - int (*ack_tx) (struct net_device * dev, int acked); + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, int length, + int bufnum); + int (*continue_tx)(struct net_device *dev, int bufnum); + int (*ack_tx)(struct net_device *dev, int acked); }; extern struct ArcProto *arc_proto_map[256], *arc_proto_default, @@ -263,13 +263,13 @@ struct arcnet_local { * situations in which we (for example) want to pre-load a transmit * buffer, or start receiving while we copy a received packet to * memory. - * + * * The rules: only the interrupt handler is allowed to _add_ buffers to * the queue; thus, this doesn't require a lock. Both the interrupt * handler and the transmit function will want to _remove_ buffers, so * we need to handle the situation where they try to do it at the same * time. - * + * * If next_buf == first_free_buf, the queue is empty. Since there are * only four possible buffers, the queue should never be full. */ @@ -298,17 +298,17 @@ struct arcnet_local { /* hardware-specific functions */ struct { struct module *owner; - void (*command) (struct net_device * dev, int cmd); - int (*status) (struct net_device * dev); - void (*intmask) (struct net_device * dev, int mask); - int (*reset) (struct net_device * dev, int really_reset); - void (*open) (struct net_device * dev); - void (*close) (struct net_device * dev); - - void (*copy_to_card) (struct net_device * dev, int bufnum, int offset, - void *buf, int count); - void (*copy_from_card) (struct net_device * dev, int bufnum, int offset, - void *buf, int count); + void (*command)(struct net_device *dev, int cmd); + int (*status)(struct net_device *dev); + void (*intmask)(struct net_device *dev, int mask); + int (*reset)(struct net_device *dev, int really_reset); + void (*open)(struct net_device *dev); + void (*close)(struct net_device *dev); + + void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, + void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, int offset, + void *buf, int count); } hw; void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ @@ -325,7 +325,7 @@ struct arcnet_local { #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -#define arcnet_dump_skb(dev,skb,desc) ; +#define arcnet_dump_skb(dev, skb, desc) ; #endif void arcnet_unregister_proto(struct ArcProto *proto); @@ -335,7 +335,7 @@ struct net_device *alloc_arcdev(const char *name); int arcnet_open(struct net_device *dev); int arcnet_close(struct net_device *dev); netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev); + struct net_device *dev); void arcnet_timeout(struct net_device *dev); #endif /* __KERNEL__ */ -- cgit v1.2.3 From 01a1d5ac4e1a5890fd6c0d0ae900e1b6e4f851d6 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:48 -0700 Subject: arcnet: Add and remove blank lines Use a more current kernel line style. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index ccfd1d2f984b..78687885eb81 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -34,7 +34,6 @@ */ #define RECON_THRESHOLD 30 - /* * Define this to the minimum "timeout" value. If a transmit takes longer * than TX_TIMEOUT jiffies, Linux will abort the TX and retry. On a large @@ -44,11 +43,9 @@ */ #define TX_TIMEOUT (HZ * 200 / 1000) - /* Display warnings about the driver being an ALPHA version. */ #undef ALPHA_WARNING - /* * Debugging bitflags: each option can be enabled individually. * @@ -106,7 +103,6 @@ extern int arcnet_debug; call; \ } - /* * Time needed to reset the card - in ms (milliseconds). This works on my * SMC PC100. I can't find a reference that tells me just how long I @@ -182,7 +178,6 @@ extern int arcnet_debug; #define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, but default is 2.5MBit. */ - /* information needed to define an encapsulation driver */ struct ArcProto { char suffix; /* a for RFC1201, e for ether-encap, etc. */ @@ -204,7 +199,6 @@ struct ArcProto { extern struct ArcProto *arc_proto_map[256], *arc_proto_default, *arc_bcast_proto, *arc_raw_proto; - /* * "Incoming" is information needed for each address that could be sending * to us. Mostly for partially-received split packets. @@ -216,7 +210,6 @@ struct Incoming { numpackets; /* number of packets in split */ }; - /* only needed for RFC1201 */ struct Outgoing { struct ArcProto *proto; /* protocol driver that owns this: @@ -230,7 +223,6 @@ struct Outgoing { numsegs; /* number of segments */ }; - struct arcnet_local { uint8_t config, /* current value of CONFIG register */ timeout, /* Extended timeout for COM20020 */ @@ -251,7 +243,6 @@ struct arcnet_local { char *card_name; /* card ident string */ int card_flags; /* special card features */ - /* On preemtive and SMB a lock is needed */ spinlock_t lock; @@ -314,14 +305,11 @@ struct arcnet_local { void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ }; - #define ARCRESET(x) (lp->hw.reset(dev, (x))) #define ACOMMAND(x) (lp->hw.command(dev, (x))) #define ASTATUS() (lp->hw.status(dev)) #define AINTMASK(x) (lp->hw.intmask(dev, (x))) - - #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -- cgit v1.2.3 From d77510f3436e0db9b5e72fa8159ce26c3ac88d2d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:53 -0700 Subject: arcnet: Neaten BUGMSG macro defines These macros are actually printk and pr_cont uses with a flag. Add a new BUGLVL_TEST macro which is just the "should use" test and not an odd "if ()" macro to simplify uses in a new patch. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 24 +++++++++++++++++------- 1 file changed, 17 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 78687885eb81..ad610208fbba 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -78,14 +78,24 @@ #endif extern int arcnet_debug; +#define BUGLVL_TEST(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) +#define BUGLVL(x) if (BUGLVL_TEST(x)) + /* macros to simplify debug checking */ -#define BUGLVL(x) if ((ARCNET_DEBUG_MAX) & arcnet_debug & (x)) -#define BUGMSG2(x, msg, args...) do { BUGLVL(x) printk(msg, ## args); } while (0) -#define BUGMSG(x, msg, args...) \ - BUGMSG2(x, "%s%6s: " msg, \ - x == D_NORMAL ? KERN_WARNING \ - : x < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name, ## args) +#define BUGMSG(x, fmt, ...) \ +do { \ + if (BUGLVL_TEST(x)) \ + printk("%s%6s: " fmt, \ + (x) == D_NORMAL ? KERN_WARNING : \ + (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ + dev->name, ##__VA_ARGS__); \ +} while (0) + +#define BUGMSG2(x, fmt, ...) \ +do { \ + if (BUGLVL_TEST(x)) \ + printk(fmt, ##__VA_ARGS__); \ +} while (0) /* see how long a function call takes to run, expressed in CPU cycles */ #define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ -- cgit v1.2.3 From 72aeea4841c037b9b3abf65859673cbd7b6664a9 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:54 -0700 Subject: arcnet: Expand odd BUGLVL macro with if and uses Don't hide what should be obvious. Make the macro a simple test instead of using if and test. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index ad610208fbba..f07c66383b88 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -78,13 +78,12 @@ #endif extern int arcnet_debug; -#define BUGLVL_TEST(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) -#define BUGLVL(x) if (BUGLVL_TEST(x)) +#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) /* macros to simplify debug checking */ #define BUGMSG(x, fmt, ...) \ do { \ - if (BUGLVL_TEST(x)) \ + if (BUGLVL(x)) \ printk("%s%6s: " fmt, \ (x) == D_NORMAL ? KERN_WARNING : \ (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ @@ -93,12 +92,14 @@ do { \ #define BUGMSG2(x, fmt, ...) \ do { \ - if (BUGLVL_TEST(x)) \ + if (BUGLVL(x)) \ printk(fmt, ##__VA_ARGS__); \ } while (0) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) BUGLVL(D_TIMING) { \ +#define TIME(name, bytes, call) \ +do { \ + if (BUGLVL(D_TIMING)) { \ unsigned long _x, _y; \ _x = get_cycles(); \ call; \ @@ -108,10 +109,10 @@ do { \ "%lu Kbytes/100Mcycle\n", \ name, bytes, _y - _x, \ 100000000 / 1024 * bytes / (_y - _x + 1)); \ - } \ - else { \ + } else { \ call; \ - } + } \ +} while (0) /* * Time needed to reset the card - in ms (milliseconds). This works on my -- cgit v1.2.3 From a34c0932c3b2f28542825ffc5280d562c49ad42d Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:05:55 -0700 Subject: arcnet: Convert BUGMSG and BUGMSG2 to arc_prink and arc_cont These macros don't actually represent BUG uses but are more commonly used as logging macros, so use a more kernel style macro. Convert the BUGMSG from a netdev_ like use to actually use netdev_. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 32 +++++++++++++++++--------------- 1 file changed, 17 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index f07c66383b88..a678027ff6c2 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -81,34 +81,36 @@ extern int arcnet_debug; #define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) /* macros to simplify debug checking */ -#define BUGMSG(x, fmt, ...) \ +#define arc_printk(x, dev, fmt, ...) \ do { \ - if (BUGLVL(x)) \ - printk("%s%6s: " fmt, \ - (x) == D_NORMAL ? KERN_WARNING : \ - (x) < D_DURING ? KERN_INFO : KERN_DEBUG, \ - dev->name, ##__VA_ARGS__); \ + if (BUGLVL(x)) { \ + if ((x) == D_NORMAL) \ + netdev_warn(dev, fmt, ##__VA_ARGS__); \ + else if ((x) < D_DURING) \ + netdev_info(dev, fmt, ##__VA_ARGS__); \ + else \ + netdev_dbg(dev, fmt, ##__VA_ARGS__); \ + } \ } while (0) -#define BUGMSG2(x, fmt, ...) \ +#define arc_cont(x, fmt, ...) \ do { \ - if (BUGLVL(x)) \ - printk(fmt, ##__VA_ARGS__); \ + if (BUGLVL(x)) \ + pr_cont(fmt, ##__VA_ARGS__); \ } while (0) /* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(name, bytes, call) \ +#define TIME(dev, name, bytes, call) \ do { \ if (BUGLVL(D_TIMING)) { \ unsigned long _x, _y; \ _x = get_cycles(); \ call; \ _y = get_cycles(); \ - BUGMSG(D_TIMING, \ - "%s: %d bytes in %lu cycles == " \ - "%lu Kbytes/100Mcycle\n", \ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1)); \ + arc_printk(D_TIMING, dev, \ + "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ + name, bytes, _y - _x, \ + 100000000 / 1024 * bytes / (_y - _x + 1)); \ } else { \ call; \ } \ -- cgit v1.2.3 From 83df99b50f901cb7c72cf132a83f43bbaeb01362 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:01 -0700 Subject: arcnet: Convert arcnet_dump_skb macro to static inline Make sure the arguments are tested appropriately when not using this function. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index a678027ff6c2..1d8e36e13616 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -326,7 +326,10 @@ struct arcnet_local { #if ARCNET_DEBUG_MAX & D_SKB void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); #else -#define arcnet_dump_skb(dev, skb, desc) ; +static inline +void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) +{ +} #endif void arcnet_unregister_proto(struct ArcProto *proto); -- cgit v1.2.3 From d6d7d3ed56e3bfe7fd34108dbe23f0610e3d8621 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:02 -0700 Subject: arcnet: Wrap some long lines Just neatening. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h index 1d8e36e13616..9ca135d0f114 100644 --- a/include/linux/arcdevice.h +++ b/include/linux/arcdevice.h @@ -203,8 +203,8 @@ struct ArcProto { unsigned short ethproto, uint8_t daddr); /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, int length, - int bufnum); + int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, + int length, int bufnum); int (*continue_tx)(struct net_device *dev, int bufnum); int (*ack_tx)(struct net_device *dev, int acked); }; @@ -309,10 +309,10 @@ struct arcnet_local { void (*open)(struct net_device *dev); void (*close)(struct net_device *dev); - void (*copy_to_card)(struct net_device *dev, int bufnum, int offset, - void *buf, int count); - void (*copy_from_card)(struct net_device *dev, int bufnum, int offset, - void *buf, int count); + void (*copy_to_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); + void (*copy_from_card)(struct net_device *dev, int bufnum, + int offset, void *buf, int count); } hw; void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ -- cgit v1.2.3 From 26c6d281688e8bb8154fa78c60e551d024f5d0b8 Mon Sep 17 00:00:00 2001 From: Joe Perches Date: Tue, 5 May 2015 10:06:03 -0700 Subject: arcnet: Move files out of include/linux These #include files don't need to be in the include/linux directory as they can be local to drivers/net/arcnet/ Move them and update the #include statements. Update the MAINTAINERS file pattern by deleting arcdevice from the NETWORKING block as arcnet is currently unmaintained. Signed-off-by: Joe Perches Signed-off-by: Michael Grzeschik --- include/linux/arcdevice.h | 346 ---------------------------------------------- include/linux/com20020.h | 145 ------------------- 2 files changed, 491 deletions(-) delete mode 100644 include/linux/arcdevice.h delete mode 100644 include/linux/com20020.h (limited to 'include/linux') diff --git a/include/linux/arcdevice.h b/include/linux/arcdevice.h deleted file mode 100644 index 9ca135d0f114..000000000000 --- a/include/linux/arcdevice.h +++ /dev/null @@ -1,346 +0,0 @@ -/* - * INET An implementation of the TCP/IP protocol suite for the LINUX - * operating system. NET is implemented using the BSD Socket - * interface as the means of communication with the user level. - * - * Definitions used by the ARCnet driver. - * - * Authors: Avery Pennarun and David Woodhouse - * - * This program is free software; you can redistribute it and/or - * modify it under the terms of the GNU General Public License - * as published by the Free Software Foundation; either version - * 2 of the License, or (at your option) any later version. - * - */ -#ifndef _LINUX_ARCDEVICE_H -#define _LINUX_ARCDEVICE_H - -#include -#include - -#ifdef __KERNEL__ -#include - -/* - * RECON_THRESHOLD is the maximum number of RECON messages to receive - * within one minute before printing a "cabling problem" warning. The - * default value should be fine. - * - * After that, a "cabling restored" message will be printed on the next IRQ - * if no RECON messages have been received for 10 seconds. - * - * Do not define RECON_THRESHOLD at all if you want to disable this feature. - */ -#define RECON_THRESHOLD 30 - -/* - * Define this to the minimum "timeout" value. If a transmit takes longer - * than TX_TIMEOUT jiffies, Linux will abort the TX and retry. On a large - * network, or one with heavy network traffic, this timeout may need to be - * increased. The larger it is, though, the longer it will be between - * necessary transmits - don't set this too high. - */ -#define TX_TIMEOUT (HZ * 200 / 1000) - -/* Display warnings about the driver being an ALPHA version. */ -#undef ALPHA_WARNING - -/* - * Debugging bitflags: each option can be enabled individually. - * - * Note: only debug flags included in the ARCNET_DEBUG_MAX define will - * actually be available. GCC will (at least, GCC 2.7.0 will) notice - * lines using a BUGLVL not in ARCNET_DEBUG_MAX and automatically optimize - * them out. - */ -#define D_NORMAL 1 /* important operational info */ -#define D_EXTRA 2 /* useful, but non-vital information */ -#define D_INIT 4 /* show init/probe messages */ -#define D_INIT_REASONS 8 /* show reasons for discarding probes */ -#define D_RECON 32 /* print a message whenever token is lost */ -#define D_PROTO 64 /* debug auto-protocol support */ -/* debug levels below give LOTS of output during normal operation! */ -#define D_DURING 128 /* trace operations (including irq's) */ -#define D_TX 256 /* show tx packets */ -#define D_RX 512 /* show rx packets */ -#define D_SKB 1024 /* show skb's */ -#define D_SKB_SIZE 2048 /* show skb sizes */ -#define D_TIMING 4096 /* show time needed to copy buffers to card */ -#define D_DEBUG 8192 /* Very detailed debug line for line */ - -#ifndef ARCNET_DEBUG_MAX -#define ARCNET_DEBUG_MAX (127) /* change to ~0 if you want detailed debugging */ -#endif - -#ifndef ARCNET_DEBUG -#define ARCNET_DEBUG (D_NORMAL | D_EXTRA) -#endif -extern int arcnet_debug; - -#define BUGLVL(x) ((x) & ARCNET_DEBUG_MAX & arcnet_debug) - -/* macros to simplify debug checking */ -#define arc_printk(x, dev, fmt, ...) \ -do { \ - if (BUGLVL(x)) { \ - if ((x) == D_NORMAL) \ - netdev_warn(dev, fmt, ##__VA_ARGS__); \ - else if ((x) < D_DURING) \ - netdev_info(dev, fmt, ##__VA_ARGS__); \ - else \ - netdev_dbg(dev, fmt, ##__VA_ARGS__); \ - } \ -} while (0) - -#define arc_cont(x, fmt, ...) \ -do { \ - if (BUGLVL(x)) \ - pr_cont(fmt, ##__VA_ARGS__); \ -} while (0) - -/* see how long a function call takes to run, expressed in CPU cycles */ -#define TIME(dev, name, bytes, call) \ -do { \ - if (BUGLVL(D_TIMING)) { \ - unsigned long _x, _y; \ - _x = get_cycles(); \ - call; \ - _y = get_cycles(); \ - arc_printk(D_TIMING, dev, \ - "%s: %d bytes in %lu cycles == %lu Kbytes/100Mcycle\n", \ - name, bytes, _y - _x, \ - 100000000 / 1024 * bytes / (_y - _x + 1)); \ - } else { \ - call; \ - } \ -} while (0) - -/* - * Time needed to reset the card - in ms (milliseconds). This works on my - * SMC PC100. I can't find a reference that tells me just how long I - * should wait. - */ -#define RESETtime (300) - -/* - * These are the max/min lengths of packet payload, not including the - * arc_hardware header, but definitely including the soft header. - * - * Note: packet sizes 254, 255, 256 are impossible because of the way - * ARCnet registers work That's why RFC1201 defines "exception" packets. - * In non-RFC1201 protocols, we have to just tack some extra bytes on the - * end. - */ -#define MTU 253 /* normal packet max size */ -#define MinTU 257 /* extended packet min size */ -#define XMTU 508 /* extended packet max size */ - -/* status/interrupt mask bit fields */ -#define TXFREEflag 0x01 /* transmitter available */ -#define TXACKflag 0x02 /* transmitted msg. ackd */ -#define RECONflag 0x04 /* network reconfigured */ -#define TESTflag 0x08 /* test flag */ -#define EXCNAKflag 0x08 /* excesive nak flag */ -#define RESETflag 0x10 /* power-on-reset */ -#define RES1flag 0x20 /* reserved - usually set by jumper */ -#define RES2flag 0x40 /* reserved - usually set by jumper */ -#define NORXflag 0x80 /* receiver inhibited */ - -/* Flags used for IO-mapped memory operations */ -#define AUTOINCflag 0x40 /* Increase location with each access */ -#define IOMAPflag 0x02 /* (for 90xx) Use IO mapped memory, not mmap */ -#define ENABLE16flag 0x80 /* (for 90xx) Enable 16-bit mode */ - -/* in the command register, the following bits have these meanings: - * 0-2 command - * 3-4 page number (for enable rcv/xmt command) - * 7 receive broadcasts - */ -#define NOTXcmd 0x01 /* disable transmitter */ -#define NORXcmd 0x02 /* disable receiver */ -#define TXcmd 0x03 /* enable transmitter */ -#define RXcmd 0x04 /* enable receiver */ -#define CONFIGcmd 0x05 /* define configuration */ -#define CFLAGScmd 0x06 /* clear flags */ -#define TESTcmd 0x07 /* load test flags */ - -/* flags for "clear flags" command */ -#define RESETclear 0x08 /* power-on-reset */ -#define CONFIGclear 0x10 /* system reconfigured */ - -#define EXCNAKclear 0x0E /* Clear and acknowledge the excive nak bit */ - -/* flags for "load test flags" command */ -#define TESTload 0x08 /* test flag (diagnostic) */ - -/* byte deposited into first address of buffers on reset */ -#define TESTvalue 0321 /* that's octal for 0xD1 :) */ - -/* for "enable receiver" command */ -#define RXbcasts 0x80 /* receive broadcasts */ - -/* flags for "define configuration" command */ -#define NORMALconf 0x00 /* 1-249 byte packets */ -#define EXTconf 0x08 /* 250-504 byte packets */ - -/* card feature flags, set during auto-detection. - * (currently only used by com20020pci) - */ -#define ARC_IS_5MBIT 1 /* card default speed is 5MBit */ -#define ARC_CAN_10MBIT 2 /* card uses COM20022, supporting 10MBit, - but default is 2.5MBit. */ - -/* information needed to define an encapsulation driver */ -struct ArcProto { - char suffix; /* a for RFC1201, e for ether-encap, etc. */ - int mtu; /* largest possible packet */ - int is_ip; /* This is a ip plugin - not a raw thing */ - - void (*rx)(struct net_device *dev, int bufnum, - struct archdr *pkthdr, int length); - int (*build_header)(struct sk_buff *skb, struct net_device *dev, - unsigned short ethproto, uint8_t daddr); - - /* these functions return '1' if the skb can now be freed */ - int (*prepare_tx)(struct net_device *dev, struct archdr *pkt, - int length, int bufnum); - int (*continue_tx)(struct net_device *dev, int bufnum); - int (*ack_tx)(struct net_device *dev, int acked); -}; - -extern struct ArcProto *arc_proto_map[256], *arc_proto_default, - *arc_bcast_proto, *arc_raw_proto; - -/* - * "Incoming" is information needed for each address that could be sending - * to us. Mostly for partially-received split packets. - */ -struct Incoming { - struct sk_buff *skb; /* packet data buffer */ - __be16 sequence; /* sequence number of assembly */ - uint8_t lastpacket, /* number of last packet (from 1) */ - numpackets; /* number of packets in split */ -}; - -/* only needed for RFC1201 */ -struct Outgoing { - struct ArcProto *proto; /* protocol driver that owns this: - * if NULL, no packet is pending. - */ - struct sk_buff *skb; /* buffer from upper levels */ - struct archdr *pkt; /* a pointer into the skb */ - uint16_t length, /* bytes total */ - dataleft, /* bytes left */ - segnum, /* segment being sent */ - numsegs; /* number of segments */ -}; - -struct arcnet_local { - uint8_t config, /* current value of CONFIG register */ - timeout, /* Extended timeout for COM20020 */ - backplane, /* Backplane flag for COM20020 */ - clockp, /* COM20020 clock divider */ - clockm, /* COM20020 clock multiplier flag */ - setup, /* Contents of setup1 register */ - setup2, /* Contents of setup2 register */ - intmask; /* current value of INTMASK register */ - uint8_t default_proto[256]; /* default encap to use for each host */ - int cur_tx, /* buffer used by current transmit, or -1 */ - next_tx, /* buffer where a packet is ready to send */ - cur_rx; /* current receive buffer */ - int lastload_dest, /* can last loaded packet be acked? */ - lasttrans_dest; /* can last TX'd packet be acked? */ - int timed_out; /* need to process TX timeout and drop packet */ - unsigned long last_timeout; /* time of last reported timeout */ - char *card_name; /* card ident string */ - int card_flags; /* special card features */ - - /* On preemtive and SMB a lock is needed */ - spinlock_t lock; - - /* - * Buffer management: an ARCnet card has 4 x 512-byte buffers, each of - * which can be used for either sending or receiving. The new dynamic - * buffer management routines use a simple circular queue of available - * buffers, and take them as they're needed. This way, we simplify - * situations in which we (for example) want to pre-load a transmit - * buffer, or start receiving while we copy a received packet to - * memory. - * - * The rules: only the interrupt handler is allowed to _add_ buffers to - * the queue; thus, this doesn't require a lock. Both the interrupt - * handler and the transmit function will want to _remove_ buffers, so - * we need to handle the situation where they try to do it at the same - * time. - * - * If next_buf == first_free_buf, the queue is empty. Since there are - * only four possible buffers, the queue should never be full. - */ - atomic_t buf_lock; - int buf_queue[5]; - int next_buf, first_free_buf; - - /* network "reconfiguration" handling */ - unsigned long first_recon; /* time of "first" RECON message to count */ - unsigned long last_recon; /* time of most recent RECON */ - int num_recons; /* number of RECONs between first and last. */ - int network_down; /* do we think the network is down? */ - - int excnak_pending; /* We just got an excesive nak interrupt */ - - struct { - uint16_t sequence; /* sequence number (incs with each packet) */ - __be16 aborted_seq; - - struct Incoming incoming[256]; /* one from each address */ - } rfc1201; - - /* really only used by rfc1201, but we'll pretend it's not */ - struct Outgoing outgoing; /* packet currently being sent */ - - /* hardware-specific functions */ - struct { - struct module *owner; - void (*command)(struct net_device *dev, int cmd); - int (*status)(struct net_device *dev); - void (*intmask)(struct net_device *dev, int mask); - int (*reset)(struct net_device *dev, int really_reset); - void (*open)(struct net_device *dev); - void (*close)(struct net_device *dev); - - void (*copy_to_card)(struct net_device *dev, int bufnum, - int offset, void *buf, int count); - void (*copy_from_card)(struct net_device *dev, int bufnum, - int offset, void *buf, int count); - } hw; - - void __iomem *mem_start; /* pointer to ioremap'ed MMIO */ -}; - -#define ARCRESET(x) (lp->hw.reset(dev, (x))) -#define ACOMMAND(x) (lp->hw.command(dev, (x))) -#define ASTATUS() (lp->hw.status(dev)) -#define AINTMASK(x) (lp->hw.intmask(dev, (x))) - -#if ARCNET_DEBUG_MAX & D_SKB -void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc); -#else -static inline -void arcnet_dump_skb(struct net_device *dev, struct sk_buff *skb, char *desc) -{ -} -#endif - -void arcnet_unregister_proto(struct ArcProto *proto); -irqreturn_t arcnet_interrupt(int irq, void *dev_id); -struct net_device *alloc_arcdev(const char *name); - -int arcnet_open(struct net_device *dev); -int arcnet_close(struct net_device *dev); -netdev_tx_t arcnet_send_packet(struct sk_buff *skb, - struct net_device *dev); -void arcnet_timeout(struct net_device *dev); - -#endif /* __KERNEL__ */ -#endif /* _LINUX_ARCDEVICE_H */ diff --git a/include/linux/com20020.h b/include/linux/com20020.h deleted file mode 100644 index 85898995b234..000000000000 --- a/include/linux/com20020.h +++ /dev/null @@ -1,145 +0,0 @@ -/* - * Linux ARCnet driver - COM20020 chipset support - function declarations - * - * Written 1997 by David Woodhouse. - * Written 1994-1999 by Avery Pennarun. - * Derived from skeleton.c by Donald Becker. - * - * Special thanks to Contemporary Controls, Inc. (www.ccontrols.com) - * for sponsoring the further development of this driver. - * - * ********************** - * - * The original copyright of skeleton.c was as follows: - * - * skeleton.c Written 1993 by Donald Becker. - * Copyright 1993 United States Government as represented by the - * Director, National Security Agency. This software may only be used - * and distributed according to the terms of the GNU General Public License as - * modified by SRC, incorporated herein by reference. - * - * ********************** - * - * For more details, see drivers/net/arcnet.c - * - * ********************** - */ -#ifndef __COM20020_H -#define __COM20020_H - -int com20020_check(struct net_device *dev); -int com20020_found(struct net_device *dev, int shared); -extern const struct net_device_ops com20020_netdev_ops; - -/* The number of low I/O ports used by the card. */ -#define ARCNET_TOTAL_SIZE 8 - -/* various register addresses */ -#ifdef CONFIG_SA1100_CT6001 -#define BUS_ALIGN 2 /* 8 bit device on a 16 bit bus - needs padding */ -#else -#define BUS_ALIGN 1 -#endif - -#define PLX_PCI_MAX_CARDS 2 - -struct com20020_pci_channel_map { - u32 bar; - u32 offset; - u32 size; /* 0x00 - auto, e.g. length of entire bar */ -}; - -struct com20020_pci_card_info { - const char *name; - int devcount; - - struct com20020_pci_channel_map chan_map_tbl[PLX_PCI_MAX_CARDS]; - - unsigned int flags; -}; - -struct com20020_priv { - struct com20020_pci_card_info *ci; - struct list_head list_dev; -}; - -struct com20020_dev { - struct list_head list; - struct net_device *dev; - - struct com20020_priv *pci_priv; - int index; -}; - -#define _INTMASK (ioaddr+BUS_ALIGN*0) /* writable */ -#define _STATUS (ioaddr+BUS_ALIGN*0) /* readable */ -#define _COMMAND (ioaddr+BUS_ALIGN*1) /* standard arcnet commands */ -#define _DIAGSTAT (ioaddr+BUS_ALIGN*1) /* diagnostic status register */ -#define _ADDR_HI (ioaddr+BUS_ALIGN*2) /* control registers for IO-mapped memory */ -#define _ADDR_LO (ioaddr+BUS_ALIGN*3) -#define _MEMDATA (ioaddr+BUS_ALIGN*4) /* data port for IO-mapped memory */ -#define _SUBADR (ioaddr+BUS_ALIGN*5) /* the extended port _XREG refers to */ -#define _CONFIG (ioaddr+BUS_ALIGN*6) /* configuration register */ -#define _XREG (ioaddr+BUS_ALIGN*7) /* extra registers (indexed by _CONFIG - or _SUBADR) */ - -/* in the ADDR_HI register */ -#define RDDATAflag 0x80 /* next access is a read (not a write) */ - -/* in the DIAGSTAT register */ -#define NEWNXTIDflag 0x02 /* ID to which token is passed has changed */ - -/* in the CONFIG register */ -#define RESETcfg 0x80 /* put card in reset state */ -#define TXENcfg 0x20 /* enable TX */ - -/* in SETUP register */ -#define PROMISCset 0x10 /* enable RCV_ALL */ -#define P1MODE 0x80 /* enable P1-MODE for Backplane */ -#define SLOWARB 0x01 /* enable Slow Arbitration for >=5Mbps */ - -/* COM2002x */ -#define SUB_TENTATIVE 0 /* tentative node ID */ -#define SUB_NODE 1 /* node ID */ -#define SUB_SETUP1 2 /* various options */ -#define SUB_TEST 3 /* test/diag register */ - -/* COM20022 only */ -#define SUB_SETUP2 4 /* sundry options */ -#define SUB_BUSCTL 5 /* bus control options */ -#define SUB_DMACOUNT 6 /* DMA count options */ - -#define SET_SUBADR(x) do { \ - if ((x) < 4) \ - { \ - lp->config = (lp->config & ~0x03) | (x); \ - SETCONF; \ - } \ - else \ - { \ - outb(x, _SUBADR); \ - } \ -} while (0) - -#undef ARCRESET -#undef ASTATUS -#undef ACOMMAND -#undef AINTMASK - -#define ARCRESET { outb(lp->config | 0x80, _CONFIG); \ - udelay(5); \ - outb(lp->config , _CONFIG); \ - } -#define ARCRESET0 { outb(0x18 | 0x80, _CONFIG); \ - udelay(5); \ - outb(0x18 , _CONFIG); \ - } - -#define ASTATUS() inb(_STATUS) -#define ADIAGSTATUS() inb(_DIAGSTAT) -#define ACOMMAND(cmd) outb((cmd),_COMMAND) -#define AINTMASK(msk) outb((msk),_INTMASK) - -#define SETCONF outb(lp->config, _CONFIG) - -#endif /* __COM20020_H */ -- cgit v1.2.3 From 2726d6ce389788c7fe724961a6e1bfe569560088 Mon Sep 17 00:00:00 2001 From: Juri Lelli Date: Wed, 2 Sep 2015 11:01:34 +0100 Subject: sched/deadline: Unify dl_time_before() usage Move dl_time_before() static definition in include/linux/sched/deadline.h so that it can be used by different parties without being re-defined. Reported-by: Luca Abeni Signed-off-by: Juri Lelli Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1441188096-23021-3-git-send-email-juri.lelli@arm.com Signed-off-by: Ingo Molnar --- include/linux/sched/deadline.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched/deadline.h b/include/linux/sched/deadline.h index 9d303b8847df..9089a2ae913d 100644 --- a/include/linux/sched/deadline.h +++ b/include/linux/sched/deadline.h @@ -21,4 +21,9 @@ static inline int dl_task(struct task_struct *p) return dl_prio(p->prio); } +static inline bool dl_time_before(u64 a, u64 b) +{ + return (s64)(a - b) < 0; +} + #endif /* _SCHED_DEADLINE_H */ -- cgit v1.2.3 From 90fe65148ea76988d8d5acbf3e578aa74129a490 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2015 15:04:59 +0200 Subject: atomic: Add atomic_long_t bitops When adding the atomic bitops, I seem to have forgotten about atomic_long_t, fix this. Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 00a5763e850e..29dafa184aeb 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -451,7 +451,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif -#include #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif @@ -463,4 +462,6 @@ static inline void atomic64_andnot(long long i, atomic64_t *v) } #endif +#include + #endif /* _LINUX_ATOMIC_H */ -- cgit v1.2.3 From e3e72ab80a3fac0b88e07d358a2c75724ccd66b4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Sep 2015 13:22:52 +0200 Subject: atomic: Implement atomic_read_ctrl() Provide atomic_read_ctrl() to mirror READ_ONCE_CTRL(), such that we can more conveniently use atomics in control dependencies. Since we can assume atomic_read() implies a READ_ONCE(), we must only emit an extra smp_read_barrier_depends() in order to upgrade to READ_ONCE_CTRL() semantics. Requested-by: Dmitry Vyukov Signed-off-by: Peter Zijlstra (Intel) Acked-by: Will Deacon Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: oleg@redhat.com Link: http://lkml.kernel.org/r/20150918115637.GM3604@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 18 ++++++++++++++++++ 1 file changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 29dafa184aeb..e326469bb9d6 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -4,6 +4,15 @@ #include #include +#ifndef atomic_read_ctrl +static inline int atomic_read_ctrl(const atomic_t *v) +{ + int val = atomic_read(v); + smp_read_barrier_depends(); /* Enforce control dependency. */ + return val; +} +#endif + /* * Relaxed variants of xchg, cmpxchg and some atomic operations. * @@ -455,6 +464,15 @@ static inline int atomic_dec_if_positive(atomic_t *v) #include #endif +#ifndef atomic64_read_ctrl +static inline long long atomic64_read_ctrl(const atomic64_t *v) +{ + long long val = atomic64_read(v); + smp_read_barrier_depends(); /* Enforce control dependency. */ + return val; +} +#endif + #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { -- cgit v1.2.3 From c6e1e7b5b7f031910850ddaf7bfa65ba3b4843ea Mon Sep 17 00:00:00 2001 From: Juergen Gross Date: Tue, 22 Sep 2015 12:48:59 +0200 Subject: sched/core: Make 'sched_domain_topology' declaration static The 'sched_domain_topology' variable is only used within kernel/sched/core.c. Make it static. Signed-off-by: Juergen Gross Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1442918939-9907-1-git-send-email-jgross@suse.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index bd38b3ee9e83..699228bb0035 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1127,8 +1127,6 @@ struct sched_domain_topology_level { #endif }; -extern struct sched_domain_topology_level *sched_domain_topology; - extern void set_sched_topology(struct sched_domain_topology_level *tl); extern void wake_up_if_idle(int cpu); -- cgit v1.2.3 From 081bab217db769526c1202c87099ff69737126ae Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 22 Sep 2015 14:35:06 -0500 Subject: power: bq27x00_battery: Renaming for consistency Rename functions that are used by multiple devices. New devices have been added and the function names and driver name are no longer general enough for the functionality they provide. Signed-off-by: Andrew F. Davis Acked-by: Tony Lindgren Acked-by: GUAN Xuetao Signed-off-by: Sebastian Reichel --- include/linux/power/bq27x00_battery.h | 19 ------------------- include/linux/power/bq27xxx_battery.h | 19 +++++++++++++++++++ 2 files changed, 19 insertions(+), 19 deletions(-) delete mode 100644 include/linux/power/bq27x00_battery.h create mode 100644 include/linux/power/bq27xxx_battery.h (limited to 'include/linux') diff --git a/include/linux/power/bq27x00_battery.h b/include/linux/power/bq27x00_battery.h deleted file mode 100644 index a857f719bf40..000000000000 --- a/include/linux/power/bq27x00_battery.h +++ /dev/null @@ -1,19 +0,0 @@ -#ifndef __LINUX_BQ27X00_BATTERY_H__ -#define __LINUX_BQ27X00_BATTERY_H__ - -/** - * struct bq27000_plaform_data - Platform data for bq27000 devices - * @name: Name of the battery. If NULL the driver will fallback to "bq27000". - * @read: HDQ read callback. - * This function should provide access to the HDQ bus the battery is - * connected to. - * The first parameter is a pointer to the battery device, the second the - * register to be read. The return value should either be the content of - * the passed register or an error value. - */ -struct bq27000_platform_data { - const char *name; - int (*read)(struct device *dev, unsigned int); -}; - -#endif diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h new file mode 100644 index 000000000000..e70a93a6799f --- /dev/null +++ b/include/linux/power/bq27xxx_battery.h @@ -0,0 +1,19 @@ +#ifndef __LINUX_BQ27X00_BATTERY_H__ +#define __LINUX_BQ27X00_BATTERY_H__ + +/** + * struct bq27xxx_plaform_data - Platform data for bq27xxx devices + * @name: Name of the battery. If NULL the driver will fallback to "bq27000". + * @read: HDQ read callback. + * This function should provide access to the HDQ bus the battery is + * connected to. + * The first parameter is a pointer to the battery device, the second the + * register to be read. The return value should either be the content of + * the passed register or an error value. + */ +struct bq27xxx_platform_data { + const char *name; + int (*read)(struct device *dev, unsigned int); +}; + +#endif -- cgit v1.2.3 From 424cfde49acaf1426e90837961e8aead0238b11b Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 22 Sep 2015 14:35:07 -0500 Subject: power: bq27xxx_battery: Platform initialization must declare a device When initialized as a platform device the initializer must now specify a device. An empty device name is no longer valid. Signed-off-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index e70a93a6799f..a4efb10a1bab 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -3,7 +3,8 @@ /** * struct bq27xxx_plaform_data - Platform data for bq27xxx devices - * @name: Name of the battery. If NULL the driver will fallback to "bq27000". + * @name: Name of the battery. + * @chip: Chip class number of this device. * @read: HDQ read callback. * This function should provide access to the HDQ bus the battery is * connected to. @@ -11,8 +12,11 @@ * register to be read. The return value should either be the content of * the passed register or an error value. */ +enum bq27xxx_chip { BQ27000 = 1, BQ27500, BQ27425, BQ27742, BQ27510 }; + struct bq27xxx_platform_data { const char *name; + enum bq27xxx_chip chip; int (*read)(struct device *dev, unsigned int); }; -- cgit v1.2.3 From d74534c27775857cb09abd0f92ed9539dc8d0a93 Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Tue, 22 Sep 2015 14:35:09 -0500 Subject: power: bq27xxx_battery: Add support for additional bq27xxx family devices Add support for additional devices and register equivalent family devices including the bq27010, bq27210, bq27500, bq27510, bq27520, bq27530, bq27531, bq27541, bq27542, bq27546, bq27545, bq27441, bq27421, and the bq27641. To facilitate this process the register mapings have been moved to tables and other small cleanups have been made. Signed-off-by: Andrew F. Davis Signed-off-by: Sebastian Reichel --- include/linux/power/bq27xxx_battery.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/power/bq27xxx_battery.h b/include/linux/power/bq27xxx_battery.h index a4efb10a1bab..45f6a7b5b3cb 100644 --- a/include/linux/power/bq27xxx_battery.h +++ b/include/linux/power/bq27xxx_battery.h @@ -12,7 +12,15 @@ * register to be read. The return value should either be the content of * the passed register or an error value. */ -enum bq27xxx_chip { BQ27000 = 1, BQ27500, BQ27425, BQ27742, BQ27510 }; +enum bq27xxx_chip { + BQ27000 = 1, /* bq27000, bq27200 */ + BQ27010, /* bq27010, bq27210 */ + BQ27500, /* bq27500, bq27510, bq27520 */ + BQ27530, /* bq27530, bq27531 */ + BQ27541, /* bq27541, bq27542, bq27546, bq27742 */ + BQ27545, /* bq27545 */ + BQ27421, /* bq27421, bq27425, bq27441, bq27621 */ +}; struct bq27xxx_platform_data { const char *name; -- cgit v1.2.3 From a79e88d9fbbe2e3ecb9d883fb59dca7468d42d79 Mon Sep 17 00:00:00 2001 From: Scott Feldman Date: Wed, 23 Sep 2015 08:39:16 -0700 Subject: bridge: define some min/max/default ageing time constants Signed-off-by: Scott Feldman Acked-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/if_bridge.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_bridge.h b/include/linux/if_bridge.h index dad8b00beed2..a338a688ee4a 100644 --- a/include/linux/if_bridge.h +++ b/include/linux/if_bridge.h @@ -46,6 +46,12 @@ struct br_ip_list { #define BR_LEARNING_SYNC BIT(9) #define BR_PROXYARP_WIFI BIT(10) +/* values as per ieee8021QBridgeFdbAgingTime */ +#define BR_MIN_AGEING_TIME (10 * HZ) +#define BR_MAX_AGEING_TIME (1000000 * HZ) + +#define BR_DEFAULT_AGEING_TIME (300 * HZ) + extern void brioctl_set(int (*ioctl_hook)(struct net *, unsigned int, void __user *)); typedef int br_should_route_hook_t(struct sk_buff *skb); -- cgit v1.2.3 From b4fe8ba7a310da6a2b99e3abe67c7815198cde49 Mon Sep 17 00:00:00 2001 From: Qipeng Zha Date: Tue, 15 Sep 2015 00:39:17 +0800 Subject: regmap: Add generic macro to define regmap_irq Add REGMAP_IRQ_REG macro in regmap.h to define regmap_irq structure easily for other driver module. Signed-off-by: Qipeng Zha Acked-by: Mark Brown Signed-off-by: Lee Jones --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..f6226976e158 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -791,6 +791,9 @@ struct regmap_irq { unsigned int mask; }; +#define REGMAP_IRQ_REG(_irq, _off, _mask) \ + [_irq] = { .reg_offset = (_off), .mask = (_mask) } + /** * Description of a generic regmap irq_chip. This is not intended to * handle every possible interrupt controller, but it should handle a -- cgit v1.2.3 From c0017ed71966a19ec40c7bc900d4338ddfbc4105 Mon Sep 17 00:00:00 2001 From: Markus Pargmann Date: Fri, 14 Aug 2015 16:10:59 +0200 Subject: gpio: Introduce gpio descriptor 'name' The latest gpio hogging mechanism assigns each gpio a 'line-name' in the devicetree. The 'name' field is different from the 'label' field. 'label' is only used for requested GPIOs to describe its current use by driver or userspace. The 'name' field describes the GPIO itself, not the use. This is most likely identical to the label in the schematic on the GPIO line and should help to find this particular GPIO. This is equivalent to the gpiochip->names array. However names should be stored in the GPIO descriptor. We will use gpiochip->names in the future only as initializer for the GPIO descriptors for drivers that assign GPIO names hardcoded. All other GPIO names will be parsed from DT and directly assigned to the GPIO descriptor. This patch adds a helper function to find gpio descriptors by name instead of gpio number. Signed-off-by: Markus Pargmann Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 14cac67c2012..366a3fdbdbea 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -130,6 +130,7 @@ int gpiod_to_irq(const struct gpio_desc *desc); /* Convert between the old gpio_ and new gpiod_ interfaces */ struct gpio_desc *gpio_to_desc(unsigned gpio); int desc_to_gpio(const struct gpio_desc *desc); +struct gpio_desc *gpio_name_to_desc(const char *name); /* Child properties interface */ struct fwnode_handle; @@ -400,6 +401,12 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio) { return ERR_PTR(-EINVAL); } + +static inline struct gpio_desc *gpio_name_to_desc(const char *name) +{ + return ERR_PTR(-EINVAL); +} + static inline int desc_to_gpio(const struct gpio_desc *desc) { /* GPIO can never have been requested */ -- cgit v1.2.3 From f881bab038c9667deab19a85d8666029cbfa6f2c Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 23 Sep 2015 16:20:43 -0700 Subject: gpio: keep the GPIO line names internal This refactors the changes to the GPIO line naming mechanism to not have so widespread effects, instead we conclude the patch series by having created a name attribute in the GPIO descriptor, that need not be globally unique, and it will be initialized from the old .names array in struct gpio_chip if it exists, then used in the legacy sysfs code like the array was used previously. The associated changes to name lines from the device tree are controversial and need to stand alone from this. Resulting changes: 1. Remove the export and the header for the gpio_name_to_desc() as so far the only use is inside gpiolib.c. Staticize gpio_name_to_desc() and move it above the only function using it. 2. Only print a warning if there are two GPIO lines with the same name. The reason is to preserve current behaviour: before the previous changes to the naming mechanism this would not reject probing the driver, instead the error would occur when trying to export the line in sysfs, so restore this behaviour, but print a friendly warning if names collide. Cc: Johan Hovold Cc: Markus Pargmann Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 366a3fdbdbea..fb0fde686cb1 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -130,7 +130,6 @@ int gpiod_to_irq(const struct gpio_desc *desc); /* Convert between the old gpio_ and new gpiod_ interfaces */ struct gpio_desc *gpio_to_desc(unsigned gpio); int desc_to_gpio(const struct gpio_desc *desc); -struct gpio_desc *gpio_name_to_desc(const char *name); /* Child properties interface */ struct fwnode_handle; @@ -402,11 +401,6 @@ static inline struct gpio_desc *gpio_to_desc(unsigned gpio) return ERR_PTR(-EINVAL); } -static inline struct gpio_desc *gpio_name_to_desc(const char *name) -{ - return ERR_PTR(-EINVAL); -} - static inline int desc_to_gpio(const struct gpio_desc *desc) { /* GPIO can never have been requested */ -- cgit v1.2.3 From f856f21dbcd162a53e30987a91d75d5ab54a7f80 Mon Sep 17 00:00:00 2001 From: Alexander Aring Date: Thu, 24 Sep 2015 09:37:10 +0200 Subject: ieee802154: remove unnecessary includes This patch removes some unnecessary includes from ieee802154 header, which was introduced by commit b609fb54adfa ("ieee802154: add helpers for frame control checks"). Signed-off-by: Alexander Aring Signed-off-by: Marcel Holtmann --- include/linux/ieee802154.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee802154.h b/include/linux/ieee802154.h index aca228b81464..d3e415674dac 100644 --- a/include/linux/ieee802154.h +++ b/include/linux/ieee802154.h @@ -25,9 +25,6 @@ #include #include -#include -#include -#include #define IEEE802154_MTU 127 #define IEEE802154_ACK_PSDU_LEN 5 -- cgit v1.2.3 From 3f5c8d3187852b1cbed8546169e6293d6d421751 Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Mon, 14 Sep 2015 17:37:35 +0300 Subject: device property: Add fwnode_property_match_string() Sometimes it is useful to be able to extract an index of certain string value from an array of strings. A typical use case is to give a name to a DMA channel, PWM, clock and so on. Provide an implementation using unified device property accessors that follows of_property_match_string() but works for all supported fwnodes. Signed-off-by: Mika Westerberg Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index a59c6ee566c2..463de52fe891 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -40,6 +40,8 @@ int device_property_read_string_array(struct device *dev, const char *propname, const char **val, size_t nval); int device_property_read_string(struct device *dev, const char *propname, const char **val); +int device_property_match_string(struct device *dev, + const char *propname, const char *string); bool fwnode_property_present(struct fwnode_handle *fwnode, const char *propname); int fwnode_property_read_u8_array(struct fwnode_handle *fwnode, @@ -59,6 +61,8 @@ int fwnode_property_read_string_array(struct fwnode_handle *fwnode, size_t nval); int fwnode_property_read_string(struct fwnode_handle *fwnode, const char *propname, const char **val); +int fwnode_property_match_string(struct fwnode_handle *fwnode, + const char *propname, const char *string); struct fwnode_handle *device_get_next_child_node(struct device *dev, struct fwnode_handle *child); -- cgit v1.2.3 From c6790aa9f4fdc26b1246ba36da2fd749663beb65 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 24 Sep 2015 10:34:23 +0300 Subject: IB/mlx5: Remove support for IB_DEVICE_LOCAL_DMA_LKEY Commit 96249d70dd70 ("IB/core: Guarantee that a local_dma_lkey is available") allows ULPs that make use of the local dma key to keep working as before by allocating a DMA MR with local permissions and converted these consumers to use the MR associated with the PD rather then device->local_dma_lkey. ConnectIB has some known issues with memory registration using the local_dma_lkey (SEND, RDMA, RECV seems to work ok). Thus don't expose support for it (remove device->local_dma_lkey setting), and take advantage of the above commit such that no regression is introduced to working systems. The local_dma_lkey support will be restored in CX4 depending on FW capability query. Signed-off-by: Sagi Grimberg Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 11 ----------- include/linux/mlx5/driver.h | 1 - 2 files changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..250b1ff8b48d 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -402,17 +402,6 @@ struct mlx5_cmd_teardown_hca_mbox_out { u8 rsvd[8]; }; -struct mlx5_cmd_query_special_contexts_mbox_in { - struct mlx5_inbox_hdr hdr; - u8 rsvd[8]; -}; - -struct mlx5_cmd_query_special_contexts_mbox_out { - struct mlx5_outbox_hdr hdr; - __be32 dump_fill_mkey; - __be32 resd_lkey; -}; - struct mlx5_cmd_layout { u8 type; u8 rsvd0[3]; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 27b53f9a24ad..8b6d6f2154a4 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -845,7 +845,6 @@ void *mlx5_get_protocol_dev(struct mlx5_core_dev *mdev, int protocol); int mlx5_register_interface(struct mlx5_interface *intf); void mlx5_unregister_interface(struct mlx5_interface *intf); int mlx5_core_query_vendor_id(struct mlx5_core_dev *mdev, u32 *vendor_id); -int mlx5_core_query_special_context(struct mlx5_core_dev *dev, u32 *rsvd_lkey); struct mlx5_profile { u64 mask; -- cgit v1.2.3 From 41907416bc24412f839559ea10a2b9e4eeb21aa7 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 25 Sep 2015 15:06:00 -0400 Subject: tracing: Remove unused function trace_current_buffer_lock_reserve() trace_current_buffer_lock_reserve() is not used by anything. Might as well get rid of it. Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index ed27917cabc9..71c1191b9954 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -168,9 +168,6 @@ struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void trace_current_buffer_unlock_commit(struct ring_buffer *buffer, - struct ring_buffer_event *event, - unsigned long flags, int pc); void trace_buffer_unlock_commit(struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); -- cgit v1.2.3 From b7f0c959edfb4448f94bd33c39fda08e10ce6ede Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 25 Sep 2015 17:38:44 -0400 Subject: tracing: Pass trace_array into trace_buffer_unlock_commit() In preparation for having trace options be per instance, the trace_array needs to be passed to the trace_buffer_unlock_commit(). The trace_event_buffer_lock_reserve() already passes in the trace_event_file where the trace_array can be derived from. Also added a "__init" to the boot up test event plus function tracing function function_test_events_call(). Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index 71c1191b9954..f85693bbcdc3 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -168,10 +168,12 @@ struct ring_buffer_event * trace_current_buffer_lock_reserve(struct ring_buffer **current_buffer, int type, unsigned long len, unsigned long flags, int pc); -void trace_buffer_unlock_commit(struct ring_buffer *buffer, +void trace_buffer_unlock_commit(struct trace_array *tr, + struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc); -void trace_buffer_unlock_commit_regs(struct ring_buffer *buffer, +void trace_buffer_unlock_commit_regs(struct trace_array *tr, + struct ring_buffer *buffer, struct ring_buffer_event *event, unsigned long flags, int pc, struct pt_regs *regs); @@ -505,7 +507,7 @@ event_trigger_unlock_commit(struct trace_event_file *file, enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit(buffer, event, irq_flags, pc); + trace_buffer_unlock_commit(file->tr, buffer, event, irq_flags, pc); if (tt) event_triggers_post_call(file, tt); @@ -537,7 +539,7 @@ event_trigger_unlock_commit_regs(struct trace_event_file *file, enum event_trigger_type tt = ETT_NONE; if (!__event_trigger_test_discard(file, buffer, event, entry, &tt)) - trace_buffer_unlock_commit_regs(buffer, event, + trace_buffer_unlock_commit_regs(file->tr, buffer, event, irq_flags, pc, regs); if (tt) -- cgit v1.2.3 From 5ebc76035303016ec41bb752bec156ea9fde7c34 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Thu, 17 Sep 2015 14:02:45 +0800 Subject: ACPI, PCI, irq: Do not share PCI IRQ with ISA IRQ Avoid IRQs occupied by ISA IRQs when allocating IRQs for PCI link devices, otherwise it may cause interrupt storm due to incompatible pin attributes. This issue was triggered on a KVM virtual machine, which 1) uses IRQ9 for SCI in high level mode. 2) defines an PCI interrupt link device (LNKS) with IRQ9 as the only possible irq. 3) has an PCI device referring to link device LNKS. So it causes interrupt storm when enabling the PCI device because PCI IRQ works in low level mode. Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..43856d19cf4d 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -217,6 +217,7 @@ struct pci_dev; int acpi_pci_irq_enable (struct pci_dev *dev); void acpi_penalize_isa_irq(int irq, int active); +bool acpi_isa_irq_available(int irq); void acpi_penalize_sci_irq(int irq, int trigger, int polarity); void acpi_pci_irq_disable (struct pci_dev *dev); -- cgit v1.2.3 From c6f7b48e7e21989f4cfc996837d55c595d5dbf87 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 10 Sep 2015 16:00:50 +0200 Subject: PM / Domains: Remove name based API for genpd As all users of the named based APIs now have converted to the non-named based APIs, the time has come to remove them. Signed-off-by: Ulf Hansson Acked-by: Geert Uytterhoeven Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 38 -------------------------------------- 1 file changed, 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index b1cf7e797892..443fc196dc01 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -125,27 +125,18 @@ extern int __pm_genpd_add_device(struct generic_pm_domain *genpd, struct device *dev, struct gpd_timing_data *td); -extern int __pm_genpd_name_add_device(const char *domain_name, - struct device *dev, - struct gpd_timing_data *td); - extern int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev); extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); -extern int pm_genpd_add_subdomain_names(const char *master_name, - const char *subdomain_name); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); -extern int pm_genpd_name_attach_cpuidle(const char *name, int state); extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); -extern int pm_genpd_name_detach_cpuidle(const char *name); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern int pm_genpd_name_poweron(const char *domain_name); extern void pm_genpd_poweroff_unused(void); extern struct dev_power_governor simple_qos_governor; @@ -166,12 +157,6 @@ static inline int __pm_genpd_add_device(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int __pm_genpd_name_add_device(const char *domain_name, - struct device *dev, - struct gpd_timing_data *td) -{ - return -ENOSYS; -} static inline int pm_genpd_remove_device(struct generic_pm_domain *genpd, struct device *dev) { @@ -182,11 +167,6 @@ static inline int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_add_subdomain_names(const char *master_name, - const char *subdomain_name) -{ - return -ENOSYS; -} static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target) { @@ -196,18 +176,10 @@ static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int s { return -ENOSYS; } -static inline int pm_genpd_name_attach_cpuidle(const char *name, int state) -{ - return -ENOSYS; -} static inline int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline int pm_genpd_name_detach_cpuidle(const char *name) -{ - return -ENOSYS; -} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { @@ -216,10 +188,6 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline int pm_genpd_name_poweron(const char *domain_name) -{ - return -ENOSYS; -} static inline void pm_genpd_poweroff_unused(void) {} #endif @@ -229,12 +197,6 @@ static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, return __pm_genpd_add_device(genpd, dev, NULL); } -static inline int pm_genpd_name_add_device(const char *domain_name, - struct device *dev) -{ - return __pm_genpd_name_add_device(domain_name, dev, NULL); -} - #ifdef CONFIG_PM_GENERIC_DOMAINS_SLEEP extern void pm_genpd_syscore_poweroff(struct device *dev); extern void pm_genpd_syscore_poweron(struct device *dev); -- cgit v1.2.3 From cea3ad93d9a5e054d916f1ad71da02cb306e4828 Mon Sep 17 00:00:00 2001 From: Daniel Lezcano Date: Tue, 1 Sep 2015 20:37:49 +0200 Subject: PM / Domains: Remove cpuidle attach The power domains code allows to tie a cpuidle state with a power domain. Preventing the cpuidle framework to enter a specific idle state by disabling from the power domain framework is a good idea. Unfortunately, the current implementation has some gaps with a SMP system and a complex cpuidle implementation. Enabling a power domain wakes up all the cpus even if a cpu does not belong to the power domain. There is some work to do a logical representation with the power domains of the hardware dependencies (eg. a cpu belongs to a power domains, these power domains belong to a higher power domain for a cluster, etc ...). A new code relying on the genpd hierarchy to disable the idle states would make more sense. As the unique user of this code has been removed, let's wipe out this code to prevent new user and to have a clean place to put a new implementation. Signed-off-by: Daniel Lezcano Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 17 ----------------- 1 file changed, 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 443fc196dc01..384b1d193707 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -15,7 +15,6 @@ #include #include #include -#include /* Defines used for the flags field in the struct generic_pm_domain */ #define GENPD_FLAG_PM_CLK (1U << 0) /* PM domain uses PM clk */ @@ -38,11 +37,6 @@ struct gpd_dev_ops { bool (*active_wakeup)(struct device *dev); }; -struct gpd_cpuidle_data { - unsigned int saved_exit_latency; - struct cpuidle_state *idle_state; -}; - struct generic_pm_domain { struct dev_pm_domain domain; /* PM domain operations */ struct list_head gpd_list_node; /* Node in the global PM domains list */ @@ -68,7 +62,6 @@ struct generic_pm_domain { s64 max_off_time_ns; /* Maximum allowed "suspended" time. */ bool max_off_time_changed; bool cached_power_down_ok; - struct gpd_cpuidle_data *cpuidle_data; int (*attach_dev)(struct generic_pm_domain *domain, struct device *dev); void (*detach_dev)(struct generic_pm_domain *domain, @@ -131,8 +124,6 @@ extern int pm_genpd_add_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *new_subdomain); extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); -extern int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int state); -extern int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); @@ -172,14 +163,6 @@ static inline int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, { return -ENOSYS; } -static inline int pm_genpd_attach_cpuidle(struct generic_pm_domain *genpd, int st) -{ - return -ENOSYS; -} -static inline int pm_genpd_detach_cpuidle(struct generic_pm_domain *genpd) -{ - return -ENOSYS; -} static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { -- cgit v1.2.3 From 9b7537642cb6ad400ee4a95114582ba758b3009c Mon Sep 17 00:00:00 2001 From: Bin Liu Date: Wed, 9 Sep 2015 13:17:23 -0500 Subject: usb: musb: set the controller speed based on the config setting Set the Power register HSENAB bit based on musb->config->maximum_speed, so that the glue layer can control MUSB to work in high- or full-speed. Signed-off-by: Bin Liu Signed-off-by: Felipe Balbi --- include/linux/usb/musb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/musb.h b/include/linux/usb/musb.h index a4ee1b582183..fa6dc132bd1b 100644 --- a/include/linux/usb/musb.h +++ b/include/linux/usb/musb.h @@ -95,7 +95,7 @@ struct musb_hdrc_config { /* musb CLKIN in Blackfin in MHZ */ unsigned char clkin; #endif - + u32 maximum_speed; }; struct musb_hdrc_platform_data { -- cgit v1.2.3 From 1f91b4cc03556ba0d43ac80621dac8263cda3880 Mon Sep 17 00:00:00 2001 From: Felipe Balbi Date: Thu, 6 Aug 2015 18:11:54 -0500 Subject: usb: dwc2: rename all s3c_* to dwc2_* this driver has long ago became dwc2.ko with both peripheral and host roles, there's no point in keeping the old function names. Acked-by: John Youn Tested-by: John Youn Signed-off-by: Felipe Balbi --- include/linux/platform_data/s3c-hsotg.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/s3c-hsotg.h b/include/linux/platform_data/s3c-hsotg.h index 3f1cbf95ec3b..3982586ba6df 100644 --- a/include/linux/platform_data/s3c-hsotg.h +++ b/include/linux/platform_data/s3c-hsotg.h @@ -17,19 +17,19 @@ struct platform_device; -enum s3c_hsotg_dmamode { +enum dwc2_hsotg_dmamode { S3C_HSOTG_DMA_NONE, /* do not use DMA at-all */ S3C_HSOTG_DMA_ONLY, /* always use DMA */ S3C_HSOTG_DMA_DRV, /* DMA is chosen by driver */ }; /** - * struct s3c_hsotg_plat - platform data for high-speed otg/udc + * struct dwc2_hsotg_plat - platform data for high-speed otg/udc * @dma: Whether to use DMA or not. * @is_osc: The clock source is an oscillator, not a crystal */ -struct s3c_hsotg_plat { - enum s3c_hsotg_dmamode dma; +struct dwc2_hsotg_plat { + enum dwc2_hsotg_dmamode dma; unsigned int is_osc:1; int phy_type; @@ -37,6 +37,6 @@ struct s3c_hsotg_plat { int (*phy_exit)(struct platform_device *pdev, int type); }; -extern void s3c_hsotg_set_platdata(struct s3c_hsotg_plat *pd); +extern void dwc2_hsotg_set_platdata(struct dwc2_hsotg_plat *pd); #endif /* __LINUX_USB_S3C_HSOTG_H */ -- cgit v1.2.3 From 428163d703712d11cacfddaf30f40b18ccc50042 Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Mon, 10 Aug 2015 16:46:19 +0200 Subject: usb: gadget: at91_udc: move at91_udc_data in at91_udc.h struct at91_udc_data is now only used inside the driver, move it to its include. Acked-by: Nicolas Ferre Signed-off-by: Alexandre Belloni Signed-off-by: Felipe Balbi --- include/linux/platform_data/atmel.h | 9 --------- 1 file changed, 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..9127ebbaa487 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -25,15 +25,6 @@ */ #define ATMEL_MAX_UART 7 - /* USB Device */ -struct at91_udc_data { - int vbus_pin; /* high == host powering us */ - u8 vbus_active_low; /* vbus polarity */ - u8 vbus_polled; /* Use polling, not interrupt */ - int pullup_pin; /* active == D+ pulled up */ - u8 pullup_active_low; /* true == pullup_pin is active low */ -}; - /* Compact Flash */ struct at91_cf_data { int irq_pin; /* I/O IRQ */ -- cgit v1.2.3 From b67f628c84329a9ce82dbff5fde196dc4624e7c2 Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Wed, 16 Sep 2015 12:10:41 +0200 Subject: usb: gadget: epautoconf: add usb_ep_autoconfig_release() function This patch introduces usb_ep_autoconfig_release() function which allows to release endpoint previously obtained from usb_ep_autoconfig() during USB function bind. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index c14a69b36d27..3f299e2b6942 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -1233,6 +1233,8 @@ extern struct usb_ep *usb_ep_autoconfig_ss(struct usb_gadget *, struct usb_endpoint_descriptor *, struct usb_ss_ep_comp_descriptor *); +extern void usb_ep_autoconfig_release(struct usb_ep *); + extern void usb_ep_autoconfig_reset(struct usb_gadget *); #endif /* __LINUX_USB_GADGET_H */ -- cgit v1.2.3 From b0bac2581c1918cc4ab0aca01977ad69f0bc127a Mon Sep 17 00:00:00 2001 From: Robert Baldyga Date: Wed, 16 Sep 2015 12:10:42 +0200 Subject: usb: gadget: introduce 'enabled' flag in struct usb_ep This patch introduces 'enabled' flag in struct usb_ep, and modifies usb_ep_enable() and usb_ep_disable() functions to encapsulate endpoint enabled/disabled state. It helps to avoid enabling endpoints which are already enabled, and disabling endpoints which are already disables. From now USB functions don't have to remember current endpoint enable/disable state, as this state is now handled automatically which makes this API less bug-prone. Signed-off-by: Robert Baldyga Signed-off-by: Felipe Balbi --- include/linux/usb/gadget.h | 27 +++++++++++++++++++++++++-- 1 file changed, 25 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget.h b/include/linux/usb/gadget.h index 3f299e2b6942..3d583a10b926 100644 --- a/include/linux/usb/gadget.h +++ b/include/linux/usb/gadget.h @@ -215,6 +215,7 @@ struct usb_ep { struct list_head ep_list; struct usb_ep_caps caps; bool claimed; + bool enabled; unsigned maxpacket:16; unsigned maxpacket_limit:16; unsigned max_streams:16; @@ -264,7 +265,18 @@ static inline void usb_ep_set_maxpacket_limit(struct usb_ep *ep, */ static inline int usb_ep_enable(struct usb_ep *ep) { - return ep->ops->enable(ep, ep->desc); + int ret; + + if (ep->enabled) + return 0; + + ret = ep->ops->enable(ep, ep->desc); + if (ret) + return ret; + + ep->enabled = true; + + return 0; } /** @@ -281,7 +293,18 @@ static inline int usb_ep_enable(struct usb_ep *ep) */ static inline int usb_ep_disable(struct usb_ep *ep) { - return ep->ops->disable(ep); + int ret; + + if (!ep->enabled) + return 0; + + ret = ep->ops->disable(ep); + if (ret) + return ret; + + ep->enabled = false; + + return 0; } /** -- cgit v1.2.3 From 58efd4b06df4a421652cb2c8a850a9697a37915c Mon Sep 17 00:00:00 2001 From: Peter Chen Date: Tue, 22 Sep 2015 15:31:34 +0800 Subject: usb: phy: change some comments - Replace all "transceiver" with "phy" - Replace one "OTG controller" with "phy" Signed-off-by: Peter Chen Signed-off-by: Felipe Balbi --- include/linux/usb/phy.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/phy.h b/include/linux/usb/phy.h index e39f251cf861..31a8068c42a5 100644 --- a/include/linux/usb/phy.h +++ b/include/linux/usb/phy.h @@ -63,7 +63,7 @@ enum usb_otg_state { struct usb_phy; struct usb_otg; -/* for transceivers connected thru an ULPI interface, the user must +/* for phys connected thru an ULPI interface, the user must * provide access ops */ struct usb_phy_io_ops { @@ -92,10 +92,10 @@ struct usb_phy { u16 port_status; u16 port_change; - /* to support controllers that have multiple transceivers */ + /* to support controllers that have multiple phys */ struct list_head head; - /* initialize/shutdown the OTG controller */ + /* initialize/shutdown the phy */ int (*init)(struct usb_phy *x); void (*shutdown)(struct usb_phy *x); @@ -106,7 +106,7 @@ struct usb_phy { int (*set_power)(struct usb_phy *x, unsigned mA); - /* Set transceiver into suspend mode */ + /* Set phy into suspend mode */ int (*set_suspend)(struct usb_phy *x, int suspend); -- cgit v1.2.3 From 63863b988eeca2823ce76b28b104e0b8366cafec Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 21 Sep 2015 11:14:32 +0300 Subject: usb: common: of_usb_get_maximum_speed to usb_get_maximum_speed By using the unified device property interface, the function can be made available for all platforms and not just the ones using DT. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- include/linux/usb/ch9.h | 11 ++++++++++- include/linux/usb/of.h | 6 ------ 2 files changed, 10 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/ch9.h b/include/linux/usb/ch9.h index 27603bcbb9b9..6cc96bb12ddc 100644 --- a/include/linux/usb/ch9.h +++ b/include/linux/usb/ch9.h @@ -32,9 +32,9 @@ #ifndef __LINUX_USB_CH9_H #define __LINUX_USB_CH9_H +#include #include - /** * usb_speed_string() - Returns human readable-name of the speed. * @speed: The speed to return human-readable name for. If it's not @@ -43,6 +43,15 @@ */ extern const char *usb_speed_string(enum usb_device_speed speed); +/** + * usb_get_maximum_speed - Get maximum requested speed for a given USB + * controller. + * @dev: Pointer to the given USB controller device + * + * The function gets the maximum speed string from property "maximum-speed", + * and returns the corresponding enum usb_device_speed. + */ +extern enum usb_device_speed usb_get_maximum_speed(struct device *dev); /** * usb_state_string - Returns human readable name for the state. diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index 8c5a818ec244..ff23fea49fca 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -13,7 +13,6 @@ #if IS_ENABLED(CONFIG_OF) enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np); -enum usb_device_speed of_usb_get_maximum_speed(struct device_node *np); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); @@ -23,11 +22,6 @@ static inline enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) return USB_DR_MODE_UNKNOWN; } -static inline enum usb_device_speed -of_usb_get_maximum_speed(struct device_node *np) -{ - return USB_SPEED_UNKNOWN; -} static inline bool of_usb_host_tpl_support(struct device_node *np) { return false; -- cgit v1.2.3 From 06e7114f0d8297278eb24f4e9bee3393a94bd8ce Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Mon, 21 Sep 2015 11:14:34 +0300 Subject: usb: common: of_usb_get_dr_mode to usb_get_dr_mode By using the unified device property interface, the function can be made available for all platforms and not just the ones using DT. Signed-off-by: Heikki Krogerus Signed-off-by: Felipe Balbi --- include/linux/usb/of.h | 6 ------ include/linux/usb/otg.h | 9 +++++++++ 2 files changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/of.h b/include/linux/usb/of.h index ff23fea49fca..c3fe9e48ce27 100644 --- a/include/linux/usb/of.h +++ b/include/linux/usb/of.h @@ -12,16 +12,10 @@ #include #if IS_ENABLED(CONFIG_OF) -enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np); bool of_usb_host_tpl_support(struct device_node *np); int of_usb_update_otg_caps(struct device_node *np, struct usb_otg_caps *otg_caps); #else -static inline enum usb_dr_mode of_usb_get_dr_mode(struct device_node *np) -{ - return USB_DR_MODE_UNKNOWN; -} - static inline bool of_usb_host_tpl_support(struct device_node *np) { return false; diff --git a/include/linux/usb/otg.h b/include/linux/usb/otg.h index bd1dcf816100..67929df86df5 100644 --- a/include/linux/usb/otg.h +++ b/include/linux/usb/otg.h @@ -119,4 +119,13 @@ enum usb_dr_mode { USB_DR_MODE_OTG, }; +/** + * usb_get_dr_mode - Get dual role mode for given device + * @dev: Pointer to the given device + * + * The function gets phy interface string from property 'dr_mode', + * and returns the correspondig enum usb_dr_mode + */ +extern enum usb_dr_mode usb_get_dr_mode(struct device *dev); + #endif /* __LINUX_USB_OTG_H */ -- cgit v1.2.3 From 13c23cd18bd12ddbf00beddd136e3cd33b4f2dfa Mon Sep 17 00:00:00 2001 From: Franklin S Cooper Jr Date: Fri, 11 Sep 2015 17:30:34 -0700 Subject: Input: edt-ft5x06 - switch to newer gpio framework The current/old gpio framework used doesn't properly listen to ACTIVE_LOW and ACTIVE_HIGH flags. The newer gpio framework takes into account these flags when setting gpio values. Since the values being output were based on voltage and not logic they change to reflect this difference. Also use gpiod_set_value_cansleep since wake and reset pins can be provided by bus based io expanders. Switch from msleep(5) to udelay_range(5000,6000) to avoid check patch warning. Signed-off-by: Franklin S Cooper Jr Signed-off-by: Dmitry Torokhov --- include/linux/input/edt-ft5x06.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/input/edt-ft5x06.h b/include/linux/input/edt-ft5x06.h index 8a1e0d1a0124..5ca5b4cc308c 100644 --- a/include/linux/input/edt-ft5x06.h +++ b/include/linux/input/edt-ft5x06.h @@ -10,9 +10,6 @@ */ struct edt_ft5x06_platform_data { - int irq_pin; - int reset_pin; - /* startup defaults for operational parameters */ bool use_parameters; u8 gain; -- cgit v1.2.3 From 22ddbacc4bb54ef8577c46114227c06c31e3c47d Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Sat, 12 Sep 2015 09:37:03 -0700 Subject: Input: edt-ft5x06 - remove support for platform data We do not have any users of platform data in the tree and all newer platforms are either DT or ACPI, so let's drop handling of platform data. Tested-by: Franklin S Cooper Jr Signed-off-by: Dmitry Torokhov --- include/linux/input/edt-ft5x06.h | 21 --------------------- 1 file changed, 21 deletions(-) delete mode 100644 include/linux/input/edt-ft5x06.h (limited to 'include/linux') diff --git a/include/linux/input/edt-ft5x06.h b/include/linux/input/edt-ft5x06.h deleted file mode 100644 index 5ca5b4cc308c..000000000000 --- a/include/linux/input/edt-ft5x06.h +++ /dev/null @@ -1,21 +0,0 @@ -#ifndef _EDT_FT5X06_H -#define _EDT_FT5X06_H - -/* - * Copyright (c) 2012 Simon Budig, - * - * This program is free software; you can redistribute it and/or modify it - * under the terms of the GNU General Public License version 2 as published by - * the Free Software Foundation. - */ - -struct edt_ft5x06_platform_data { - /* startup defaults for operational parameters */ - bool use_parameters; - u8 gain; - u8 threshold; - u8 offset; - u8 report_rate; -}; - -#endif /* _EDT_FT5X06_H */ -- cgit v1.2.3 From 8cb7cf56c9fe5412de238465b27ef35b4d2801aa Mon Sep 17 00:00:00 2001 From: Sudeep Holla Date: Mon, 30 Mar 2015 10:59:52 +0100 Subject: firmware: add support for ARM System Control and Power Interface(SCPI) protocol This patch adds support for System Control and Power Interface (SCPI) Message Protocol used between the Application Cores(AP) and the System Control Processor(SCP). The MHU peripheral provides a mechanism for inter-processor communication between SCP's M3 processor and AP. SCP offers control and management of the core/cluster power states, various power domain DVFS including the core/cluster, certain system clocks configuration, thermal sensors and many others. This protocol driver provides interface for all the client drivers using SCPI to make use of the features offered by the SCP. Signed-off-by: Sudeep Holla Reviewed-by: Jon Medhurst (Tixy) Cc: Jassi Brar Cc: Liviu Dudau Cc: Lorenzo Pieralisi --- include/linux/scpi_protocol.h | 61 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) create mode 100644 include/linux/scpi_protocol.h (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h new file mode 100644 index 000000000000..e7169cd54e19 --- /dev/null +++ b/include/linux/scpi_protocol.h @@ -0,0 +1,61 @@ +/* + * SCPI Message Protocol driver header + * + * Copyright (C) 2014 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include + +struct scpi_opp { + u32 freq; + u32 m_volt; +} __packed; + +struct scpi_dvfs_info { + unsigned int count; + unsigned int latency; /* in nanoseconds */ + struct scpi_opp *opps; +}; + +/** + * struct scpi_ops - represents the various operations provided + * by SCP through SCPI message protocol + * @get_version: returns the major and minor revision on the SCPI + * message protocol + * @clk_get_range: gets clock range limit(min - max in Hz) + * @clk_get_val: gets clock value(in Hz) + * @clk_set_val: sets the clock value, setting to 0 will disable the + * clock (if supported) + * @dvfs_get_idx: gets the Operating Point of the given power domain. + * OPP is an index to the list return by @dvfs_get_info + * @dvfs_set_idx: sets the Operating Point of the given power domain. + * OPP is an index to the list return by @dvfs_get_info + * @dvfs_get_info: returns the DVFS capabilities of the given power + * domain. It includes the OPP list and the latency information + */ +struct scpi_ops { + u32 (*get_version)(void); + int (*clk_get_range)(u16, unsigned long *, unsigned long *); + unsigned long (*clk_get_val)(u16); + int (*clk_set_val)(u16, unsigned long); + int (*dvfs_get_idx)(u8); + int (*dvfs_set_idx)(u8, u8); + struct scpi_dvfs_info *(*dvfs_get_info)(u8); +}; + +#if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) +struct scpi_ops *get_scpi_ops(void); +#else +static inline struct scpi_ops *get_scpi_ops(void) { return NULL; } +#endif -- cgit v1.2.3 From 07294cc2ea3000da706fd88c8ec7dcfadc715e14 Mon Sep 17 00:00:00 2001 From: Stefan Koch Date: Mon, 28 Sep 2015 23:59:52 +0200 Subject: USB: Added forgotten parameter description for authorized attribute in usb.h Signed-off-by: Stefan Koch Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 3deccab2ce0a..2cbcf8db517a 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -122,6 +122,8 @@ enum usb_interface_condition { * has been deferred. * @needs_binding: flag set when the driver should be re-probed or unbound * following a reset or suspend operation it doesn't support. + * @authorized: This allows to (de)authorize individual interfaces instead + * a whole device in contrast to the device authorization. * @dev: driver model's view of this device * @usb_dev: if an interface is bound to the USB major, this will point * to the sysfs representation for that device. -- cgit v1.2.3 From 78ccb25861d76a8fc5c678d762180e6918834200 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Fri, 25 Sep 2015 10:49:15 +0300 Subject: net/mlx5_core: Fix wrong name in struct The name refers to syndrome so uset ext_synd instread of ext_sync. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8eb3b19af2a4..41e9f3bd663c 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -440,7 +440,7 @@ struct health_buffer { __be32 rsvd2; u8 irisc_index; u8 synd; - __be16 ext_sync; + __be16 ext_synd; }; struct mlx5_init_seg { -- cgit v1.2.3 From 55acca90da52b85299c033354e51ddaa7b73e019 Mon Sep 17 00:00:00 2001 From: Hante Meuleman Date: Fri, 18 Sep 2015 22:08:17 +0200 Subject: brcmfmac: Add support for the BCM4365 and BCM4366 PCIE devices. This patch adds support for the BCM4365 and BCM4366 11ac Wave2 PCIE devices. Reviewed-by: Arend Van Spriel Reviewed-by: Pieter-Paul Giesberts Signed-off-by: Hante Meuleman Signed-off-by: Arend van Spriel Signed-off-by: Kalle Valo --- include/linux/bcma/bcma.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bcma/bcma.h b/include/linux/bcma/bcma.h index 2ff4a9961e1d..3feb1b2d75d8 100644 --- a/include/linux/bcma/bcma.h +++ b/include/linux/bcma/bcma.h @@ -151,6 +151,8 @@ struct bcma_host_ops { #define BCMA_CORE_PCIE2 0x83C /* PCI Express Gen2 */ #define BCMA_CORE_USB30_DEV 0x83D #define BCMA_CORE_ARM_CR4 0x83E +#define BCMA_CORE_ARM_CA7 0x847 +#define BCMA_CORE_SYS_MEM 0x849 #define BCMA_CORE_DEFAULT 0xFFF #define BCMA_MAX_NR_CORES 16 -- cgit v1.2.3 From 30f2136346cab91e1ffd9ee6370d76809f20487a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 21 Sep 2015 22:58:34 +0200 Subject: irqchip/gicv3-its: Add range check for number of allocated pages The number of pages for the its table may exceed the maximum of 256. Adding a range check and limitting the number to its maximum. Based on a patch from Tirumalesh Chalamarla . Signed-off-by: Tirumalesh Chalamarla Signed-off-by: Robert Richter Reviewed-by: Marc Zyngier Acked-by: Catalin Marinas Cc: linux-arm-kernel@lists.infradead.org Cc: Jason Cooper Link: http://lkml.kernel.org/r/1442869119-1814-2-git-send-email-rric@kernel.org Signed-off-by: Thomas Gleixner --- include/linux/irqchip/arm-gic-v3.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 9eeeb9589acf..c0c8a2ef9d90 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -231,6 +231,7 @@ #define GITS_BASER_PAGE_SIZE_16K (1UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_64K (2UL << GITS_BASER_PAGE_SIZE_SHIFT) #define GITS_BASER_PAGE_SIZE_MASK (3UL << GITS_BASER_PAGE_SIZE_SHIFT) +#define GITS_BASER_PAGES_MAX 256 #define GITS_BASER_TYPE_NONE 0 #define GITS_BASER_TYPE_DEVICE 1 -- cgit v1.2.3 From 4593fdbe7a2f44d5e64c627c715dd0bcec9bdf14 Mon Sep 17 00:00:00 2001 From: Akinobu Mita Date: Sun, 27 Sep 2015 02:09:20 +0900 Subject: blk-mq: fix sysfs registration/unregistration race There is a race between cpu hotplug handling and adding/deleting gendisk for blk-mq, where both are trying to register and unregister the same sysfs entries. null_add_dev --> blk_mq_init_queue --> blk_mq_init_allocated_queue --> add to 'all_q_list' (*) --> add_disk --> blk_register_queue --> blk_mq_register_disk (++) null_del_dev --> del_gendisk --> blk_unregister_queue --> blk_mq_unregister_disk (--) --> blk_cleanup_queue --> blk_mq_free_queue --> del from 'all_q_list' (*) blk_mq_queue_reinit --> blk_mq_sysfs_unregister (-) --> blk_mq_sysfs_register (+) While the request queue is added to 'all_q_list' (*), blk_mq_queue_reinit() can be called for the queue anytime by CPU hotplug callback. But blk_mq_sysfs_unregister (-) and blk_mq_sysfs_register (+) in blk_mq_queue_reinit must not be called before blk_mq_register_disk (++) and after blk_mq_unregister_disk (--) is finished. Because '/sys/block/*/mq/' is not exists. There has already been BLK_MQ_F_SYSFS_UP flag in hctx->flags which can be used to track these sysfs stuff, but it is only fixing this issue partially. In order to fix it completely, we just need per-queue flag instead of per-hctx flag with appropriate locking. So this introduces q->mq_sysfs_init_done which is properly protected with all_q_mutex. Also, we need to ensure that blk_mq_map_swqueue() is called with all_q_mutex is held. Since hctx->nr_ctx is reset temporarily and updated in blk_mq_map_swqueue(), so we should avoid blk_mq_register_hctx() seeing the temporary hctx->nr_ctx value in CPU hotplug handling or adding/deleting gendisk . Signed-off-by: Akinobu Mita Reviewed-by: Ming Lei Cc: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - include/linux/blkdev.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 37d1602c4f7a..b80ba4572a31 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -145,7 +145,6 @@ enum { BLK_MQ_F_SHOULD_MERGE = 1 << 0, BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, - BLK_MQ_F_SYSFS_UP = 1 << 3, BLK_MQ_F_DEFER_ISSUE = 1 << 4, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99da9ebc7377..19c2e947d4d1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -456,6 +456,8 @@ struct request_queue { struct blk_mq_tag_set *tag_set; struct list_head tag_set_list; struct bio_set *bio_split; + + bool mq_sysfs_init_done; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From d815d90bbbc08777c0e3a36f57b97fc4a4fb3150 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:28 -0500 Subject: netfilter: Push struct net down into nf_afinfo.reroute The network namespace is needed when routing a packet. Stop making nf_afinfo.reroute guess which network namespace is the proper namespace to route the packet in. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 987c74cd523c..165ab2d14734 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -283,7 +283,7 @@ struct nf_afinfo { struct flowi *fl, bool strict); void (*saveroute)(const struct sk_buff *skb, struct nf_queue_entry *entry); - int (*reroute)(struct sk_buff *skb, + int (*reroute)(struct net *net, struct sk_buff *skb, const struct nf_queue_entry *entry); int route_key_size; }; -- cgit v1.2.3 From e45f50660ee5fd38a540afabb7c0f65d063db631 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:30 -0500 Subject: ipv4: Pass struct net into ip_route_me_harder Don't make ip_route_me_harder guess which network namespace it is routing in, pass the network namespace in. Signed-off-by: "Eric W. Biederman" Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv4.h b/include/linux/netfilter_ipv4.h index 6e4591bb54d4..98c03b2462b5 100644 --- a/include/linux/netfilter_ipv4.h +++ b/include/linux/netfilter_ipv4.h @@ -6,7 +6,7 @@ #include -int ip_route_me_harder(struct sk_buff *skb, unsigned addr_type); +int ip_route_me_harder(struct net *net, struct sk_buff *skb, unsigned addr_type); __sum16 nf_ip_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); #endif /*__LINUX_IP_NETFILTER_H*/ -- cgit v1.2.3 From 5f5d74d723146c5b97c7318b5851af15b30e3304 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 25 Sep 2015 15:07:31 -0500 Subject: ipv6: Pass struct net into ip6_route_me_harder Don't make ip6_route_me_harder guess which network namespace it is routing in, pass the network namespace in. Signed-off-by: Eric W. Biederman Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ipv6.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 771574677e83..2ac8369fa96c 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -22,7 +22,7 @@ struct nf_ipv6_ops { }; #ifdef CONFIG_NETFILTER -int ip6_route_me_harder(struct sk_buff *skb); +int ip6_route_me_harder(struct net *net, struct sk_buff *skb); __sum16 nf_ip6_checksum(struct sk_buff *skb, unsigned int hook, unsigned int dataoff, u_int8_t protocol); -- cgit v1.2.3 From 53bb724f94f57b67213e08a4c155c8c5eb74c644 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Mon, 21 Sep 2015 13:26:59 -0700 Subject: mtd: provide proper 32/64-bit compat_ioctl() support for BLKPG After a bit of poking around wondering why my 32-bit user-space can't seem to send a proper ioctl(BLKPG) to an MTD on my 64-bit kernel (ARM64), I noticed that struct blkpg_ioctl_arg is actually pretty unsuitable for use in the ioctl() ABI, due to its use of raw pointers, and its lack of alignment/packing restrictions (32-bit arch'es tend to pack the 4 fields into 4 32-bit words, whereas 64-bit arch'es would add padding after the third int, and make this 6 32-bit words). Anyway, this means BLKPG deserves some special compat_ioctl handling. Do the conversion in a small shim for MTD. block/compat_ioctl.c already has compat support for the block subsystem, but it does so by a re-marshalling data to/from user-space (see compat_blkpg_ioctl()). Personally, I think this approach is cleaner. Tested only on MTD, with an ARM32 user space on an ARM64 kernel. Signed-off-by: Brian Norris --- include/linux/blkpg.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) create mode 100644 include/linux/blkpg.h (limited to 'include/linux') diff --git a/include/linux/blkpg.h b/include/linux/blkpg.h new file mode 100644 index 000000000000..bef124fde61e --- /dev/null +++ b/include/linux/blkpg.h @@ -0,0 +1,21 @@ +#ifndef _LINUX_BLKPG_H +#define _LINUX_BLKPG_H + +/* + * Partition table and disk geometry handling + */ + +#include +#include + +#ifdef CONFIG_COMPAT +/* For 32-bit/64-bit compatibility of struct blkpg_ioctl_arg */ +struct blkpg_compat_ioctl_arg { + compat_int_t op; + compat_int_t flags; + compat_int_t datalen; + compat_uptr_t data; +}; +#endif + +#endif /* _LINUX_BLKPG_H */ -- cgit v1.2.3 From 2094acbb714e24e464c810c2d8fa57493fcb25a6 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 28 Sep 2015 11:10:31 -0700 Subject: net/ipv4: Pass proto as u8 instead of u16 in ip_check_mc_rcu This patch updates ip_check_mc_rcu so that protocol is passed as a u8 instead of a u16. The motivation is just to avoid any unneeded type transitions since some systems will require an instruction to zero extend a u8 field to a u16. Also it makes it a bit more readable as to the fact that protocol is a u8 so there are no byte ordering changes needed to pass it. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/igmp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/igmp.h b/include/linux/igmp.h index 908429216d9f..9c9de11549a7 100644 --- a/include/linux/igmp.h +++ b/include/linux/igmp.h @@ -110,7 +110,7 @@ struct ip_mc_list { #define IGMPV3_QQIC(value) IGMPV3_EXP(0x80, 4, 3, value) #define IGMPV3_MRC(value) IGMPV3_EXP(0x80, 4, 3, value) -extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u16 proto); +extern int ip_check_mc_rcu(struct in_device *dev, __be32 mc_addr, __be32 src_addr, u8 proto); extern int igmp_rcv(struct sk_buff *); extern int ip_mc_join_group(struct sock *sk, struct ip_mreqn *imr); extern int ip_mc_leave_group(struct sock *sk, struct ip_mreqn *imr); -- cgit v1.2.3 From 31b33dfb0a144469dd805514c9e63f4993729a48 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Mon, 28 Sep 2015 17:24:25 -0700 Subject: skbuff: Fix skb checksum partial check. Earlier patch 6ae459bda tried to detect void ckecksum partial skb by comparing pull length to checksum offset. But it does not work for all cases since checksum-offset depends on updates to skb->data. Following patch fixes it by validating checksum start offset after skb-data pointer is updated. Negative value of checksum offset start means there is no need to checksum. Fixes: 6ae459bda ("skbuff: Fix skb checksum flag on skb pull") Reported-by: Andrew Vagin Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/skbuff.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 2b0a30a6e31c..4398411236f1 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -2708,7 +2708,7 @@ static inline void skb_postpull_rcsum(struct sk_buff *skb, if (skb->ip_summed == CHECKSUM_COMPLETE) skb->csum = csum_sub(skb->csum, csum_partial(start, len, 0)); else if (skb->ip_summed == CHECKSUM_PARTIAL && - skb_checksum_start_offset(skb) <= len) + skb_checksum_start_offset(skb) < 0) skb->ip_summed = CHECKSUM_NONE; } -- cgit v1.2.3 From 0536fcc039a8926ec12ec587f41a83f7acafeb82 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 29 Sep 2015 07:42:52 -0700 Subject: tcp: prepare fastopen code for upcoming listener changes While auditing TCP stack for upcoming 'lockless' listener changes, I found I had to change fastopen_init_queue() to properly init the object before publishing it. Otherwise an other cpu could try to lock the spinlock before it gets properly initialized. Instead of adding appropriate barriers, just remove dynamic memory allocations : - Structure is 28 bytes on 64bit arches. Using additional 8 bytes for holding a pointer seems overkill. - Two listeners can share same cache line and performance would suffer. If we really want to save few bytes, we would instead dynamically allocate whole struct request_sock_queue in the future. Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index fcb573be75d9..e442e6e9a365 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -382,25 +382,11 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) tcp_sk(sk)->fastopen_rsk != NULL); } -extern void tcp_sock_destruct(struct sock *sk); - -static inline int fastopen_init_queue(struct sock *sk, int backlog) +static inline void fastopen_queue_tune(struct sock *sk, int backlog) { - struct request_sock_queue *queue = - &inet_csk(sk)->icsk_accept_queue; - - if (queue->fastopenq == NULL) { - queue->fastopenq = kzalloc( - sizeof(struct fastopen_queue), - sk->sk_allocation); - if (queue->fastopenq == NULL) - return -ENOMEM; - - sk->sk_destruct = tcp_sock_destruct; - spin_lock_init(&queue->fastopenq->lock); - } - queue->fastopenq->max_qlen = backlog; - return 0; + struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + + queue->fastopenq.max_qlen = backlog; } static inline void tcp_saved_syn_free(struct tcp_sock *tp) -- cgit v1.2.3 From 007979eaf94d1c888d8c7cf8a5250c2c6c9bd98e Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:10 -0700 Subject: net: Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER Rename IFF_VRF_MASTER to IFF_L3MDEV_MASTER and update the name of the netif_is_vrf and netif_index_is_vrf macros. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index d2ffeafc9998..99c33e83822f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1258,7 +1258,7 @@ struct net_device_ops { * @IFF_LIVE_ADDR_CHANGE: device supports hardware address * change when it's running * @IFF_MACVLAN: Macvlan device - * @IFF_VRF_MASTER: device is a VRF master + * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master */ @@ -1283,7 +1283,7 @@ enum netdev_priv_flags { IFF_XMIT_DST_RELEASE_PERM = 1<<17, IFF_IPVLAN_MASTER = 1<<18, IFF_IPVLAN_SLAVE = 1<<19, - IFF_VRF_MASTER = 1<<20, + IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, }; @@ -1308,7 +1308,7 @@ enum netdev_priv_flags { #define IFF_XMIT_DST_RELEASE_PERM IFF_XMIT_DST_RELEASE_PERM #define IFF_IPVLAN_MASTER IFF_IPVLAN_MASTER #define IFF_IPVLAN_SLAVE IFF_IPVLAN_SLAVE -#define IFF_VRF_MASTER IFF_VRF_MASTER +#define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH @@ -3824,9 +3824,9 @@ static inline bool netif_supports_nofcs(struct net_device *dev) return dev->priv_flags & IFF_SUPP_NOFCS; } -static inline bool netif_is_vrf(const struct net_device *dev) +static inline bool netif_is_l3_master(const struct net_device *dev) { - return dev->priv_flags & IFF_VRF_MASTER; + return dev->priv_flags & IFF_L3MDEV_MASTER; } static inline bool netif_is_bridge_master(const struct net_device *dev) @@ -3839,7 +3839,7 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_vrf(struct net *net, int ifindex) +static inline bool netif_index_is_l3_master(struct net *net, int ifindex) { bool rc = false; @@ -3853,7 +3853,7 @@ static inline bool netif_index_is_vrf(struct net *net, int ifindex) dev = dev_get_by_index_rcu(net, ifindex); if (dev) - rc = netif_is_vrf(dev); + rc = netif_is_l3_master(dev); rcu_read_unlock(); #endif -- cgit v1.2.3 From 1b69c6d0ae90b7f1a4f61d5c8209d5cb7a55f849 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:11 -0700 Subject: net: Introduce L3 Master device abstraction L3 master devices allow users of the abstraction to influence FIB lookups for enslaved devices. Current API provides a means for the master device to return a specific FIB table for an enslaved device, to return an rtable/custom dst and influence the OIF used for fib lookups. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 99c33e83822f..c7f14794fe14 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1587,6 +1587,9 @@ struct net_device { #ifdef CONFIG_NET_SWITCHDEV const struct switchdev_ops *switchdev_ops; #endif +#ifdef CONFIG_NET_L3_MASTER_DEV + const struct l3mdev_ops *l3mdev_ops; +#endif const struct header_ops *header_ops; -- cgit v1.2.3 From 93a7e7e837af6846052481da974320c19ab82e5c Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:16 -0700 Subject: net: Remove the now unused vrf_ptr Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index c7f14794fe14..72bf9e37a2f0 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1427,7 +1427,6 @@ enum netdev_priv_flags { * @dn_ptr: DECnet specific data * @ip6_ptr: IPv6 specific data * @ax25_ptr: AX.25 specific data - * @vrf_ptr: VRF specific data * @ieee80211_ptr: IEEE 802.11 specific data, assign before registering * * @last_rx: Time of last Rx @@ -1649,7 +1648,6 @@ struct net_device { struct dn_dev __rcu *dn_ptr; struct inet6_dev __rcu *ip6_ptr; void *ax25_ptr; - struct net_vrf_dev __rcu *vrf_ptr; struct wireless_dev *ieee80211_ptr; struct wpan_dev *ieee802154_ptr; #if IS_ENABLED(CONFIG_MPLS_ROUTING) -- cgit v1.2.3 From 9478d12d33ad12d29c5343ae7346b51bc1f4c5a9 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Tue, 29 Sep 2015 20:07:18 -0700 Subject: net: Move netif_index_is_l3_master to l3mdev.h Change CONFIG dependency to CONFIG_NET_L3_MASTER_DEV as well. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 72bf9e37a2f0..b9450784ae06 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3840,27 +3840,6 @@ static inline bool netif_is_ovs_master(const struct net_device *dev) return dev->priv_flags & IFF_OPENVSWITCH; } -static inline bool netif_index_is_l3_master(struct net *net, int ifindex) -{ - bool rc = false; - -#if IS_ENABLED(CONFIG_NET_VRF) - struct net_device *dev; - - if (ifindex == 0) - return false; - - rcu_read_lock(); - - dev = dev_get_by_index_rcu(net, ifindex); - if (dev) - rc = netif_is_l3_master(dev); - - rcu_read_unlock(); -#endif - return rc; -} - /* This device needs to keep skb dst for qdisc enqueue or ndo_start_xmit() */ static inline void netif_keep_dst(struct net_device *dev) { -- cgit v1.2.3 From 7d8c6e391575ee86c870b88635a163743fca9eac Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 12 Jun 2015 22:12:04 -0500 Subject: ipv6: Pass struct net through ip6_fragment Signed-off-by: Eric W. Biederman --- include/linux/netfilter_ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ipv6.h b/include/linux/netfilter_ipv6.h index 2ac8369fa96c..47c6b04c28c0 100644 --- a/include/linux/netfilter_ipv6.h +++ b/include/linux/netfilter_ipv6.h @@ -17,8 +17,8 @@ struct nf_ipv6_ops { int (*chk_addr)(struct net *net, const struct in6_addr *addr, const struct net_device *dev, int strict); void (*route_input)(struct sk_buff *skb); - int (*fragment)(struct sock *sk, struct sk_buff *skb, - int (*output)(struct sock *, struct sk_buff *)); + int (*fragment)(struct net *net, struct sock *sk, struct sk_buff *skb, + int (*output)(struct net *, struct sock *, struct sk_buff *)); }; #ifdef CONFIG_NETFILTER -- cgit v1.2.3 From ce6f7496050e29108bcefe0413df1789648fb38f Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 29 Sep 2015 22:57:24 +0900 Subject: extcon: gpio: Add the missing supported_cable parameter to devm_extcon_dev_allocate() The commit 2a9de9c0 ("extcon: Use the unique id for external connector instead of string") defines the unique id of each external connector to identify the type of external connector instead of string name. So, devm_extcon_dev_allocate() should include the second parameter (unsigned int *supported_cable). This patch adds the supported_cable parameter which is passed by platform data. Signed-off-by: Chanwoo Choi --- include/linux/extcon/extcon-gpio.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h index 0b17ad43fbfc..232bb8f80b51 100644 --- a/include/linux/extcon/extcon-gpio.h +++ b/include/linux/extcon/extcon-gpio.h @@ -26,6 +26,7 @@ /** * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device. * @name: The name of this GPIO extcon device. + * @extcon_id: The unique id of specific external connector. * @gpio: Corresponding GPIO. * @gpio_active_low: Boolean describing whether gpio active state is 1 or 0 * If true, low state of gpio means active. @@ -45,6 +46,7 @@ */ struct gpio_extcon_platform_data { const char *name; + unsigned int extcon_id; unsigned gpio; bool gpio_active_low; unsigned long debounce; -- cgit v1.2.3 From 6ba129974a3d6bcb236cbd96aa1db6001d163678 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Tue, 29 Sep 2015 22:59:55 +0900 Subject: extcon: gpio: Fix minor coding style and remove the unused fields. This patch fixes the minor coding style about indentation and removes the unused fields from struct gpio_extcon_platform_data. Signed-off-by: Chanwoo Choi --- include/linux/extcon/extcon-gpio.h | 22 ++++------------------ 1 file changed, 4 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon/extcon-gpio.h b/include/linux/extcon/extcon-gpio.h index 232bb8f80b51..7cacafb78b09 100644 --- a/include/linux/extcon/extcon-gpio.h +++ b/include/linux/extcon/extcon-gpio.h @@ -1,5 +1,5 @@ /* - * External connector (extcon) class generic GPIO driver + * Single-state GPIO extcon driver based on extcon class * * Copyright (C) 2012 Samsung Electronics * Author: MyungJoo Ham @@ -16,16 +16,14 @@ * but WITHOUT ANY WARRANTY; without even the implied warranty of * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the * GNU General Public License for more details. - * -*/ + */ #ifndef __EXTCON_GPIO_H__ #define __EXTCON_GPIO_H__ __FILE__ #include /** - * struct gpio_extcon_platform_data - A simple GPIO-controlled extcon device. - * @name: The name of this GPIO extcon device. + * struct gpio_extcon_pdata - A simple GPIO-controlled extcon device. * @extcon_id: The unique id of specific external connector. * @gpio: Corresponding GPIO. * @gpio_active_low: Boolean describing whether gpio active state is 1 or 0 @@ -33,28 +31,16 @@ * If false, high state of gpio means active. * @debounce: Debounce time for GPIO IRQ in ms. * @irq_flags: IRQ Flags (e.g., IRQF_TRIGGER_LOW). - * @state_on: print_state is overriden with state_on if attached. - * If NULL, default method of extcon class is used. - * @state_off: print_state is overriden with state_off if detached. - * If NUll, default method of extcon class is used. * @check_on_resume: Boolean describing whether to check the state of gpio * while resuming from sleep. - * - * Note that in order for state_on or state_off to be valid, both state_on - * and state_off should be not NULL. If at least one of them is NULL, - * the print_state is not overriden. */ -struct gpio_extcon_platform_data { - const char *name; +struct gpio_extcon_pdata { unsigned int extcon_id; unsigned gpio; bool gpio_active_low; unsigned long debounce; unsigned long irq_flags; - /* if NULL, "0" or "1" will be printed */ - const char *state_on; - const char *state_off; bool check_on_resume; }; -- cgit v1.2.3 From 9ae7ce00cc1353155b1914bfc40e8362efef7d1c Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 29 Sep 2015 18:21:18 +0900 Subject: usb: renesas_usbhs: fix build warning if 64-bit architecture This patch fixes the following warning if 64-bit architecture environment: ./drivers/usb/renesas_usbhs/common.c:496:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] dparam->type = of_id ? (u32)of_id->data : 0; Acked-by: Geert Uytterhoeven Signed-off-by: Yoshihiro Shimoda Signed-off-by: Felipe Balbi --- include/linux/usb/renesas_usbhs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 3dd5a781da99..bfb74723f151 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -157,7 +157,7 @@ struct renesas_usbhs_driver_param { */ int pio_dma_border; /* default is 64byte */ - u32 type; + uintptr_t type; u32 enable_gpio; /* -- cgit v1.2.3 From 8a3e33cf92c7b7ae25c589eccd1a69ab11cc4353 Mon Sep 17 00:00:00 2001 From: Manuel Lauss Date: Wed, 30 Sep 2015 21:10:25 +0200 Subject: ata: ahci: find eSATA ports and flag them as removable If the AHCI ports' HPCP or ESP bits are set, the port should be considered external (e.g. eSATA) and is marked as removable. Userspace tools like udisks then treat it like an usb drive. With this patch applied, when I plug a drive into the esata port, KDE pops up a window asking what to do with the drives(s), just like it does for any random USB stick. Removability is indicated to the upper layers by way of the SCSI RMB bit, as I haven't found another way to signal userspace to treat a sata disk like any usb stick. Signed-off-by: Manuel Lauss Signed-off-by: Tejun Heo --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index c9cfbcdb8d14..83577f8fd15b 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -254,6 +254,7 @@ enum { ATA_PFLAG_PIO32 = (1 << 20), /* 32bit PIO */ ATA_PFLAG_PIO32CHANGE = (1 << 21), /* 32bit PIO can be turned on/off */ + ATA_PFLAG_EXTERNAL = (1 << 22), /* eSATA/external port */ /* struct ata_queued_cmd flags */ ATA_QCFLAG_ACTIVE = (1 << 0), /* cmd not yet ack'd to scsi lyer */ -- cgit v1.2.3 From e647b532275bb357e87272e052fccf5fcdb36a17 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:12 +0100 Subject: ACPI: Add early device probing infrastructure IRQ controllers and timers are the two types of device the kernel requires before being able to use the device driver model. ACPI so far lacks a proper probing infrastructure similar to the one we have with DT, where we're able to declare IRQ chips and clocksources inside the driver code, and let the core code pick it up and call us back on a match. This leads to all kind of really ugly hacks all over the arm64 code and even in the ACPI layer. In order to allow some basic probing based on the ACPI tables, introduce "struct acpi_probe_entry" which contains just enough data and callbacks to match a table, an optional subtable, and call a probe function. A driver can, at build time, register itself and expect being called if the right entry exists in the ACPI table. A acpi_probe_device_table() is provided, taking an identifier for a set of acpi_prove_entries, and iterating over the registered entries. Signed-off-by: Marc Zyngier Reviewed-by: Lorenzo Pieralisi Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 66 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 66 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 865d948c60e6..815f5f62513f 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -773,6 +773,61 @@ int acpi_dev_prop_read(struct acpi_device *adev, const char *propname, struct fwnode_handle *acpi_get_next_subnode(struct device *dev, struct fwnode_handle *subnode); + +struct acpi_probe_entry; +typedef bool (*acpi_probe_entry_validate_subtbl)(struct acpi_subtable_header *, + struct acpi_probe_entry *); + +#define ACPI_TABLE_ID_LEN 5 + +/** + * struct acpi_probe_entry - boot-time probing entry + * @id: ACPI table name + * @type: Optional subtable type to match + * (if @id contains subtables) + * @subtable_valid: Optional callback to check the validity of + * the subtable + * @probe_table: Callback to the driver being probed when table + * match is successful + * @probe_subtbl: Callback to the driver being probed when table and + * subtable match (and optional callback is successful) + * @driver_data: Sideband data provided back to the driver + */ +struct acpi_probe_entry { + __u8 id[ACPI_TABLE_ID_LEN]; + __u8 type; + acpi_probe_entry_validate_subtbl subtable_valid; + union { + acpi_tbl_table_handler probe_table; + acpi_tbl_entry_handler probe_subtbl; + }; + kernel_ulong_t driver_data; +}; + +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, valid, data, fn) \ + static const struct acpi_probe_entry __acpi_probe_##name \ + __used __section(__##table##_acpi_probe_table) \ + = { \ + .id = table_id, \ + .type = subtable, \ + .subtable_valid = valid, \ + .probe_table = (acpi_tbl_table_handler)fn, \ + .driver_data = data, \ + } + +#define ACPI_PROBE_TABLE(name) __##name##_acpi_probe_table +#define ACPI_PROBE_TABLE_END(name) __##name##_acpi_probe_table_end + +int __acpi_probe_device_table(struct acpi_probe_entry *start, int nr); + +#define acpi_probe_device_table(t) \ + ({ \ + extern struct acpi_probe_entry ACPI_PROBE_TABLE(t), \ + ACPI_PROBE_TABLE_END(t); \ + __acpi_probe_device_table(&ACPI_PROBE_TABLE(t), \ + (&ACPI_PROBE_TABLE_END(t) - \ + &ACPI_PROBE_TABLE(t))); \ + }) #else static inline int acpi_dev_get_property(struct acpi_device *adev, const char *name, acpi_object_type type, @@ -831,6 +886,17 @@ static inline struct fwnode_handle *acpi_get_next_subnode(struct device *dev, { return NULL; } + +#define ACPI_DECLARE_PROBE_ENTRY(table, name, table_id, subtable, validate, data, fn) \ + static const void * __acpi_table_##name[] \ + __attribute__((unused)) \ + = { (void *) table_id, \ + (void *) subtable, \ + (void *) valid, \ + (void *) fn, \ + (void *) data } + +#define acpi_probe_device_table(t) ({ int __r = 0; __r;}) #endif #endif /*_LINUX_ACPI_H*/ -- cgit v1.2.3 From 46e589a391809627144e6bee93d71d73fe915db2 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:13 +0100 Subject: irqchip / ACPI: Add probing infrastructure for ACPI-based irqchips DT enjoys a rather nice probing infrastructure for irqchips, while ACPI is so far stuck into a very distant past. This patch introduces a declarative API, allowing irqchips to be self-contained and be called when a particular entry is matched in the MADT table. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi_irq.h | 10 ---------- include/linux/irqchip.h | 17 +++++++++++++++++ 2 files changed, 17 insertions(+), 10 deletions(-) delete mode 100644 include/linux/acpi_irq.h (limited to 'include/linux') diff --git a/include/linux/acpi_irq.h b/include/linux/acpi_irq.h deleted file mode 100644 index f10c87265855..000000000000 --- a/include/linux/acpi_irq.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef _LINUX_ACPI_IRQ_H -#define _LINUX_ACPI_IRQ_H - -#include - -#ifndef acpi_irq_init -static inline void acpi_irq_init(void) { } -#endif - -#endif /* _LINUX_ACPI_IRQ_H */ diff --git a/include/linux/irqchip.h b/include/linux/irqchip.h index 638887376e58..89c34b200671 100644 --- a/include/linux/irqchip.h +++ b/include/linux/irqchip.h @@ -11,6 +11,7 @@ #ifndef _LINUX_IRQCHIP_H #define _LINUX_IRQCHIP_H +#include #include /* @@ -25,6 +26,22 @@ */ #define IRQCHIP_DECLARE(name, compat, fn) OF_DECLARE_2(irqchip, name, compat, fn) +/* + * This macro must be used by the different irqchip drivers to declare + * the association between their version and their initialization function. + * + * @name: name that must be unique accross all IRQCHIP_ACPI_DECLARE of the + * same file. + * @subtable: Subtable to be identified in MADT + * @validate: Function to be called on that subtable to check its validity. + * Can be NULL. + * @data: data to be checked by the validate function. + * @fn: initialization function + */ +#define IRQCHIP_ACPI_DECLARE(name, subtable, validate, data, fn) \ + ACPI_DECLARE_PROBE_ENTRY(irqchip, name, ACPI_SIG_MADT, \ + subtable, validate, data, fn) + #ifdef CONFIG_IRQCHIP void irqchip_init(void); #else -- cgit v1.2.3 From f26527b1428f379fbd7edf779854c3b41bc0b3e5 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:14 +0100 Subject: irqchip / GIC: Convert the GIC driver to ACPI probing Now that we have a basic infrastructure to register irqchips and call them on discovery of a matching entry in MADT, convert the GIC driver to this new probing method. It ends up being a code deletion party, which is a rather good thing. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/irqchip/arm-gic-acpi.h | 31 ------------------------------- 1 file changed, 31 deletions(-) delete mode 100644 include/linux/irqchip/arm-gic-acpi.h (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-acpi.h b/include/linux/irqchip/arm-gic-acpi.h deleted file mode 100644 index de3419ed3937..000000000000 --- a/include/linux/irqchip/arm-gic-acpi.h +++ /dev/null @@ -1,31 +0,0 @@ -/* - * Copyright (C) 2014, Linaro Ltd. - * Author: Tomasz Nowicki - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - */ - -#ifndef ARM_GIC_ACPI_H_ -#define ARM_GIC_ACPI_H_ - -#ifdef CONFIG_ACPI - -/* - * Hard code here, we can not get memory size from MADT (but FDT does), - * Actually no need to do that, because this size can be inferred - * from GIC spec. - */ -#define ACPI_GICV2_DIST_MEM_SIZE (SZ_4K) -#define ACPI_GIC_CPU_IF_MEM_SIZE (SZ_8K) - -struct acpi_table_header; - -int gic_v2_acpi_init(struct acpi_table_header *table); -void acpi_gic_init(void); -#else -static inline void acpi_gic_init(void) { } -#endif - -#endif /* ARM_GIC_ACPI_H_ */ -- cgit v1.2.3 From c625f76a9910b9d51df5d6ca40a8da0684326996 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:15 +0100 Subject: clocksource / ACPI: Add probing infrastructure for ACPI-based clocksources DT enjoys a rather nice probing infrastructure for clocksources, while ACPI is so far stuck into a very distant past. This patch introduces a declarative API, allowing clocksources to be self-contained and be called when parsing the GTDT table. Signed-off-by: Marc Zyngier Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/clocksource.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 278dd279a7a8..8cde04803d91 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -258,4 +258,7 @@ void acpi_generic_timer_init(void); static inline void acpi_generic_timer_init(void) { } #endif +#define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ + ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) + #endif /* _LINUX_CLOCKSOURCE_H */ -- cgit v1.2.3 From aad83b15aa21f2d9e46b978b27bc63989e61202d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:16 +0100 Subject: clocksource: Add new CLKSRC_{PROBE,ACPI} config symbols The clocksource probing infrastructure currently depends on CONFIG_CLKSRC_OF, which depends on CONFIG_OF. In order to make this infrastructure selectable even if CONFIG_OF is not selected, introduce a new CONFIG_CLKSRC_PROBE (which allow the infrastructure to be compiled in), and CONFIG_CLKSRC_ACPI (which is the pendent of CONFIG_CLKSRC_OF for ACPI). Signed-off-by: Marc Zyngier Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/clocksource.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 8cde04803d91..6eab6abf89b3 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -246,7 +246,7 @@ extern int clocksource_i8253_init(void); #define CLOCKSOURCE_OF_DECLARE(name, compat, fn) \ OF_DECLARE_1(clksrc, name, compat, fn) -#ifdef CONFIG_CLKSRC_OF +#ifdef CONFIG_CLKSRC_PROBE extern void clocksource_of_init(void); #else static inline void clocksource_of_init(void) {} -- cgit v1.2.3 From ae281cbd2689200329afe2474b2f39f3f6eb54b9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:17 +0100 Subject: clocksource / arm_arch_timer: Convert to ACPI probing It is now absolutely trivial to convert the arch timer driver to use ACPI probing, just like its DT counterpart. Let's enjoy another crapectomy. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/clocksource.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 6eab6abf89b3..116645f746c1 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -252,12 +252,6 @@ extern void clocksource_of_init(void); static inline void clocksource_of_init(void) {} #endif -#ifdef CONFIG_ACPI -void acpi_generic_timer_init(void); -#else -static inline void acpi_generic_timer_init(void) { } -#endif - #define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ ACPI_DECLARE_PROBE_ENTRY(clksrc, name, table_id, 0, NULL, 0, fn) -- cgit v1.2.3 From 3722ed2380ad6e89eaf81fcf93f06d605e740435 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Mon, 28 Sep 2015 15:49:18 +0100 Subject: clocksource: cosmetic: Drop OF 'dependency' from symbols Seeing the 'of' characters in a symbol that is being called from ACPI seems to freak out people. So let's do a bit of pointless renaming so that these folks do feel at home. Signed-off-by: Marc Zyngier Acked-by: Catalin Marinas Reviewed-by: Hanjun Guo Acked-by: Thomas Gleixner Tested-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/clocksource.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clocksource.h b/include/linux/clocksource.h index 116645f746c1..7784b597e959 100644 --- a/include/linux/clocksource.h +++ b/include/linux/clocksource.h @@ -247,9 +247,9 @@ extern int clocksource_i8253_init(void); OF_DECLARE_1(clksrc, name, compat, fn) #ifdef CONFIG_CLKSRC_PROBE -extern void clocksource_of_init(void); +extern void clocksource_probe(void); #else -static inline void clocksource_of_init(void) {} +static inline void clocksource_probe(void) {} #endif #define CLOCKSOURCE_ACPI_DECLARE(name, table_id, fn) \ -- cgit v1.2.3 From 9290a16cf19301224556bc7bcb913c0c2a45bb9a Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Fri, 21 Aug 2015 11:48:37 +0000 Subject: dmaengine: OF DMAEngine API based on CONFIG_DMA_OF instead of CONFIG_OF 5fa422c ("dmaengine: move drivers/of/dma.c -> drivers/dma/of-dma.c") moved OF base DMAEngine code to of-dma.c, then it based on CONFIG_DMA_OF. But, OF base DMAEngine API on of_dma.h still based on CONFIG_OF now. So, current kernel can't find OF base DMAEngine API if .config has CONFIG_OF, but not have CONFIG_DMA_OF. This patch tidyup it. Signed-off-by: Kuninori Morimoto Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 98ba7525929e..36112cdd665a 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -34,7 +34,7 @@ struct of_dma_filter_info { dma_filter_fn filter_fn; }; -#ifdef CONFIG_OF +#ifdef CONFIG_DMA_OF extern int of_dma_controller_register(struct device_node *np, struct dma_chan *(*of_dma_xlate) (struct of_phandle_args *, struct of_dma *), -- cgit v1.2.3 From f4829a9b7a61e159367350008a608b062c4f6840 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:50 +0200 Subject: blk-mq: fix racy updates of rq->errors blk_mq_complete_request may be a no-op if the request has already been completed by others means (e.g. a timeout or cancellation), but currently drivers have to set rq->errors before calling blk_mq_complete_request, which might leave us with the wrong error value. Add an error parameter to blk_mq_complete_request so that we can defer setting rq->errors until we known we won the race to complete the request. Signed-off-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b80ba4572a31..c1b5c867ff07 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -214,7 +214,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_abort_requeue_list(struct request_queue *q); -void blk_mq_complete_request(struct request *rq); +void blk_mq_complete_request(struct request *rq, int error); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); -- cgit v1.2.3 From 0bf6cd5b9531bcc29c0a5e504b6ce2984c6fd8d8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 27 Sep 2015 21:01:51 +0200 Subject: blk-mq: factor out a helper to iterate all tags for a request_queue And replace the blk_mq_tag_busy_iter with it - the driver use has been replaced with a new helper a while ago, and internal to the block we only need the new version. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index c1b5c867ff07..5e7d43ab61c0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,8 +223,6 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_run_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, - void *priv); void blk_mq_all_tag_busy_iter(struct blk_mq_tags *tags, busy_tag_iter_fn *fn, void *priv); void blk_mq_freeze_queue(struct request_queue *q); -- cgit v1.2.3 From f1e0bb0ad473a32d1b7e6d285ae9f7e47710bb5e Mon Sep 17 00:00:00 2001 From: Yang Yingliang Date: Thu, 24 Sep 2015 17:32:13 +0800 Subject: genirq: Introduce generic irq migration for cpu hotunplug ARM and ARM64 have almost identical code for migrating interrupts on cpu hotunplug. Provide a generic version which can be used by both. The new code addresses a shortcoming in the ARM[64] variants which fails to update the affinity change in some cases. The solution for this is to use the core function irq_do_set_affinity() instead of open coding it. [ tglx: Added copyright notice and license boilerplate. Rewrote subject and changelog. ] Signed-off-by: Yang Yingliang Acked-by: Russell King - ARM Linux Cc: Jiang Liu Cc: Marc Zyngier Cc: Mark Rutland Cc: Will Deacon Cc: Hanjun Guo Cc: Link: http://lkml.kernel.org/r/1443087135-17044-2-git-send-email-yangyingliang@huawei.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 11bf09288ddb..45cc7299bb61 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -452,6 +452,8 @@ extern int irq_set_affinity_locked(struct irq_data *data, const struct cpumask *cpumask, bool force); extern int irq_set_vcpu_affinity(unsigned int irq, void *vcpu_info); +extern void irq_migrate_all_off_this_cpu(void); + #if defined(CONFIG_SMP) && defined(CONFIG_GENERIC_PENDING_IRQ) void irq_move_irq(struct irq_data *data); void irq_move_masked_irq(struct irq_data *data); -- cgit v1.2.3 From 49df6397edfc5a8ba8ca813b51fb9729d8e94b40 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:40 -0700 Subject: KVM: x86: Split the APIC from the rest of IRQCHIP. First patch in a series which enables the relocation of the PIC/IOAPIC to userspace. Adds capability KVM_CAP_SPLIT_IRQCHIP; KVM_CAP_SPLIT_IRQCHIP enables the construction of LAPICs without the rest of the irqchip. Compile tested for x86. Signed-off-by: Steve Rutherford Suggested-by: Andrew Honig Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..354f147647ab 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1002,6 +1002,7 @@ static inline int mmu_notifier_retry(struct kvm *kvm, unsigned long mmu_seq) #endif int kvm_setup_default_irq_routing(struct kvm *kvm); +int kvm_setup_empty_irq_routing(struct kvm *kvm); int kvm_set_irq_routing(struct kvm *kvm, const struct kvm_irq_routing_entry *entries, unsigned nr, -- cgit v1.2.3 From 7543a635aa09eb138b2cbf60ac3ff19503ae6954 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:21:41 -0700 Subject: KVM: x86: Add KVM exit for IOAPIC EOIs Adds KVM_EXIT_IOAPIC_EOI which allows the kernel to EOI level-triggered IOAPIC interrupts. Uses a per VCPU exit bitmap to decide whether or not the IOAPIC needs to be informed (which is identical to the EOI_EXIT_BITMAP field used by modern x86 processors, but can also be used to elide kvm IOAPIC EOI exits on older processors). [Note: A prototype using ResampleFDs found that decoupling the EOI from the VCPU's thread made it possible for the VCPU to not see a recent EOI after reentering the guest. This does not match real hardware.] Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 354f147647ab..3b33215ed447 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -140,6 +140,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_APIC_PAGE_RELOAD 25 #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 +#define KVM_REQ_IOAPIC_EOI_EXIT 28 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 @@ -1146,4 +1147,3 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ #endif - -- cgit v1.2.3 From b053b2aef25d00773fa6762dcd4b7f5c9c42d171 Mon Sep 17 00:00:00 2001 From: Steve Rutherford Date: Wed, 29 Jul 2015 23:32:35 -0700 Subject: KVM: x86: Add EOI exit bitmap inference In order to support a userspace IOAPIC interacting with an in kernel APIC, the EOI exit bitmaps need to be configurable. If the IOAPIC is in userspace (i.e. the irqchip has been split), the EOI exit bitmaps will be set whenever the GSI Routes are configured. In particular, for the low MSI routes are reservable for userspace IOAPICs. For these MSI routes, the EOI Exit bit corresponding to the destination vector of the route will be set for the destination VCPU. The intention is for the userspace IOAPICs to use the reservable MSI routes to inject interrupts into the guest. This is a slight abuse of the notion of an MSI Route, given that MSIs classically bypass the IOAPIC. It might be worthwhile to add an additional route type to improve clarity. Compile tested for Intel x86. Signed-off-by: Steve Rutherford Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 3b33215ed447..d754f2d826e9 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -330,6 +330,18 @@ struct kvm_kernel_irq_routing_entry { struct hlist_node link; }; +#ifdef CONFIG_HAVE_KVM_IRQ_ROUTING +struct kvm_irq_routing_table { + int chip[KVM_NR_IRQCHIPS][KVM_IRQCHIP_NUM_PINS]; + u32 nr_rt_entries; + /* + * Array indexed by gsi. Each entry contains list of irq chips + * the gsi is connected to. + */ + struct hlist_head map[0]; +}; +#endif + #ifndef KVM_PRIVATE_MEM_SLOTS #define KVM_PRIVATE_MEM_SLOTS 0 #endif @@ -456,10 +468,14 @@ void vcpu_put(struct kvm_vcpu *vcpu); #ifdef __KVM_HAVE_IOAPIC void kvm_vcpu_request_scan_ioapic(struct kvm *kvm); +void kvm_arch_irq_routing_update(struct kvm *kvm); #else static inline void kvm_vcpu_request_scan_ioapic(struct kvm *kvm) { } +static inline void kvm_arch_irq_routing_update(struct kvm *kvm) +{ +} #endif #ifdef CONFIG_HAVE_KVM_IRQFD -- cgit v1.2.3 From e516cebb4f2b2057a5a421fea589079502acfff6 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Wed, 16 Sep 2015 12:29:48 +0300 Subject: kvm/x86: Hyper-V HV_X64_MSR_RESET msr HV_X64_MSR_RESET msr is used by Hyper-V based Windows guest to reset guest VM by hypervisor. Necessary to support loading of winhv.sys in guest, which in turn is required to support Windows VMBus. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Paolo Bonzini CC: Gleb Natapov Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index d754f2d826e9..cd0ba2e931e1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -141,6 +141,7 @@ static inline bool is_error_page(struct page *page) #define KVM_REQ_SMI 26 #define KVM_REQ_HV_CRASH 27 #define KVM_REQ_IOAPIC_EOI_EXIT 28 +#define KVM_REQ_HV_RESET 29 #define KVM_USERSPACE_IRQ_SOURCE_ID 0 #define KVM_IRQFD_RESAMPLE_IRQ_SOURCE_ID 1 -- cgit v1.2.3 From f73f8173126ba68eb1c42bd9a234a51d78576ca6 Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Fri, 18 Sep 2015 22:29:39 +0800 Subject: virt: IRQ bypass manager When a physical I/O device is assigned to a virtual machine through facilities like VFIO and KVM, the interrupt for the device generally bounces through the host system before being injected into the VM. However, hardware technologies exist that often allow the host to be bypassed for some of these scenarios. Intel Posted Interrupts allow the specified physical edge interrupts to be directly injected into a guest when delivered to a physical processor while the vCPU is running. ARM IRQ Forwarding allows forwarded physical interrupts to be directly deactivated by the guest. The IRQ bypass manager here is meant to provide the shim to connect interrupt producers, generally the host physical device driver, with interrupt consumers, generally the hypervisor, in order to configure these bypass mechanism. To do this, we base the connection on a shared, opaque token. For KVM-VFIO this is expected to be an eventfd_ctx since this is the connection we already use to connect an eventfd to an irqfd on the in-kernel path. When a producer and consumer with matching tokens is found, callbacks via both registered participants allow the bypass facilities to be automatically enabled. Signed-off-by: Alex Williamson Reviewed-by: Eric Auger Tested-by: Eric Auger Tested-by: Feng Wu Signed-off-by: Feng Wu Signed-off-by: Paolo Bonzini --- include/linux/irqbypass.h | 90 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 90 insertions(+) create mode 100644 include/linux/irqbypass.h (limited to 'include/linux') diff --git a/include/linux/irqbypass.h b/include/linux/irqbypass.h new file mode 100644 index 000000000000..1551b5b2f4c2 --- /dev/null +++ b/include/linux/irqbypass.h @@ -0,0 +1,90 @@ +/* + * IRQ offload/bypass manager + * + * Copyright (C) 2015 Red Hat, Inc. + * Copyright (c) 2015 Linaro Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef IRQBYPASS_H +#define IRQBYPASS_H + +#include + +struct irq_bypass_consumer; + +/* + * Theory of operation + * + * The IRQ bypass manager is a simple set of lists and callbacks that allows + * IRQ producers (ex. physical interrupt sources) to be matched to IRQ + * consumers (ex. virtualization hardware that allows IRQ bypass or offload) + * via a shared token (ex. eventfd_ctx). Producers and consumers register + * independently. When a token match is found, the optional @stop callback + * will be called for each participant. The pair will then be connected via + * the @add_* callbacks, and finally the optional @start callback will allow + * any final coordination. When either participant is unregistered, the + * process is repeated using the @del_* callbacks in place of the @add_* + * callbacks. Match tokens must be unique per producer/consumer, 1:N pairings + * are not supported. + */ + +/** + * struct irq_bypass_producer - IRQ bypass producer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @irq: Linux IRQ number for the producer device + * @add_consumer: Connect the IRQ producer to an IRQ consumer (optional) + * @del_consumer: Disconnect the IRQ producer from an IRQ consumer (optional) + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass producer structure represents an interrupt source for + * participation in possible host bypass, for instance an interrupt vector + * for a physical device assigned to a VM. + */ +struct irq_bypass_producer { + struct list_head node; + void *token; + int irq; + int (*add_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*del_consumer)(struct irq_bypass_producer *, + struct irq_bypass_consumer *); + void (*stop)(struct irq_bypass_producer *); + void (*start)(struct irq_bypass_producer *); +}; + +/** + * struct irq_bypass_consumer - IRQ bypass consumer definition + * @node: IRQ bypass manager private list management + * @token: opaque token to match between producer and consumer + * @add_producer: Connect the IRQ consumer to an IRQ producer + * @del_producer: Disconnect the IRQ consumer from an IRQ producer + * @stop: Perform any quiesce operations necessary prior to add/del (optional) + * @start: Perform any startup operations necessary after add/del (optional) + * + * The IRQ bypass consumer structure represents an interrupt sink for + * participation in possible host bypass, for instance a hypervisor may + * support offloads to allow bypassing the host entirely or offload + * portions of the interrupt handling to the VM. + */ +struct irq_bypass_consumer { + struct list_head node; + void *token; + int (*add_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*del_producer)(struct irq_bypass_consumer *, + struct irq_bypass_producer *); + void (*stop)(struct irq_bypass_consumer *); + void (*start)(struct irq_bypass_consumer *); +}; + +int irq_bypass_register_producer(struct irq_bypass_producer *); +void irq_bypass_unregister_producer(struct irq_bypass_producer *); +int irq_bypass_register_consumer(struct irq_bypass_consumer *); +void irq_bypass_unregister_consumer(struct irq_bypass_consumer *); + +#endif /* IRQBYPASS_H */ -- cgit v1.2.3 From 166c9775f1f8b8f00ad1db0fa5c8fc74059d965d Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:42 +0800 Subject: KVM: create kvm_irqfd.h Move _irqfd_resampler and _irqfd struct declarations in a new public header: kvm_irqfd.h. They are respectively renamed into kvm_kernel_irqfd_resampler and kvm_kernel_irqfd. Those datatypes will be used by architecture specific code, in the context of IRQ bypass manager integration. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 69 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 include/linux/kvm_irqfd.h (limited to 'include/linux') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h new file mode 100644 index 000000000000..f926b39a26b6 --- /dev/null +++ b/include/linux/kvm_irqfd.h @@ -0,0 +1,69 @@ +/* + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * irqfd: Allows an fd to be used to inject an interrupt to the guest + * Credit goes to Avi Kivity for the original idea. + */ + +#ifndef __LINUX_KVM_IRQFD_H +#define __LINUX_KVM_IRQFD_H + +#include +#include + +/* + * Resampling irqfds are a special variety of irqfds used to emulate + * level triggered interrupts. The interrupt is asserted on eventfd + * trigger. On acknowledgment through the irq ack notifier, the + * interrupt is de-asserted and userspace is notified through the + * resamplefd. All resamplers on the same gsi are de-asserted + * together, so we don't need to track the state of each individual + * user. We can also therefore share the same irq source ID. + */ +struct kvm_kernel_irqfd_resampler { + struct kvm *kvm; + /* + * List of resampling struct _irqfd objects sharing this gsi. + * RCU list modified under kvm->irqfds.resampler_lock + */ + struct list_head list; + struct kvm_irq_ack_notifier notifier; + /* + * Entry in list of kvm->irqfd.resampler_list. Use for sharing + * resamplers among irqfds on the same gsi. + * Accessed and modified under kvm->irqfds.resampler_lock + */ + struct list_head link; +}; + +struct kvm_kernel_irqfd { + /* Used for MSI fast-path */ + struct kvm *kvm; + wait_queue_t wait; + /* Update side is protected by irqfds.lock */ + struct kvm_kernel_irq_routing_entry irq_entry; + seqcount_t irq_entry_sc; + /* Used for level IRQ fast-path */ + int gsi; + struct work_struct inject; + /* The resampler used by this irqfd (resampler-only) */ + struct kvm_kernel_irqfd_resampler *resampler; + /* Eventfd notified on resample (resampler-only) */ + struct eventfd_ctx *resamplefd; + /* Entry in list of irqfds for a resampler (resampler-only) */ + struct list_head resampler_link; + /* Used for setup/shutdown */ + struct eventfd_ctx *eventfd; + struct list_head list; + poll_table pt; + struct work_struct shutdown; +}; + +#endif /* __LINUX_KVM_IRQFD_H */ -- cgit v1.2.3 From 1a02b27035f82091d51ecafcb9ccaac1f31d4eb2 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:43 +0800 Subject: KVM: introduce kvm_arch functions for IRQ bypass This patch introduces - kvm_arch_irq_bypass_add_producer - kvm_arch_irq_bypass_del_producer - kvm_arch_irq_bypass_stop - kvm_arch_irq_bypass_start They make possible to specialize the KVM IRQ bypass consumer in case CONFIG_KVM_HAVE_IRQ_BYPASS is set. Signed-off-by: Eric Auger [Add weak implementations of the callbacks. - Feng] Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index cd0ba2e931e1..b8543b02b7bc 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -24,6 +24,7 @@ #include #include #include +#include #include #include @@ -1163,4 +1164,13 @@ static inline void kvm_vcpu_set_dy_eligible(struct kvm_vcpu *vcpu, bool val) { } #endif /* CONFIG_HAVE_KVM_CPU_RELAX_INTERCEPT */ + +#ifdef CONFIG_HAVE_KVM_IRQ_BYPASS +int kvm_arch_irq_bypass_add_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, + struct irq_bypass_producer *); +void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); +void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +#endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif -- cgit v1.2.3 From 9016cfb577a15abd6a7990890ccf6bf1edf04d31 Mon Sep 17 00:00:00 2001 From: Eric Auger Date: Fri, 18 Sep 2015 22:29:44 +0800 Subject: KVM: eventfd: add irq bypass consumer management This patch adds the registration/unregistration of an irq_bypass_consumer on irqfd assignment/deassignment. Signed-off-by: Eric Auger Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_irqfd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_irqfd.h b/include/linux/kvm_irqfd.h index f926b39a26b6..0c1de05098c8 100644 --- a/include/linux/kvm_irqfd.h +++ b/include/linux/kvm_irqfd.h @@ -64,6 +64,8 @@ struct kvm_kernel_irqfd { struct list_head list; poll_table pt; struct work_struct shutdown; + struct irq_bypass_consumer consumer; + struct irq_bypass_producer *producer; }; #endif /* __LINUX_KVM_IRQFD_H */ -- cgit v1.2.3 From f70c20aaf141adb715a2d750c55154073b02a9c3 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:53 +0800 Subject: KVM: Add an arch specific hooks in 'struct kvm_kernel_irqfd' This patch adds an arch specific hooks 'arch_update' in 'struct kvm_kernel_irqfd'. On Intel side, it is used to update the IRTE when VT-d posted-interrupts is used. Signed-off-by: Feng Wu Reviewed-by: Alex Williamson Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b8543b02b7bc..5c3f4538807f 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1172,5 +1172,7 @@ void kvm_arch_irq_bypass_del_producer(struct irq_bypass_consumer *, struct irq_bypass_producer *); void kvm_arch_irq_bypass_stop(struct irq_bypass_consumer *); void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); +int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, + uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ #endif -- cgit v1.2.3 From bf9f6ac8d74969690df1485b33b7c238ca9f2269 Mon Sep 17 00:00:00 2001 From: Feng Wu Date: Fri, 18 Sep 2015 22:29:55 +0800 Subject: KVM: Update Posted-Interrupts Descriptor when vCPU is blocked This patch updates the Posted-Interrupts Descriptor when vCPU is blocked. pre-block: - Add the vCPU to the blocked per-CPU list - Set 'NV' to POSTED_INTR_WAKEUP_VECTOR post-block: - Remove the vCPU from the per-CPU list Signed-off-by: Feng Wu [Concentrate invocation of pre/post-block hooks to vcpu_block. - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 5c3f4538807f..9596a2f0977b 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -234,6 +234,9 @@ struct kvm_vcpu { unsigned long requests; unsigned long guest_debug; + int pre_pcpu; + struct list_head blocked_vcpu_list; + struct mutex mutex; struct kvm_run *run; -- cgit v1.2.3 From 92068d17c20b218bf1e24505a3e08495476b0ebf Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 1 Oct 2015 15:54:52 +0300 Subject: genirq: Fix typo in documentation of enumeration field name It should say IRQ_NESTED_THREAD instead of IRQ_NESTED_TRHEAD. Signed-off-by: Mika Westerberg Cc: Jiang Liu Link: http://lkml.kernel.org/r/1443704093-36837-1-git-send-email-mika.westerberg@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 45cc7299bb61..7038f38a63c7 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -67,7 +67,7 @@ enum irqchip_irq_state; * request/setup_irq() * IRQ_NO_BALANCING - Interrupt cannot be balanced (affinity set) * IRQ_MOVE_PCNTXT - Interrupt can be migrated from process context - * IRQ_NESTED_TRHEAD - Interrupt nests into another thread + * IRQ_NESTED_THREAD - Interrupt nests into another thread * IRQ_PER_CPU_DEVID - Dev_id is a per-cpu variable * IRQ_IS_POLLED - Always polled by another interrupt. Exclude * it from the spurious interrupt detection -- cgit v1.2.3 From 9e7e2b0a6a005941a6854cdabae19c3d9e069dbe Mon Sep 17 00:00:00 2001 From: Mika Westerberg Date: Thu, 1 Oct 2015 15:54:53 +0300 Subject: genirq: Remove unused functions irqd_[set|clr]_chained_irq_inprogress() These two functions are not used anywhere in the kernel source tree so remove them. Signed-off-by: Mika Westerberg Cc: Jiang Liu Link: http://lkml.kernel.org/r/1443704093-36837-2-git-send-email-mika.westerberg@linux.intel.com Signed-off-by: Thomas Gleixner --- include/linux/irq.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index 7038f38a63c7..ba72b60b57b1 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -297,21 +297,6 @@ static inline void irqd_clr_forwarded_to_vcpu(struct irq_data *d) __irqd_to_state(d) &= ~IRQD_FORWARDED_TO_VCPU; } -/* - * Functions for chained handlers which can be enabled/disabled by the - * standard disable_irq/enable_irq calls. Must be called with - * irq_desc->lock held. - */ -static inline void irqd_set_chained_irq_inprogress(struct irq_data *d) -{ - __irqd_to_state(d) |= IRQD_IRQ_INPROGRESS; -} - -static inline void irqd_clr_chained_irq_inprogress(struct irq_data *d) -{ - __irqd_to_state(d) &= ~IRQD_IRQ_INPROGRESS; -} - static inline irq_hw_number_t irqd_to_hwirq(struct irq_data *d) { return d->hwirq; -- cgit v1.2.3 From 7ec88e4be461590b5a3817460c34603f76d9b3ae Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:28 +0200 Subject: ntp/pps: use timespec64 for hardpps() There is only one user of the hardpps function in the kernel, so it makes sense to atomically change it over to using 64-bit timestamps for y2038 safety. In the hardpps implementation, we also need to change the pps_normtime structure, which is similar to struct timespec and also requires a 64-bit seconds portion. This introduces two temporary variables in pps_kc_event() to do the conversion, they will be removed again in the next step, which seemed preferable to having a larger patch changing it all at the same time. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/timex.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/timex.h b/include/linux/timex.h index 9d3f1a5b6178..39c25dbebfe8 100644 --- a/include/linux/timex.h +++ b/include/linux/timex.h @@ -152,7 +152,7 @@ extern unsigned long tick_nsec; /* SHIFTED_HZ period (nsec) */ #define NTP_INTERVAL_LENGTH (NSEC_PER_SEC/NTP_INTERVAL_FREQ) extern int do_adjtimex(struct timex *); -extern void hardpps(const struct timespec *, const struct timespec *); +extern void hardpps(const struct timespec64 *, const struct timespec64 *); int read_current_timer(unsigned long *timer_val); void ntp_notify_cmos_timer(void); -- cgit v1.2.3 From 071eee45b1650d53d21c636d344bdcebd4577ed2 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:29 +0200 Subject: ntp/pps: replace getnstime_raw_and_real with 64-bit version There is exactly one caller of getnstime_raw_and_real in the kernel, which is the pps_get_ts function. This changes the caller and the implementation to work on timespec64 types rather than timespec, to avoid the time_t overflow on 32-bit architectures. For consistency with the other new functions (ktime_get_seconds, ktime_get_real_*, ...), I'm renaming the function to ktime_get_raw_and_real_ts64. We still need to convert from the internal 64-bit type to 32 bit types in the caller, but this conversion is now pushed out from getnstime_raw_and_real to pps_get_ts. A follow-up patch changes the remaining pps code to completely avoid the conversion. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/pps_kernel.h | 7 ++++++- include/linux/timekeeping.h | 4 ++-- 2 files changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index 1d2cd21242e8..b2fbd62ab18d 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -115,7 +115,12 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, static inline void pps_get_ts(struct pps_event_time *ts) { - getnstime_raw_and_real(&ts->ts_raw, &ts->ts_real); + struct timespec64 raw, real; + + ktime_get_raw_and_real_ts64(&raw, &real); + + ts->ts_raw = timespec64_to_timespec(raw); + ts->ts_real = timespec64_to_timespec(real); } #else /* CONFIG_NTP_PPS */ diff --git a/include/linux/timekeeping.h b/include/linux/timekeeping.h index ba0ae09cbb21..ec89d846324c 100644 --- a/include/linux/timekeeping.h +++ b/include/linux/timekeeping.h @@ -263,8 +263,8 @@ extern void timekeeping_inject_sleeptime64(struct timespec64 *delta); /* * PPS accessor */ -extern void getnstime_raw_and_real(struct timespec *ts_raw, - struct timespec *ts_real); +extern void ktime_get_raw_and_real_ts64(struct timespec64 *ts_raw, + struct timespec64 *ts_real); /* * Persistent clock related interfaces -- cgit v1.2.3 From ade1bdffe90e59cd257cb9bd4f5abe4de5f14911 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 28 Sep 2015 22:21:31 +0200 Subject: ntp/pps: use y2038 safe types in pps_event_time The pps_event_time uses two 'timespec' structures internally, which suffer from the y2038 problem. The uses of this structure are fairly self-contained in the pps code, so this replaces them all at once. Unfortunately, this includes the sfc ethernet driver aside from the pps subsystem, so we change that one as well. Both touch the same data structure, and there probably is no good way to split the patch into smaller units. Acked-by: Richard Cochran Acked-by: David S. Miller Reviewed-by: Thomas Gleixner Signed-off-by: Arnd Bergmann Signed-off-by: John Stultz --- include/linux/pps_kernel.h | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pps_kernel.h b/include/linux/pps_kernel.h index b2fbd62ab18d..54bf1484d41f 100644 --- a/include/linux/pps_kernel.h +++ b/include/linux/pps_kernel.h @@ -48,9 +48,9 @@ struct pps_source_info { struct pps_event_time { #ifdef CONFIG_NTP_PPS - struct timespec ts_raw; + struct timespec64 ts_raw; #endif /* CONFIG_NTP_PPS */ - struct timespec ts_real; + struct timespec64 ts_real; }; /* The main struct */ @@ -105,7 +105,7 @@ extern void pps_event(struct pps_device *pps, struct pps_device *pps_lookup_dev(void const *cookie); static inline void timespec_to_pps_ktime(struct pps_ktime *kt, - struct timespec ts) + struct timespec64 ts) { kt->sec = ts.tv_sec; kt->nsec = ts.tv_nsec; @@ -115,29 +115,24 @@ static inline void timespec_to_pps_ktime(struct pps_ktime *kt, static inline void pps_get_ts(struct pps_event_time *ts) { - struct timespec64 raw, real; - - ktime_get_raw_and_real_ts64(&raw, &real); - - ts->ts_raw = timespec64_to_timespec(raw); - ts->ts_real = timespec64_to_timespec(real); + ktime_get_raw_and_real_ts64(&ts->ts_raw, &ts->ts_real); } #else /* CONFIG_NTP_PPS */ static inline void pps_get_ts(struct pps_event_time *ts) { - getnstimeofday(&ts->ts_real); + ktime_get_real_ts64(&ts->ts_real); } #endif /* CONFIG_NTP_PPS */ /* Subtract known time delay from PPS event time(s) */ -static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec delta) +static inline void pps_sub_ts(struct pps_event_time *ts, struct timespec64 delta) { - ts->ts_real = timespec_sub(ts->ts_real, delta); + ts->ts_real = timespec64_sub(ts->ts_real, delta); #ifdef CONFIG_NTP_PPS - ts->ts_raw = timespec_sub(ts->ts_raw, delta); + ts->ts_raw = timespec64_sub(ts->ts_raw, delta); #endif } -- cgit v1.2.3 From a7f5ba40c74e67b2c8c6e15425157ab6775621d7 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Thu, 1 Oct 2015 16:58:27 +0200 Subject: mtd: nand: remove unused ->init_size() hook The ->init_size() hook was introduced to let NAND controller drivers support NAND devices that could not be described in the nand_ids table. Since then, the core has added support for extended-id parsing and full-id description, thus allowing to describe pretty much all existing NANDs. Moreover, this hook is not used by any mainline driver, and should not be used by new drivers, because detecting the NAND chip is not something controller specific. Signed-off-by: Boris Brezillon Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c4d8e308f453..50e1f94fa377 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -556,10 +556,6 @@ struct nand_buffers { * @block_markbad: [REPLACEABLE] mark a block bad * @cmd_ctrl: [BOARDSPECIFIC] hardwarespecific function for controlling * ALE/CLE/nCE. Also used to write command and address - * @init_size: [BOARDSPECIFIC] hardwarespecific function for setting - * mtd->oobsize, mtd->writesize and so on. - * @id_data contains the 8 bytes values of NAND_CMD_READID. - * Return with the bus width. * @dev_ready: [BOARDSPECIFIC] hardwarespecific function for accessing * device ready/busy line. If set to NULL no access to * ready/busy is available and the ready/busy information @@ -658,8 +654,6 @@ struct nand_chip { int (*block_bad)(struct mtd_info *mtd, loff_t ofs, int getchip); int (*block_markbad)(struct mtd_info *mtd, loff_t ofs); void (*cmd_ctrl)(struct mtd_info *mtd, int dat, unsigned int ctrl); - int (*init_size)(struct mtd_info *mtd, struct nand_chip *this, - u8 *id_data); int (*dev_ready)(struct mtd_info *mtd); void (*cmdfunc)(struct mtd_info *mtd, unsigned command, int column, int page_addr); -- cgit v1.2.3 From 96ef36e9c424b7a66413bb9229ef5afcddf4fef4 Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Wed, 17 Jun 2015 14:40:38 +0200 Subject: clk: at91: cleanup PMC header file for PCR register fields Add _MASK and _OFFSET values and cleanup register fields layout. Signed-off-by: Nicolas Ferre Signed-off-by: Boris Brezillon Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index 7669f7618f39..dfc59e2b64fb 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -182,13 +182,11 @@ extern void __iomem *at91_pmc_base; #define AT91_PMC_PCSR1 0x108 /* Peripheral Clock Enable Register 1 */ #define AT91_PMC_PCR 0x10c /* Peripheral Control Register [some SAM9 and SAMA5] */ -#define AT91_PMC_PCR_PID (0x3f << 0) /* Peripheral ID */ -#define AT91_PMC_PCR_CMD (0x1 << 12) /* Command (read=0, write=1) */ -#define AT91_PMC_PCR_DIV(n) ((n) << 16) /* Divisor Value */ -#define AT91_PMC_PCR_DIV0 0x0 /* Peripheral clock is MCK */ -#define AT91_PMC_PCR_DIV2 0x1 /* Peripheral clock is MCK/2 */ -#define AT91_PMC_PCR_DIV4 0x2 /* Peripheral clock is MCK/4 */ -#define AT91_PMC_PCR_DIV8 0x3 /* Peripheral clock is MCK/8 */ -#define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ +#define AT91_PMC_PCR_PID_MASK 0x3f +#define AT91_PMC_PCR_CMD (0x1 << 12) /* Command (read=0, write=1) */ +#define AT91_PMC_PCR_DIV_OFFSET 16 +#define AT91_PMC_PCR_DIV_MASK (0x3 << AT91_PMC_PCR_DIV_OFFSET) +#define AT91_PMC_PCR_DIV(n) ((n) << AT91_PMC_PCR_DIV_OFFSET) /* Divisor Value */ +#define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ #endif -- cgit v1.2.3 From a5752e57bb63154fe9202d8d2282bad3bae3bced Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Thu, 18 Jun 2015 14:43:29 +0200 Subject: clk: at91: add PMC sama5d2 support Add support for the new sama5d2 SoC and adapt capabilities. Signed-off-by: Nicolas Ferre Signed-off-by: Boris Brezillon Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index dfc59e2b64fb..dc2a0fa62eaa 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -164,6 +164,7 @@ extern void __iomem *at91_pmc_base; #define AT91_PMC_MOSCSELS (1 << 16) /* Main Oscillator Selection [some SAM9] */ #define AT91_PMC_MOSCRCS (1 << 17) /* Main On-Chip RC [some SAM9] */ #define AT91_PMC_CFDEV (1 << 18) /* Clock Failure Detector Event [some SAM9] */ +#define AT91_PMC_GCKRDY (1 << 24) /* Generated Clocks */ #define AT91_PMC_IMR 0x6c /* Interrupt Mask Register */ #define AT91_PMC_PLLICPR 0x80 /* PLL Charge Pump Current Register */ -- cgit v1.2.3 From df70aeef60839cb2732913fa41e61aba52ca942c Mon Sep 17 00:00:00 2001 From: Nicolas Ferre Date: Fri, 31 Jul 2015 11:43:12 +0200 Subject: clk: at91: add generated clock driver Add a new type of clocks that can be provided to a peripheral. In addition to the peripheral clock, this new clock that can use several input clocks as parents can generate divided rates. This would allow a peripheral to have finer grained clocks for generating a baud rate, clocking an asynchronous part or having more options in frequency. Signed-off-by: Nicolas Ferre Signed-off-by: Boris Brezillon [sboyd@codeaurora.org: Transition to new clk_hw provider APIs] Signed-off-by: Stephen Boyd --- include/linux/clk/at91_pmc.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk/at91_pmc.h b/include/linux/clk/at91_pmc.h index dc2a0fa62eaa..1e6932222e11 100644 --- a/include/linux/clk/at91_pmc.h +++ b/include/linux/clk/at91_pmc.h @@ -184,10 +184,17 @@ extern void __iomem *at91_pmc_base; #define AT91_PMC_PCR 0x10c /* Peripheral Control Register [some SAM9 and SAMA5] */ #define AT91_PMC_PCR_PID_MASK 0x3f +#define AT91_PMC_PCR_GCKCSS_OFFSET 8 +#define AT91_PMC_PCR_GCKCSS_MASK (0x7 << AT91_PMC_PCR_GCKCSS_OFFSET) +#define AT91_PMC_PCR_GCKCSS(n) ((n) << AT91_PMC_PCR_GCKCSS_OFFSET) /* GCK Clock Source Selection */ #define AT91_PMC_PCR_CMD (0x1 << 12) /* Command (read=0, write=1) */ #define AT91_PMC_PCR_DIV_OFFSET 16 #define AT91_PMC_PCR_DIV_MASK (0x3 << AT91_PMC_PCR_DIV_OFFSET) #define AT91_PMC_PCR_DIV(n) ((n) << AT91_PMC_PCR_DIV_OFFSET) /* Divisor Value */ +#define AT91_PMC_PCR_GCKDIV_OFFSET 20 +#define AT91_PMC_PCR_GCKDIV_MASK (0xff << AT91_PMC_PCR_GCKDIV_OFFSET) +#define AT91_PMC_PCR_GCKDIV(n) ((n) << AT91_PMC_PCR_GCKDIV_OFFSET) /* Generated Clock Divisor Value */ #define AT91_PMC_PCR_EN (0x1 << 28) /* Enable */ +#define AT91_PMC_PCR_GCKEN (0x1 << 29) /* GCK Enable */ #endif -- cgit v1.2.3 From 0610c25daa3e76e38ad5a8fae683a89ff9f71798 Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:02 -0700 Subject: memcg: fix dirty page migration The problem starts with a file backed dirty page which is charged to a memcg. Then page migration is used to move oldpage to newpage. Migration: - copies the oldpage's data to newpage - clears oldpage.PG_dirty - sets newpage.PG_dirty - uncharges oldpage from memcg - charges newpage to memcg Clearing oldpage.PG_dirty decrements the charged memcg's dirty page count. However, because newpage is not yet charged, setting newpage.PG_dirty does not increment the memcg's dirty page count. After migration completes newpage.PG_dirty is eventually cleared, often in account_page_cleaned(). At this time newpage is charged to a memcg so the memcg's dirty page count is decremented which causes underflow because the count was not previously incremented by migration. This underflow causes balance_dirty_pages() to see a very large unsigned number of dirty memcg pages which leads to aggressive throttling of buffered writes by processes in non root memcg. This issue: - can harm performance of non root memcg buffered writes. - can report too small (even negative) values in memory.stat[(total_)dirty] counters of all memcg, including the root. To avoid polluting migrate.c with #ifdef CONFIG_MEMCG checks, introduce page_memcg() and set_page_memcg() helpers. Test: 0) setup and enter limited memcg mkdir /sys/fs/cgroup/test echo 1G > /sys/fs/cgroup/test/memory.limit_in_bytes echo $$ > /sys/fs/cgroup/test/cgroup.procs 1) buffered writes baseline dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k sync grep ^dirty /sys/fs/cgroup/test/memory.stat 2) buffered writes with compaction antagonist to induce migration yes 1 > /proc/sys/vm/compact_memory & rm -rf /data/tmp/foo dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k kill % sync grep ^dirty /sys/fs/cgroup/test/memory.stat 3) buffered writes without antagonist, should match baseline rm -rf /data/tmp/foo dd if=/dev/zero of=/data/tmp/foo bs=1M count=1k sync grep ^dirty /sys/fs/cgroup/test/memory.stat (speed, dirty residue) unpatched patched 1) 841 MB/s 0 dirty pages 886 MB/s 0 dirty pages 2) 611 MB/s -33427456 dirty pages 793 MB/s 0 dirty pages 3) 114 MB/s -33427456 dirty pages 891 MB/s 0 dirty pages Notice that unpatched baseline performance (1) fell after migration (3): 841 -> 114 MB/s. In the patched kernel, post migration performance matches baseline. Fixes: c4843a7593a9 ("memcg: add per cgroup dirty page accounting") Signed-off-by: Greg Thelen Reported-by: Dave Hansen Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: [4.2+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 91c08f6f0dc9..80001de019ba 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -905,6 +905,27 @@ static inline void set_page_links(struct page *page, enum zone_type zone, #endif } +#ifdef CONFIG_MEMCG +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return page->mem_cgroup; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ + page->mem_cgroup = memcg; +} +#else +static inline struct mem_cgroup *page_memcg(struct page *page) +{ + return NULL; +} + +static inline void set_page_memcg(struct page *page, struct mem_cgroup *memcg) +{ +} +#endif + /* * Some inline functions in vmstat.h depend on page_zone() */ -- cgit v1.2.3 From ef510194cefe0cd369ef73419cd65b0a5bb4fb5b Mon Sep 17 00:00:00 2001 From: Greg Thelen Date: Thu, 1 Oct 2015 15:37:13 -0700 Subject: memcg: remove pcp_counter_lock Commit 733a572e66d2 ("memcg: make mem_cgroup_read_{stat|event}() iterate possible cpus instead of online") removed the last use of the per memcg pcp_counter_lock but forgot to remove the variable. Kill the vestigial variable. Signed-off-by: Greg Thelen Acked-by: Michal Hocko Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ad800e62cb7a..6452ff4c463f 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -242,7 +242,6 @@ struct mem_cgroup { * percpu counter. */ struct mem_cgroup_stat_cpu __percpu *stat; - spinlock_t pcp_counter_lock; #if defined(CONFIG_MEMCG_KMEM) && defined(CONFIG_INET) struct cg_proto tcp_mem; -- cgit v1.2.3 From 068654c200cc32966ce7906ca0bd096b9b97e988 Mon Sep 17 00:00:00 2001 From: Lorenzo Pieralisi Date: Tue, 26 May 2015 16:49:01 +0100 Subject: drivers: firmware: psci: move power_state handling to generic code Functions implemented on arm64 to check if a power_state parameter is valid and if the power_state implies context loss are not arm64 specific and should be moved to generic code so that they can be reused on arm systems too. This patch moves the functions handling the power_state parameter to generic PSCI firmware layer code. Signed-off-by: Lorenzo Pieralisi Acked-by: Will Deacon Acked-by: Sudeep Holla Tested-by: Jisheng Zhang Cc: Catalin Marinas Cc: Mark Rutland --- include/linux/psci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/psci.h b/include/linux/psci.h index a682fcc91c33..12c4865457ad 100644 --- a/include/linux/psci.h +++ b/include/linux/psci.h @@ -21,6 +21,8 @@ #define PSCI_POWER_STATE_TYPE_POWER_DOWN 1 bool psci_tos_resident_on(int cpu); +bool psci_power_state_loses_context(u32 state); +bool psci_power_state_is_valid(u32 state); struct psci_operations { int (*cpu_suspend)(u32 state, unsigned long entry_point); -- cgit v1.2.3 From 934e2536b1bfe663de033298f75c1b8ff9d0c9ea Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 22 Sep 2015 18:54:09 +0300 Subject: clk: fractional-divider: keep mwidth and nwidth internally The patch adds mwidth and nwidth fields to the struct clk_fractional_divider for further usage. While here, use GENMASK() instead of open coding this functionality. Reviewed-by: Heikki Krogerus Signed-off-by: Andy Shevchenko Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3ecc07d0da77..8ff43eb4b311 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -500,13 +500,14 @@ struct clk *clk_register_fixed_factor(struct device *dev, const char *name, * * Clock with adjustable fractional divider affecting its output frequency. */ - struct clk_fractional_divider { struct clk_hw hw; void __iomem *reg; u8 mshift; + u8 mwidth; u32 mmask; u8 nshift; + u8 nwidth; u32 nmask; u8 flags; spinlock_t *lock; -- cgit v1.2.3 From 3c4b23dd71ea6f68be9731b68877fbc05f4fb693 Mon Sep 17 00:00:00 2001 From: Masahiro Yamada Date: Wed, 30 Sep 2015 21:07:17 +0900 Subject: pinctrl: pinconf-generic: sort pin configuration params alphabetically Currently, the dt_params array in drivers/pinctrl/pinconf-generic.c is not sorted in the same order as the enum pin_config_param in include/linux/pinctrl/pinconf-generic.h. Sort enum pin_config_param, conf_items, dt_params, alphabetically for consistency. Signed-off-by: Masahiro Yamada Signed-off-by: Linus Walleij --- include/linux/pinctrl/pinconf-generic.h | 64 ++++++++++++++++----------------- 1 file changed, 32 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pinctrl/pinconf-generic.h b/include/linux/pinctrl/pinconf-generic.h index fe65962b264f..d921afd5f109 100644 --- a/include/linux/pinctrl/pinconf-generic.h +++ b/include/linux/pinctrl/pinconf-generic.h @@ -20,6 +20,11 @@ /** * enum pin_config_param - possible pin configuration parameters + * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it + * weakly drives the last value on a tristate bus, also known as a "bus + * holder", "bus keeper" or "repeater". This allows another device on the + * bus to change the value by driving the bus high or low and switching to + * tristate. The argument is ignored. * @PIN_CONFIG_BIAS_DISABLE: disable any pin bias on the pin, a * transition from say pull-up to pull-down implies that you disable * pull-up in the process, this setting disables all biasing. @@ -29,14 +34,6 @@ * if for example some other pin is going to drive the signal connected * to it for a while. Pins used for input are usually always high * impedance. - * @PIN_CONFIG_BIAS_BUS_HOLD: the pin will be set to weakly latch so that it - * weakly drives the last value on a tristate bus, also known as a "bus - * holder", "bus keeper" or "repeater". This allows another device on the - * bus to change the value by driving the bus high or low and switching to - * tristate. The argument is ignored. - * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high - * impedance to VDD). If the argument is != 0 pull-up is enabled, - * if it is 0, pull-up is total, i.e. the pin is connected to VDD. * @PIN_CONFIG_BIAS_PULL_DOWN: the pin will be pulled down (usually with high * impedance to GROUND). If the argument is != 0 pull-down is enabled, * if it is 0, pull-down is total, i.e. the pin is connected to GROUND. @@ -48,10 +45,9 @@ * If the argument is != 0 pull up/down is enabled, if it is 0, the * configuration is ignored. The proper way to disable it is to use * @PIN_CONFIG_BIAS_DISABLE. - * @PIN_CONFIG_DRIVE_PUSH_PULL: the pin will be driven actively high and - * low, this is the most typical case and is typically achieved with two - * active transistors on the output. Setting this config will enable - * push-pull mode, the argument is ignored. + * @PIN_CONFIG_BIAS_PULL_UP: the pin will be pulled up (usually with high + * impedance to VDD). If the argument is != 0 pull-up is enabled, + * if it is 0, pull-up is total, i.e. the pin is connected to VDD. * @PIN_CONFIG_DRIVE_OPEN_DRAIN: the pin will be driven with open drain (open * collector) which means it is usually wired with other output ports * which are then pulled up with an external resistor. Setting this @@ -59,28 +55,26 @@ * @PIN_CONFIG_DRIVE_OPEN_SOURCE: the pin will be driven with open source * (open emitter). Setting this config will enable open source mode, the * argument is ignored. + * @PIN_CONFIG_DRIVE_PUSH_PULL: the pin will be driven actively high and + * low, this is the most typical case and is typically achieved with two + * active transistors on the output. Setting this config will enable + * push-pull mode, the argument is ignored. * @PIN_CONFIG_DRIVE_STRENGTH: the pin will sink or source at most the current * passed as argument. The argument is in mA. + * @PIN_CONFIG_INPUT_DEBOUNCE: this will configure the pin to debounce mode, + * which means it will wait for signals to settle when reading inputs. The + * argument gives the debounce time in usecs. Setting the + * argument to zero turns debouncing off. * @PIN_CONFIG_INPUT_ENABLE: enable the pin's input. Note that this does not * affect the pin's ability to drive output. 1 enables input, 0 disables * input. - * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. - * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, - * schmitt-trigger mode is disabled. * @PIN_CONFIG_INPUT_SCHMITT: this will configure an input pin to run in * schmitt-trigger mode. If the schmitt-trigger has adjustable hysteresis, * the threshold value is given on a custom format as argument when * setting pins to this mode. - * @PIN_CONFIG_INPUT_DEBOUNCE: this will configure the pin to debounce mode, - * which means it will wait for signals to settle when reading inputs. The - * argument gives the debounce time in usecs. Setting the - * argument to zero turns debouncing off. - * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power - * supplies, the argument to this parameter (on a custom format) tells - * the driver which alternative power source to use. - * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to - * this parameter (on a custom format) tells the driver which alternative - * slew rate to use. + * @PIN_CONFIG_INPUT_SCHMITT_ENABLE: control schmitt-trigger mode on the pin. + * If the argument != 0, schmitt-trigger mode is enabled. If it's 0, + * schmitt-trigger mode is disabled. * @PIN_CONFIG_LOW_POWER_MODE: this will configure the pin for low power * operation, if several modes of operation are supported these can be * passed in the argument on a custom form, else just use argument 1 @@ -89,29 +83,35 @@ * 1 to indicate high level, argument 0 to indicate low level. (Please * see Documentation/pinctrl.txt, section "GPIO mode pitfalls" for a * discussion around this parameter.) + * @PIN_CONFIG_POWER_SOURCE: if the pin can select between different power + * supplies, the argument to this parameter (on a custom format) tells + * the driver which alternative power source to use. + * @PIN_CONFIG_SLEW_RATE: if the pin can select slew rate, the argument to + * this parameter (on a custom format) tells the driver which alternative + * slew rate to use. * @PIN_CONFIG_END: this is the last enumerator for pin configurations, if * you need to pass in custom configurations to the pin controller, use * PIN_CONFIG_END+1 as the base offset. */ enum pin_config_param { + PIN_CONFIG_BIAS_BUS_HOLD, PIN_CONFIG_BIAS_DISABLE, PIN_CONFIG_BIAS_HIGH_IMPEDANCE, - PIN_CONFIG_BIAS_BUS_HOLD, - PIN_CONFIG_BIAS_PULL_UP, PIN_CONFIG_BIAS_PULL_DOWN, PIN_CONFIG_BIAS_PULL_PIN_DEFAULT, - PIN_CONFIG_DRIVE_PUSH_PULL, + PIN_CONFIG_BIAS_PULL_UP, PIN_CONFIG_DRIVE_OPEN_DRAIN, PIN_CONFIG_DRIVE_OPEN_SOURCE, + PIN_CONFIG_DRIVE_PUSH_PULL, PIN_CONFIG_DRIVE_STRENGTH, + PIN_CONFIG_INPUT_DEBOUNCE, PIN_CONFIG_INPUT_ENABLE, - PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_INPUT_SCHMITT, - PIN_CONFIG_INPUT_DEBOUNCE, - PIN_CONFIG_POWER_SOURCE, - PIN_CONFIG_SLEW_RATE, + PIN_CONFIG_INPUT_SCHMITT_ENABLE, PIN_CONFIG_LOW_POWER_MODE, PIN_CONFIG_OUTPUT, + PIN_CONFIG_POWER_SOURCE, + PIN_CONFIG_SLEW_RATE, PIN_CONFIG_END = 0x7FFF, }; -- cgit v1.2.3 From a91263d520246b63c63e75ddfb072ee6a853fe15 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2015 01:41:50 +0200 Subject: ebpf: migrate bpf_prog's flags to bitfield As we need to add further flags to the bpf_prog structure, lets migrate both bools to a bitfield representation. The size of the base structure (excluding insns) remains unchanged at 40 bytes. Add also tags for the kmemchecker, so that it doesn't throw false positives. Even in case gcc would generate suboptimal code, it's not being accessed in performance critical paths. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index fa2cab985e57..bad618f316d7 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -326,8 +326,10 @@ struct bpf_binary_header { struct bpf_prog { u16 pages; /* Number of allocated pages */ - bool jited; /* Is our filter JIT'ed? */ - bool gpl_compatible; /* Is our filter GPL compatible? */ + kmemcheck_bitfield_begin(meta); + u16 jited:1, /* Is our filter JIT'ed? */ + gpl_compatible:1; /* Is filter GPL compatible? */ + kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ struct bpf_prog_aux *aux; /* Auxiliary fields */ -- cgit v1.2.3 From c46646d0484f5d08e2bede9b45034ba5b8b489cc Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Sep 2015 01:41:51 +0200 Subject: sched, bpf: add helper for retrieving routing realms Using routing realms as part of the classifier is quite useful, it can be viewed as a tag for one or multiple routing entries (think of an analogy to net_cls cgroup for processes), set by user space routing daemons or via iproute2 as an indicator for traffic classifiers and later on processed in the eBPF program. Unlike actions, the classifier can inspect device flags and enable netif_keep_dst() if necessary. tc actions don't have that possibility, but in case people know what they are doing, it can be used from there as well (e.g. via devs that must keep dsts by design anyway). If a realm is set, the handler returns the non-zero realm. User space can set the full 32bit realm for the dst. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index bad618f316d7..3d5fd24b321b 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -328,7 +328,8 @@ struct bpf_prog { u16 pages; /* Number of allocated pages */ kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ - gpl_compatible:1; /* Is filter GPL compatible? */ + gpl_compatible:1, /* Is filter GPL compatible? */ + dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ enum bpf_prog_type type; /* Type of BPF program */ -- cgit v1.2.3 From f3a6bd393c2c5d0e6b16624ba99a1c5fa07bdb0b Mon Sep 17 00:00:00 2001 From: Simon Horman Date: Wed, 30 Sep 2015 15:15:52 +0900 Subject: phylib: Add phy_set_max_speed helper Add a helper to allow ethernet drivers to limit the speed of a phy (that they are attached to). This mainly involves factoring out the business-end of of_set_phy_supported() and exporting a new symbol. This code seems to be open coded in several places, in several different variants. It is is envisaged that this will be used in situations where setting the "max-speed" property in DT is not appropriate, e.g. because the maximum speed is not a property of the phy hardware. Signed-off-by: Simon Horman Signed-off-by: David S. Miller --- include/linux/phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4a4e3a092337..4c477e6ece33 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -798,6 +798,7 @@ int phy_mii_ioctl(struct phy_device *phydev, struct ifreq *ifr, int cmd); int phy_start_interrupts(struct phy_device *phydev); void phy_print_status(struct phy_device *phydev); void phy_device_free(struct phy_device *phydev); +int phy_set_max_speed(struct phy_device *phydev, u32 max_speed); int phy_register_fixup(const char *bus_id, u32 phy_uid, u32 phy_uid_mask, int (*run)(struct phy_device *)); -- cgit v1.2.3 From 3220befddc0da1f4e09fc790a32a9bd8427e8889 Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 1 Oct 2015 18:40:33 +0300 Subject: usb: store the new usb 3.1 SuperSpeedPlus device capability descriptor If a device supports usb 3.1 SupeerSpeedPlus Gen2 speeds it povides a SuperSpeedPlus device capability descriptor as a part of its BOS descriptor. If we find one while parsing the BOS then save it togeter with the other device capabilities found in the BOS Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index 2cbcf8db517a..b9a28074210f 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -328,6 +328,7 @@ struct usb_host_bos { /* wireless cap descriptor is handled by wusb */ struct usb_ext_cap_descriptor *ext_cap; struct usb_ss_cap_descriptor *ss_cap; + struct usb_ssp_cap_descriptor *ssp_cap; struct usb_ss_container_id_descriptor *ss_id; }; -- cgit v1.2.3 From 7117522520b9101af7a0602d6b0d1e67d689fc6b Mon Sep 17 00:00:00 2001 From: Mathias Nyman Date: Thu, 1 Oct 2015 18:40:37 +0300 Subject: usb: define HCD_USB31 speed option for hosts that support USB 3.1 features Hosts that support USB 3.1 Enhaned SuperSpeed can set their speed to HCD_USB31 to let usb core and host drivers know that the controller supports new USB 3.1 features. make sure usb core handle HCD_USB31 hosts correctly, for now similar to HCD_USB3. Signed-off-by: Mathias Nyman Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/hcd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/hcd.h b/include/linux/usb/hcd.h index 4d147277b30d..f89c24bd53a4 100644 --- a/include/linux/usb/hcd.h +++ b/include/linux/usb/hcd.h @@ -250,6 +250,7 @@ struct hc_driver { #define HCD_USB2 0x0020 /* USB 2.0 */ #define HCD_USB25 0x0030 /* Wireless USB 1.0 (USB 2.5)*/ #define HCD_USB3 0x0040 /* USB 3.0 */ +#define HCD_USB31 0x0050 /* USB 3.1 */ #define HCD_MASK 0x0070 #define HCD_BH 0x0100 /* URB complete in BH context */ -- cgit v1.2.3 From f975c5cdb53c1cfef18c28aaae8385e5d16bc104 Mon Sep 17 00:00:00 2001 From: Yoshihiro Shimoda Date: Tue, 29 Sep 2015 18:21:18 +0900 Subject: usb: renesas_usbhs: fix build warning if 64-bit architecture This patch fixes the following warning if 64-bit architecture environment: ./drivers/usb/renesas_usbhs/common.c:496:25: warning: cast from pointer to integer of different size [-Wpointer-to-int-cast] dparam->type = of_id ? (u32)of_id->data : 0; Signed-off-by: Yoshihiro Shimoda Signed-off-by: Greg Kroah-Hartman --- include/linux/usb/renesas_usbhs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/usb/renesas_usbhs.h b/include/linux/usb/renesas_usbhs.h index 3dd5a781da99..bfb74723f151 100644 --- a/include/linux/usb/renesas_usbhs.h +++ b/include/linux/usb/renesas_usbhs.h @@ -157,7 +157,7 @@ struct renesas_usbhs_driver_param { */ int pio_dma_border; /* default is 64byte */ - u32 type; + uintptr_t type; u32 enable_gpio; /* -- cgit v1.2.3 From 621a5f7ad9cd1ce7933f1d302067cbd58354173c Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sat, 26 Sep 2015 15:04:07 -0700 Subject: debugfs: Pass bool pointer to debugfs_create_bool() Its a bit odd that debugfs_create_bool() takes 'u32 *' as an argument, when all it needs is a boolean pointer. It would be better to update this API to make it accept 'bool *' instead, as that will make it more consistent and often more convenient. Over that bool takes just a byte. That required updates to all user sites as well, in the same commit updating the API. regmap core was also using debugfs_{read|write}_file_bool(), directly and variable types were updated for that to be bool as well. Signed-off-by: Viresh Kumar Acked-by: Mark Brown Acked-by: Charles Keepax Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 4 ++-- include/linux/edac.h | 2 +- include/linux/fault-inject.h | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 9beb636b97eb..8321fe3058d6 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -92,7 +92,7 @@ struct dentry *debugfs_create_size_t(const char *name, umode_t mode, struct dentry *debugfs_create_atomic_t(const char *name, umode_t mode, struct dentry *parent, atomic_t *value); struct dentry *debugfs_create_bool(const char *name, umode_t mode, - struct dentry *parent, u32 *value); + struct dentry *parent, bool *value); struct dentry *debugfs_create_blob(const char *name, umode_t mode, struct dentry *parent, @@ -243,7 +243,7 @@ static inline struct dentry *debugfs_create_atomic_t(const char *name, umode_t m static inline struct dentry *debugfs_create_bool(const char *name, umode_t mode, struct dentry *parent, - u32 *value) + bool *value) { return ERR_PTR(-ENODEV); } diff --git a/include/linux/edac.h b/include/linux/edac.h index da3b72e95db3..7c6b7ba55589 100644 --- a/include/linux/edac.h +++ b/include/linux/edac.h @@ -772,7 +772,7 @@ struct mem_ctl_info { #ifdef CONFIG_EDAC_DEBUG struct dentry *debugfs; u8 fake_inject_layer[EDAC_MAX_LAYERS]; - u32 fake_inject_ue; + bool fake_inject_ue; u16 fake_inject_count; #endif }; diff --git a/include/linux/fault-inject.h b/include/linux/fault-inject.h index 798fad9e420d..3159a7dba034 100644 --- a/include/linux/fault-inject.h +++ b/include/linux/fault-inject.h @@ -18,7 +18,7 @@ struct fault_attr { atomic_t times; atomic_t space; unsigned long verbose; - u32 task_filter; + bool task_filter; unsigned long stacktrace_depth; unsigned long require_start; unsigned long require_end; -- cgit v1.2.3 From f7025709e29aded887becdaf4a81072ef1f475bf Mon Sep 17 00:00:00 2001 From: Ulf Magnusson Date: Wed, 2 Sep 2015 15:40:02 +0200 Subject: kobject: explain what kobject's sd field is (More) unclear, especially name-wise, after sysfs_dirent became kernfs_node. Signed-off-by: Ulf Magnusson Signed-off-by: Greg Kroah-Hartman --- include/linux/kobject.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kobject.h b/include/linux/kobject.h index 637f67002c5a..e6284591599e 100644 --- a/include/linux/kobject.h +++ b/include/linux/kobject.h @@ -66,7 +66,7 @@ struct kobject { struct kobject *parent; struct kset *kset; struct kobj_type *ktype; - struct kernfs_node *sd; + struct kernfs_node *sd; /* sysfs directory entry */ struct kref kref; #ifdef CONFIG_DEBUG_KOBJECT_RELEASE struct delayed_work release; -- cgit v1.2.3 From 09e8b485ee4212f78bf27c8d727845919f85cf1f Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Tue, 29 Sep 2015 18:07:02 -0700 Subject: iommu: iova: Move iova cache management to the iova library This is necessary to separate intel-iommu from the iova library. Signed-off-by: Sakari Ailus Signed-off-by: David Woodhouse Signed-off-by: Greg Kroah-Hartman --- include/linux/iova.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iova.h b/include/linux/iova.h index 3920a19d8194..92f7177db2ce 100644 --- a/include/linux/iova.h +++ b/include/linux/iova.h @@ -68,8 +68,8 @@ static inline unsigned long iova_pfn(struct iova_domain *iovad, dma_addr_t iova) return iova >> iova_shift(iovad); } -int iommu_iova_cache_init(void); -void iommu_iova_cache_destroy(void); +int iova_cache_get(void); +void iova_cache_put(void); struct iova *alloc_iova_mem(void); void free_iova_mem(struct iova *iova); -- cgit v1.2.3 From ff39988abd70bcd1b14a4c81f2d102e67b8db580 Mon Sep 17 00:00:00 2001 From: Siva Yerramreddy Date: Tue, 29 Sep 2015 18:09:37 -0700 Subject: dma: Add support to program MIC x100 status descriptiors The MIC X100 DMA engine has a special status descriptor which writes an 8 byte value to a destination location. This is used to signal completion of all DMA descriptors prior to the status descriptor. This patch add a new DMA engine API which enables updating a destination address with an 8 byte immediate data value. Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Lawrynowicz, Jacek Signed-off-by: Sudeep Dutt Signed-off-by: Siva Yerramreddy Signed-off-by: Greg Kroah-Hartman --- include/linux/dmaengine.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index 7ea9184eaa13..c47c68e535e8 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -645,6 +645,7 @@ enum dmaengine_alignment { * The function takes a buffer of size buf_len. The callback function will * be called after period_len bytes have been transferred. * @device_prep_interleaved_dma: Transfer expression in a generic way. + * @device_prep_dma_imm_data: DMA's 8 byte immediate data to the dst address * @device_config: Pushes a new configuration to a channel, return 0 or an error * code * @device_pause: Pauses any transfer happening on a channel. Returns @@ -727,6 +728,9 @@ struct dma_device { struct dma_async_tx_descriptor *(*device_prep_interleaved_dma)( struct dma_chan *chan, struct dma_interleaved_template *xt, unsigned long flags); + struct dma_async_tx_descriptor *(*device_prep_dma_imm_data)( + struct dma_chan *chan, dma_addr_t dst, u64 data, + unsigned long flags); int (*device_config)(struct dma_chan *chan, struct dma_slave_config *config); -- cgit v1.2.3 From b7f944411b4a628443f84a542858a8c78847bb48 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 29 Sep 2015 18:10:44 -0700 Subject: misc: mic: SCIF poll SCIF poll allows both user and kernel mode clients to wait on events on a SCIF endpoint. These events include availability of space or data in the SCIF ring buffer, availability of connection requests on a listening endpoint and completion of connections when using async connects. Reviewed-by: Nikhil Rao Reviewed-by: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Greg Kroah-Hartman --- include/linux/scif.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 74 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scif.h b/include/linux/scif.h index 44f4f3898bbe..b1923ad2ddd3 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -93,6 +93,18 @@ enum { typedef struct scif_endpt *scif_epd_t; +/** + * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll + * @epd: SCIF endpoint + * @events: requested events + * @revents: returned events + */ +struct scif_pollepd { + scif_epd_t epd; + short events; + short revents; +}; + #define SCIF_OPEN_FAILED ((scif_epd_t)-1) #define SCIF_REGISTER_FAILED ((off_t)-1) #define SCIF_MMAP_FAILED ((void *)-1) @@ -990,4 +1002,66 @@ int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, */ int scif_get_node_ids(u16 *nodes, int len, u16 *self); +/** + * scif_poll() - Wait for some event on an endpoint + * @epds: Array of endpoint descriptors + * @nepds: Length of epds + * @timeout: Upper limit on time for which scif_poll() will block + * + * scif_poll() waits for one of a set of endpoints to become ready to perform + * an I/O operation. + * + * The epds argument specifies the endpoint descriptors to be examined and the + * events of interest for each endpoint descriptor. epds is a pointer to an + * array with one member for each open endpoint descriptor of interest. + * + * The number of items in the epds array is specified in nepds. The epd field + * of scif_pollepd is an endpoint descriptor of an open endpoint. The field + * events is a bitmask specifying the events which the application is + * interested in. The field revents is an output parameter, filled by the + * kernel with the events that actually occurred. The bits returned in revents + * can include any of those specified in events, or one of the values POLLERR, + * POLLHUP, or POLLNVAL. (These three bits are meaningless in the events + * field, and will be set in the revents field whenever the corresponding + * condition is true.) + * + * If none of the events requested (and no error) has occurred for any of the + * endpoint descriptors, then scif_poll() blocks until one of the events occurs. + * + * The timeout argument specifies an upper limit on the time for which + * scif_poll() will block, in milliseconds. Specifying a negative value in + * timeout means an infinite timeout. + * + * The following bits may be set in events and returned in revents. + * POLLIN - Data may be received without blocking. For a connected + * endpoint, this means that scif_recv() may be called without blocking. For a + * listening endpoint, this means that scif_accept() may be called without + * blocking. + * POLLOUT - Data may be sent without blocking. For a connected endpoint, this + * means that scif_send() may be called without blocking. POLLOUT may also be + * used to block waiting for a non-blocking connect to complete. This bit value + * has no meaning for a listening endpoint and is ignored if specified. + * + * The following bits are only returned in revents, and are ignored if set in + * events. + * POLLERR - An error occurred on the endpoint + * POLLHUP - The connection to the peer endpoint was disconnected + * POLLNVAL - The specified endpoint descriptor is invalid. + * + * Return: + * Upon successful completion, scif_poll() returns a non-negative value. A + * positive value indicates the total number of endpoint descriptors that have + * been selected (that is, endpoint descriptors for which the revents member is + * non-zero). A value of 0 indicates that the call timed out and no endpoint + * descriptors have been selected. Otherwise in user mode -1 is returned and + * errno is set to indicate the error; in kernel mode the negative of one of + * the following errors is returned. + * + * Errors: + * EINTR - A signal occurred before any requested event + * EINVAL - The nepds argument is greater than {OPEN_MAX} + * ENOMEM - There was no space to allocate file descriptor tables + */ +int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); + #endif /* __SCIF_H__ */ -- cgit v1.2.3 From d3d912eb7386b7512f131b34407978cecccb3703 Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 29 Sep 2015 18:11:15 -0700 Subject: misc: mic: Add support for kernel mode SCIF clients Add support for registration/de-registration of kernel mode SCIF clients. SCIF clients are probed with new and existing SCIF peer devices. Similarly the client remove method is called when SCIF peer devices are removed. Changes to SCIF peer device framework necessitated by supporting kernel mode SCIF clients are also included in this patch. Reviewed-by: Nikhil Rao Reviewed-by: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Greg Kroah-Hartman --- include/linux/scif.h | 58 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 58 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scif.h b/include/linux/scif.h index b1923ad2ddd3..fd62c051b166 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -55,6 +55,7 @@ #include #include +#include #include #define SCIF_ACCEPT_SYNC 1 @@ -105,6 +106,37 @@ struct scif_pollepd { short revents; }; +/** + * scif_peer_dev - representation of a peer SCIF device + * + * Peer devices show up as PCIe devices for the mgmt node but not the cards. + * The mgmt node discovers all the cards on the PCIe bus and informs the other + * cards about their peers. Upon notification of a peer a node adds a peer + * device to the peer bus to maintain symmetry in the way devices are + * discovered across all nodes in the SCIF network. + * + * @dev: underlying device + * @dnode - The destination node which this device will communicate with. + */ +struct scif_peer_dev { + struct device dev; + u8 dnode; +}; + +/** + * scif_client - representation of a SCIF client + * @name: client name + * @probe - client method called when a peer device is registered + * @remove - client method called when a peer device is unregistered + * @si - subsys_interface used internally for implementing SCIF clients + */ +struct scif_client { + const char *name; + void (*probe)(struct scif_peer_dev *spdev); + void (*remove)(struct scif_peer_dev *spdev); + struct subsys_interface si; +}; + #define SCIF_OPEN_FAILED ((scif_epd_t)-1) #define SCIF_REGISTER_FAILED ((off_t)-1) #define SCIF_MMAP_FAILED ((void *)-1) @@ -1064,4 +1096,30 @@ int scif_get_node_ids(u16 *nodes, int len, u16 *self); */ int scif_poll(struct scif_pollepd *epds, unsigned int nepds, long timeout); +/** + * scif_client_register() - Register a SCIF client + * @client: client to be registered + * + * scif_client_register() registers a SCIF client. The probe() method + * of the client is called when SCIF peer devices come online and the + * remove() method is called when the peer devices disappear. + * + * Return: + * Upon successful completion, scif_client_register() returns a non-negative + * value. Otherwise the return value is the same as subsys_interface_register() + * in the kernel. + */ +int scif_client_register(struct scif_client *client); + +/** + * scif_client_unregister() - Unregister a SCIF client + * @client: client to be unregistered + * + * scif_client_unregister() unregisters a SCIF client. + * + * Return: + * None + */ +void scif_client_unregister(struct scif_client *client); + #endif /* __SCIF_H__ */ -- cgit v1.2.3 From d411e79391092925457d89b77d7cd3038ba6d04b Mon Sep 17 00:00:00 2001 From: Ashutosh Dixit Date: Tue, 29 Sep 2015 18:13:54 -0700 Subject: misc: mic: Remove COSM functionality from the MIC card driver Since card side COSM functionality, to trigger MIC device shutdowns and communicate shutdown status to the host, is now moved into a separate COSM client driver, this patch removes this functionality from the base MIC card driver. The mic_bus driver is also updated to use the device index provided by COSM rather than maintain its own device index. Reviewed-by: Nikhil Rao Reviewed-by: Sudeep Dutt Signed-off-by: Ashutosh Dixit Signed-off-by: Greg Kroah-Hartman --- include/linux/mic_bus.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mic_bus.h b/include/linux/mic_bus.h index d5b5f76d57ef..27d7c95fd0da 100644 --- a/include/linux/mic_bus.h +++ b/include/linux/mic_bus.h @@ -91,7 +91,8 @@ struct mbus_hw_ops { struct mbus_device * mbus_register_device(struct device *pdev, int id, struct dma_map_ops *dma_ops, - struct mbus_hw_ops *hw_ops, void __iomem *mmio_va); + struct mbus_hw_ops *hw_ops, int index, + void __iomem *mmio_va); void mbus_unregister_device(struct mbus_device *mbdev); int mbus_register_driver(struct mbus_driver *drv); -- cgit v1.2.3 From a44f2630d78bfafc8eb5ab7d8b7b8cd4642ba749 Mon Sep 17 00:00:00 2001 From: Sudeep Dutt Date: Tue, 29 Sep 2015 18:15:03 -0700 Subject: misc: mic: SCIF RMA header file and IOCTL changes This patch updates the SCIF header file and IOCTL interface with the changes required to support RMAs. APIs added include the ability to pin pages and register those pages with SCIF. SCIF kernel clients can also add references to remote registered pages and access them via the CPU. The user space IOCTL interface has been updated to enable SCIF registration, RDMA/CPU copies and fence APIs for RDMA synchronization. Reviewed-by: Nikhil Rao Reviewed-by: Ashutosh Dixit Signed-off-by: Sudeep Dutt Signed-off-by: Greg Kroah-Hartman --- include/linux/scif.h | 234 ++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 224 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/scif.h b/include/linux/scif.h index fd62c051b166..49a35d6edc94 100644 --- a/include/linux/scif.h +++ b/include/linux/scif.h @@ -93,6 +93,27 @@ enum { #define SCIF_PORT_RSVD 1088 typedef struct scif_endpt *scif_epd_t; +typedef struct scif_pinned_pages *scif_pinned_pages_t; + +/** + * struct scif_range - SCIF registered range used in kernel mode + * @cookie: cookie used internally by SCIF + * @nr_pages: number of pages of PAGE_SIZE + * @prot_flags: R/W protection + * @phys_addr: Array of bus addresses + * @va: Array of kernel virtual addresses backed by the pages in the phys_addr + * array. The va is populated only when called on the host for a remote + * SCIF connection on MIC. This is required to support the use case of DMA + * between MIC and another device which is not a SCIF node e.g., an IB or + * ethernet NIC. + */ +struct scif_range { + void *cookie; + int nr_pages; + int prot_flags; + dma_addr_t *phys_addr; + void __iomem **va; +}; /** * struct scif_pollepd - SCIF endpoint to be monitored via scif_poll @@ -389,7 +410,6 @@ int scif_close(scif_epd_t epd); * Errors: * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - An invalid address was specified for a parameter * EINVAL - flags is invalid, or len is negative * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -442,7 +462,6 @@ int scif_send(scif_epd_t epd, void *msg, int len, int flags); * EAGAIN - The destination node is returning from a low power state * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - An invalid address was specified for a parameter * EINVAL - flags is invalid, or len is negative * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -505,9 +524,6 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags); * SCIF_PROT_READ - allow read operations from the window * SCIF_PROT_WRITE - allow write operations to the window * - * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a - * fixed offset. - * * Return: * Upon successful completion, scif_register() returns the offset at which the * mapping was placed (po); otherwise in user mode SCIF_REGISTER_FAILED (that @@ -520,7 +536,6 @@ int scif_recv(scif_epd_t epd, void *msg, int len, int flags); * EAGAIN - The mapping could not be performed due to lack of resources * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid * EINVAL - map_flags is invalid, or prot_flags is invalid, or SCIF_MAP_FIXED is * set in flags, and offset is not a multiple of the page size, or addr is not a * multiple of the page size, or len is not a multiple of the page size, or is @@ -803,7 +818,6 @@ int scif_writeto(scif_epd_t epd, off_t loffset, size_t len, off_t * EACCESS - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid * EINVAL - rma_flags is invalid * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -884,7 +898,6 @@ int scif_vreadfrom(scif_epd_t epd, void *addr, size_t len, off_t roffset, * EACCESS - Attempt to write to a read-only range * EBADF, ENOTTY - epd is not a valid endpoint descriptor * ECONNRESET - Connection reset by peer - * EFAULT - Addresses in the range [addr, addr + len - 1] are invalid * EINVAL - rma_flags is invalid * ENODEV - The remote node is lost or existed, but is not currently in the * network since it may have crashed @@ -1028,11 +1041,212 @@ int scif_fence_signal(scif_epd_t epd, off_t loff, u64 lval, off_t roff, * online nodes in the SCIF network including 'self'; otherwise in user mode * -1 is returned and errno is set to indicate the error; in kernel mode no * errors are returned. + */ +int scif_get_node_ids(u16 *nodes, int len, u16 *self); + +/** + * scif_pin_pages() - Pin a set of pages + * @addr: Virtual address of range to pin + * @len: Length of range to pin + * @prot_flags: Page protection flags + * @map_flags: Page classification flags + * @pinned_pages: Handle to pinned pages + * + * scif_pin_pages() pins (locks in physical memory) the physical pages which + * back the range of virtual address pages starting at addr and continuing for + * len bytes. addr and len are constrained to be multiples of the page size. A + * successful scif_pin_pages() call returns a handle to pinned_pages which may + * be used in subsequent calls to scif_register_pinned_pages(). + * + * The pages will remain pinned as long as there is a reference against the + * scif_pinned_pages_t value returned by scif_pin_pages() and until + * scif_unpin_pages() is called, passing the scif_pinned_pages_t value. A + * reference is added to a scif_pinned_pages_t value each time a window is + * created by calling scif_register_pinned_pages() and passing the + * scif_pinned_pages_t value. A reference is removed from a + * scif_pinned_pages_t value each time such a window is deleted. + * + * Subsequent operations which change the memory pages to which virtual + * addresses are mapped (such as mmap(), munmap()) have no effect on the + * scif_pinned_pages_t value or windows created against it. + * + * If the process will fork(), it is recommended that the registered + * virtual address range be marked with MADV_DONTFORK. Doing so will prevent + * problems due to copy-on-write semantics. + * + * The prot_flags argument is formed by OR'ing together one or more of the + * following values. + * SCIF_PROT_READ - allow read operations against the pages + * SCIF_PROT_WRITE - allow write operations against the pages + * The map_flags argument can be set as SCIF_MAP_KERNEL to interpret addr as a + * kernel space address. By default, addr is interpreted as a user space + * address. + * + * Return: + * Upon successful completion, scif_pin_pages() returns 0; otherwise the + * negative of one of the following errors is returned. * * Errors: - * EFAULT - Bad address + * EINVAL - prot_flags is invalid, map_flags is invalid, or offset is negative + * ENOMEM - Not enough space */ -int scif_get_node_ids(u16 *nodes, int len, u16 *self); +int scif_pin_pages(void *addr, size_t len, int prot_flags, int map_flags, + scif_pinned_pages_t *pinned_pages); + +/** + * scif_unpin_pages() - Unpin a set of pages + * @pinned_pages: Handle to pinned pages to be unpinned + * + * scif_unpin_pages() prevents scif_register_pinned_pages() from registering new + * windows against pinned_pages. The physical pages represented by pinned_pages + * will remain pinned until all windows previously registered against + * pinned_pages are deleted (the window is scif_unregister()'d and all + * references to the window are removed (see scif_unregister()). + * + * pinned_pages must have been obtain from a previous call to scif_pin_pages(). + * After calling scif_unpin_pages(), it is an error to pass pinned_pages to + * scif_register_pinned_pages(). + * + * Return: + * Upon successful completion, scif_unpin_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * + * Errors: + * EINVAL - pinned_pages is not valid + */ +int scif_unpin_pages(scif_pinned_pages_t pinned_pages); + +/** + * scif_register_pinned_pages() - Mark a memory region for remote access. + * @epd: endpoint descriptor + * @pinned_pages: Handle to pinned pages + * @offset: Registered address space offset + * @map_flags: Flags which control where pages are mapped + * + * The scif_register_pinned_pages() function opens a window, a range of whole + * pages of the registered address space of the endpoint epd, starting at + * offset po. The value of po, further described below, is a function of the + * parameters offset and pinned_pages, and the value of map_flags. Each page of + * the window represents a corresponding physical memory page of the range + * represented by pinned_pages; the length of the window is the same as the + * length of range represented by pinned_pages. A successful + * scif_register_pinned_pages() call returns po as the return value. + * + * When SCIF_MAP_FIXED is set in the map_flags argument, po will be offset + * exactly, and offset is constrained to be a multiple of the page size. The + * mapping established by scif_register_pinned_pages() will not replace any + * existing registration; an error is returned if any page of the new window + * would intersect an existing window. + * + * When SCIF_MAP_FIXED is not set, the implementation uses offset in an + * implementation-defined manner to arrive at po. The po so chosen will be an + * area of the registered address space that the implementation deems suitable + * for a mapping of the required size. An offset value of 0 is interpreted as + * granting the implementation complete freedom in selecting po, subject to + * constraints described below. A non-zero value of offset is taken to be a + * suggestion of an offset near which the mapping should be placed. When the + * implementation selects a value for po, it does not replace any extant + * window. In all cases, po will be a multiple of the page size. + * + * The physical pages which are so represented by a window are available for + * access in calls to scif_get_pages(), scif_readfrom(), scif_writeto(), + * scif_vreadfrom(), and scif_vwriteto(). While a window is registered, the + * physical pages represented by the window will not be reused by the memory + * subsystem for any other purpose. Note that the same physical page may be + * represented by multiple windows. + * + * Windows created by scif_register_pinned_pages() are unregistered by + * scif_unregister(). + * + * The map_flags argument can be set to SCIF_MAP_FIXED which interprets a + * fixed offset. + * + * Return: + * Upon successful completion, scif_register_pinned_pages() returns the offset + * at which the mapping was placed (po); otherwise the negative of one of the + * following errors is returned. + * + * Errors: + * EADDRINUSE - SCIF_MAP_FIXED is set in map_flags and pages in the new window + * would intersect an existing window + * EAGAIN - The mapping could not be performed due to lack of resources + * ECONNRESET - Connection reset by peer + * EINVAL - map_flags is invalid, or SCIF_MAP_FIXED is set in map_flags, and + * offset is not a multiple of the page size, or offset is negative + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOMEM - Not enough space + * ENOTCONN - The endpoint is not connected + */ +off_t scif_register_pinned_pages(scif_epd_t epd, + scif_pinned_pages_t pinned_pages, + off_t offset, int map_flags); + +/** + * scif_get_pages() - Add references to remote registered pages + * @epd: endpoint descriptor + * @offset: remote registered offset + * @len: length of range of pages + * @pages: returned scif_range structure + * + * scif_get_pages() returns the addresses of the physical pages represented by + * those pages of the registered address space of the peer of epd, starting at + * offset and continuing for len bytes. offset and len are constrained to be + * multiples of the page size. + * + * All of the pages in the specified range [offset, offset + len - 1] must be + * within a single window of the registered address space of the peer of epd. + * + * The addresses are returned as a virtually contiguous array pointed to by the + * phys_addr component of the scif_range structure whose address is returned in + * pages. The nr_pages component of scif_range is the length of the array. The + * prot_flags component of scif_range holds the protection flag value passed + * when the pages were registered. + * + * Each physical page whose address is returned by scif_get_pages() remains + * available and will not be released for reuse until the scif_range structure + * is returned in a call to scif_put_pages(). The scif_range structure returned + * by scif_get_pages() must be unmodified. + * + * It is an error to call scif_close() on an endpoint on which a scif_range + * structure of that endpoint has not been returned to scif_put_pages(). + * + * Return: + * Upon successful completion, scif_get_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * Errors: + * ECONNRESET - Connection reset by peer. + * EINVAL - offset is not a multiple of the page size, or offset is negative, or + * len is not a multiple of the page size + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + * ENXIO - Offsets in the range [offset, offset + len - 1] are invalid + * for the registered address space of the peer epd + */ +int scif_get_pages(scif_epd_t epd, off_t offset, size_t len, + struct scif_range **pages); + +/** + * scif_put_pages() - Remove references from remote registered pages + * @pages: pages to be returned + * + * scif_put_pages() releases a scif_range structure previously obtained by + * calling scif_get_pages(). The physical pages represented by pages may + * be reused when the window which represented those pages is unregistered. + * Therefore, those pages must not be accessed after calling scif_put_pages(). + * + * Return: + * Upon successful completion, scif_put_pages() returns 0; otherwise the + * negative of one of the following errors is returned. + * Errors: + * EINVAL - pages does not point to a valid scif_range structure, or + * the scif_range structure pointed to by pages was already returned + * ENODEV - The remote node is lost or existed, but is not currently in the + * network since it may have crashed + * ENOTCONN - The endpoint is not connected + */ +int scif_put_pages(struct scif_range *pages); /** * scif_poll() - Wait for some event on an endpoint -- cgit v1.2.3 From 9af92fbff3b06d75470717361076aa7bd097ff8b Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 10 Sep 2015 11:29:03 +0200 Subject: tty/serial: at91: move ATMEL_MAX_UART Move ATMEL_MAX_UART from platform_data/atmel.h to atmel_serial.c as this is the only file using it and it is common practise from tty/serial drivers to define it directly in the driver file. Signed-off-by: Alexandre Belloni Acked-by: Nicolas Ferre Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_data/atmel.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..c4bc90bfebe0 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -19,12 +19,6 @@ #include #include -/* - * at91: 6 USARTs and one DBGU port (SAM9260) - * avr32: 4 - */ -#define ATMEL_MAX_UART 7 - /* USB Device */ struct at91_udc_data { int vbus_pin; /* high == host powering us */ -- cgit v1.2.3 From 7bd1d4093c2fa37d1ecab05da3c9d48ea2af2264 Mon Sep 17 00:00:00 2001 From: Alexander Shishkin Date: Tue, 22 Sep 2015 15:47:10 +0300 Subject: stm class: Introduce an abstraction for System Trace Module devices A System Trace Module (STM) is a device exporting data in System Trace Protocol (STP) format as defined by MIPI STP standards. Examples of such devices are Intel(R) Trace Hub and Coresight STM. This abstraction provides a unified interface for software trace sources to send their data over an STM device to a debug host. In order to do that, such a trace source needs to be assigned a pair of master/channel identifiers that all the data from this source will be tagged with. The STP decoder on the debug host side will use these master/channel tags to distinguish different trace streams from one another inside one STP stream. This abstraction provides a configfs-based policy management mechanism for dynamic allocation of these master/channel pairs based on trace source-supplied string identifier. It has the flexibility of being defined at runtime and at the same time (provided that the policy definition is aligned with the decoding end) consistency. For userspace trace sources, this abstraction provides write()-based and mmap()-based (if the underlying stm device allows this) output mechanism. For kernel-side trace sources, we provide "stm_source" device class that can be connected to an stm device at run time. Cc: linux-api@vger.kernel.org Reviewed-by: Mathieu Poirier Signed-off-by: Alexander Shishkin Signed-off-by: Greg Kroah-Hartman --- include/linux/stm.h | 126 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 126 insertions(+) create mode 100644 include/linux/stm.h (limited to 'include/linux') diff --git a/include/linux/stm.h b/include/linux/stm.h new file mode 100644 index 000000000000..9d0083d364e6 --- /dev/null +++ b/include/linux/stm.h @@ -0,0 +1,126 @@ +/* + * System Trace Module (STM) infrastructure apis + * Copyright (C) 2014 Intel Corporation. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef _STM_H_ +#define _STM_H_ + +#include + +/** + * enum stp_packet_type - STP packets that an STM driver sends + */ +enum stp_packet_type { + STP_PACKET_DATA = 0, + STP_PACKET_FLAG, + STP_PACKET_USER, + STP_PACKET_MERR, + STP_PACKET_GERR, + STP_PACKET_TRIG, + STP_PACKET_XSYNC, +}; + +/** + * enum stp_packet_flags - STP packet modifiers + */ +enum stp_packet_flags { + STP_PACKET_MARKED = 0x1, + STP_PACKET_TIMESTAMPED = 0x2, +}; + +struct stp_policy; + +struct stm_device; + +/** + * struct stm_data - STM device description and callbacks + * @name: device name + * @stm: internal structure, only used by stm class code + * @sw_start: first STP master available to software + * @sw_end: last STP master available to software + * @sw_nchannels: number of STP channels per master + * @sw_mmiosz: size of one channel's IO space, for mmap, optional + * @packet: callback that sends an STP packet + * @mmio_addr: mmap callback, optional + * @link: called when a new stm_source gets linked to us, optional + * @unlink: likewise for unlinking, again optional + * @set_options: set device-specific options on a channel + * + * Fill out this structure before calling stm_register_device() to create + * an STM device and stm_unregister_device() to destroy it. It will also be + * passed back to @packet(), @mmio_addr(), @link(), @unlink() and @set_options() + * callbacks. + * + * Normally, an STM device will have a range of masters available to software + * and the rest being statically assigned to various hardware trace sources. + * The former is defined by the the range [@sw_start..@sw_end] of the device + * description. That is, the lowest master that can be allocated to software + * writers is @sw_start and data from this writer will appear is @sw_start + * master in the STP stream. + */ +struct stm_data { + const char *name; + struct stm_device *stm; + unsigned int sw_start; + unsigned int sw_end; + unsigned int sw_nchannels; + unsigned int sw_mmiosz; + ssize_t (*packet)(struct stm_data *, unsigned int, + unsigned int, unsigned int, + unsigned int, unsigned int, + const unsigned char *); + phys_addr_t (*mmio_addr)(struct stm_data *, unsigned int, + unsigned int, unsigned int); + int (*link)(struct stm_data *, unsigned int, + unsigned int); + void (*unlink)(struct stm_data *, unsigned int, + unsigned int); + long (*set_options)(struct stm_data *, unsigned int, + unsigned int, unsigned int, + unsigned long); +}; + +int stm_register_device(struct device *parent, struct stm_data *stm_data, + struct module *owner); +void stm_unregister_device(struct stm_data *stm_data); + +struct stm_source_device; + +/** + * struct stm_source_data - STM source device description and callbacks + * @name: device name, will be used for policy lookup + * @src: internal structure, only used by stm class code + * @nr_chans: number of channels to allocate + * @link: called when this source gets linked to an STM device + * @unlink: called when this source is about to get unlinked from its STM + * + * Fill in this structure before calling stm_source_register_device() to + * register a source device. Also pass it to unregister and write calls. + */ +struct stm_source_data { + const char *name; + struct stm_source_device *src; + unsigned int percpu; + unsigned int nr_chans; + int (*link)(struct stm_source_data *data); + void (*unlink)(struct stm_source_data *data); +}; + +int stm_source_register_device(struct device *parent, + struct stm_source_data *data); +void stm_source_unregister_device(struct stm_source_data *data); + +int stm_source_write(struct stm_source_data *data, unsigned int chan, + const char *buf, size_t count); + +#endif /* _STM_H_ */ -- cgit v1.2.3 From b7bd1809e0784435791657502bc0d8280ad6f7ea Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Wed, 30 Sep 2015 22:53:44 +0100 Subject: netfilter: nfnetlink_queue: get rid of nfnetlink_queue_ct.c The original intention was to avoid dependencies between nfnetlink_queue and conntrack without ifdef pollution. However, we can achieve this by moving the conntrack dependent code into ctnetlink and keep some glue code to access the nfq_ct indirection from nfqueue. After this patch, the nfq_ct indirection is always compiled in the netfilter core to avoid polluting nfqueue with ifdefs. Thus, if nf_conntrack is not compiled this results in only 8-bytes of memory waste in x86_64. This patch also adds ctnetlink_nfqueue_seqadj() to avoid that the nf_conn structure layout if exposed to nf_queue, which creates another dependency with nf_conntrack at compilation time. Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 12 ++++++++---- 1 file changed, 8 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 165ab2d14734..3e5e8f2b65f6 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -369,14 +369,21 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) extern void (*ip_ct_attach)(struct sk_buff *, const struct sk_buff *) __rcu; void nf_ct_attach(struct sk_buff *, const struct sk_buff *); extern void (*nf_ct_destroy)(struct nf_conntrack *) __rcu; +#else +static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} +#endif struct nf_conn; enum ip_conntrack_info; struct nlattr; struct nfq_ct_hook { + struct nf_conn *(*get_ct)(struct sk_buff *skb, + enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); - int (*build)(struct sk_buff *skb, struct nf_conn *ct); + int (*build)(struct sk_buff *skb, struct nf_conn *ct, + enum ip_conntrack_info ctinfo, + u_int16_t ct_attr, u_int16_t ct_info_attr); int (*parse)(const struct nlattr *attr, struct nf_conn *ct); int (*attach_expect)(const struct nlattr *attr, struct nf_conn *ct, u32 portid, u32 report); @@ -384,9 +391,6 @@ struct nfq_ct_hook { enum ip_conntrack_info ctinfo, s32 off); }; extern struct nfq_ct_hook __rcu *nfq_ct_hook; -#else -static inline void nf_ct_attach(struct sk_buff *new, struct sk_buff *skb) {} -#endif /** * nf_skb_duplicated - TEE target has sent a packet -- cgit v1.2.3 From dbe2256ddd8e8420c254c79f4045c41cb5f4bd6b Mon Sep 17 00:00:00 2001 From: Thierry Reding Date: Fri, 25 Sep 2015 17:29:04 +0200 Subject: driver-core: platform: Provide helpers for multi-driver modules Some modules register several sub-drivers. Provide a helper that makes it easy to register and unregister a list of sub-drivers, as well as unwind properly on error. Signed-off-by: Thierry Reding Signed-off-by: Greg Kroah-Hartman --- include/linux/platform_device.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_device.h b/include/linux/platform_device.h index bba08f44cc97..dc777be5f2e1 100644 --- a/include/linux/platform_device.h +++ b/include/linux/platform_device.h @@ -270,6 +270,14 @@ extern struct platform_device *__platform_create_bundle( struct resource *res, unsigned int n_res, const void *data, size_t size, struct module *module); +int __platform_register_drivers(struct platform_driver * const *drivers, + unsigned int count, struct module *owner); +void platform_unregister_drivers(struct platform_driver * const *drivers, + unsigned int count); + +#define platform_register_drivers(drivers, count) \ + __platform_register_drivers(drivers, count, THIS_MODULE) + /* early platform driver interface */ struct early_platform_driver { const char *class_str; -- cgit v1.2.3 From 7e50711993552800644a4667daa0f569a7665eca Mon Sep 17 00:00:00 2001 From: Grigoryev Denis Date: Fri, 2 Oct 2015 16:14:41 +0000 Subject: mfd: tps6105x: Use i2c regmap to access registers This patch modifies tps6105x and associated function driver to use regmap instead of operating directly on i2c. Signed-off-by: Denis Grigoryev Signed-off-by: Lee Jones --- include/linux/mfd/tps6105x.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/tps6105x.h b/include/linux/mfd/tps6105x.h index 386743dd931c..8bc51180800a 100644 --- a/include/linux/mfd/tps6105x.h +++ b/include/linux/mfd/tps6105x.h @@ -10,6 +10,7 @@ #define MFD_TPS6105X_H #include +#include #include /* @@ -82,20 +83,15 @@ struct tps6105x_platform_data { /** * struct tps6105x - state holder for the TPS6105x drivers - * @mutex: mutex to serialize I2C accesses * @i2c_client: corresponding I2C client * @regulator: regulator device if used in voltage mode + * @regmap: used for i2c communcation on accessing registers */ struct tps6105x { struct tps6105x_platform_data *pdata; - struct mutex lock; struct i2c_client *client; struct regulator_dev *regulator; + struct regmap *regmap; }; -extern int tps6105x_set(struct tps6105x *tps6105x, u8 reg, u8 value); -extern int tps6105x_get(struct tps6105x *tps6105x, u8 reg, u8 *buf); -extern int tps6105x_mask_and_set(struct tps6105x *tps6105x, u8 reg, - u8 bitmask, u8 bitvalues); - #endif -- cgit v1.2.3 From e7eadb4de9e645e1b34539dc4128240b1e5f71dc Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Sun, 4 Oct 2015 21:08:09 -0700 Subject: ipv6: inet6_sk() should use sk_fullsock() SYN_RECV & TIMEWAIT sockets are not full blown, they do not have a pinet6 pointer. Fixes: ca6fb0651883 ("tcp: attach SYNACK messages to request sockets instead of listener") Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/ipv6.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index f1f32af6d9b9..0ef2a97ccdb5 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -264,9 +264,9 @@ struct tcp6_timewait_sock { }; #if IS_ENABLED(CONFIG_IPV6) -static inline struct ipv6_pinfo * inet6_sk(const struct sock *__sk) +static inline struct ipv6_pinfo *inet6_sk(const struct sock *__sk) { - return inet_sk(__sk)->pinet6; + return sk_fullsock(__sk) ? inet_sk(__sk)->pinet6 : NULL; } static inline struct raw6_sock *raw6_sk(const struct sock *sk) -- cgit v1.2.3 From 7741c373cf3ea1f5383fa97fb7a640a429d3dd7c Mon Sep 17 00:00:00 2001 From: Jon Ringle Date: Thu, 1 Oct 2015 07:43:20 -0400 Subject: regmap: Allow installing custom reg_update_bits function This commit allows installing a custom reg_update_bits function for cases where the hardware provides a mechanism to set or clear register bits without a read/modify/write cycle. Such is the case with the Microchip ENCX24J600. Signed-off-by: Jon Ringle Signed-off-by: David S. Miller --- include/linux/regmap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..4d3a3b1680bb 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,6 +296,9 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val, + bool *change, bool force_write); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -335,6 +338,7 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From bab18991871545dfbd10c931eb0fe8f7637156a9 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 15:17:33 +0200 Subject: bpf, seccomp: prepare for upcoming criu support The current ongoing effort to dump existing cBPF seccomp filters back to user space requires to hold the pre-transformed instructions like we do in case of socket filters from sk_attach_filter() side, so they can be reloaded in original form at a later point in time by utilities such as criu. To prepare for this, simply extend the bpf_prog_create_from_user() API to hold a flag that tells whether we should store the original or not. Also, fanout filters could make use of that in future for things like diag. While fanout filters already use bpf_prog_destroy(), move seccomp over to them as well to handle original programs when present. Signed-off-by: Daniel Borkmann Cc: Tycho Andersen Cc: Pavel Emelyanov Cc: Kees Cook Cc: Andy Lutomirski Cc: Alexei Starovoitov Tested-by: Tycho Andersen Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/filter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 3d5fd24b321b..1bbce14bcf17 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -411,7 +411,7 @@ typedef int (*bpf_aux_classic_check_t)(struct sock_filter *filter, int bpf_prog_create(struct bpf_prog **pfp, struct sock_fprog_kern *fprog); int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, - bpf_aux_classic_check_t trans); + bpf_aux_classic_check_t trans, bool save_orig); void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); -- cgit v1.2.3 From 0cdf5640e4f6940bdbbefee4bb0adb7dffb185ec Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Fri, 2 Oct 2015 18:42:00 +0200 Subject: ebpf: include perf_event only where really needed Commit ea317b267e9d ("bpf: Add new bpf map type to store the pointer to struct perf_event") added perf_event.h to the main eBPF header, so it gets included for all users. perf_event.h is actually only needed from array map side, so lets sanitize this a bit. Signed-off-by: Daniel Borkmann Cc: Kaixu Xia Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index f57d7fed9ec3..c915a6b54570 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -10,7 +10,6 @@ #include #include #include -#include struct bpf_map; -- cgit v1.2.3 From a4b4766c3cebb4018167e06b863d8e95b7274757 Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Mon, 5 Oct 2015 11:47:13 +0900 Subject: netfilter: nfnetlink_queue: rename related to nfqueue attaching conntrack info The idea of this series of patch is to attach conntrack information to nflog like nfqueue has already done. nfqueue conntrack info attaching basis is generic, rename those names to generic one, glue. Signed-off-by: Ken-ichirou MATSUZAWA Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 3e5e8f2b65f6..27747deb96ed 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -377,7 +377,7 @@ struct nf_conn; enum ip_conntrack_info; struct nlattr; -struct nfq_ct_hook { +struct nfnl_ct_hook { struct nf_conn *(*get_ct)(struct sk_buff *skb, enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); @@ -390,7 +390,7 @@ struct nfq_ct_hook { void (*seq_adjust)(struct sk_buff *skb, struct nf_conn *ct, enum ip_conntrack_info ctinfo, s32 off); }; -extern struct nfq_ct_hook __rcu *nfq_ct_hook; +extern struct nfnl_ct_hook __rcu *nfnl_ct_hook; /** * nf_skb_duplicated - TEE target has sent a packet -- cgit v1.2.3 From 224a05975ebbbdf507c65043f8aba280ccb39e6e Mon Sep 17 00:00:00 2001 From: Ken-ichirou MATSUZAWA Date: Mon, 5 Oct 2015 11:49:56 +0900 Subject: netfilter: ctnetlink: add const qualifier to nfnl_hook.get_ct get_ct as is and will not update its skb argument, and users of nfnl_ct_hook is currently only nfqueue, we can add const qualifier. Signed-off-by: Ken-ichirou MATSUZAWA --- include/linux/netfilter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index 27747deb96ed..edb3dc32f1da 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -378,7 +378,7 @@ enum ip_conntrack_info; struct nlattr; struct nfnl_ct_hook { - struct nf_conn *(*get_ct)(struct sk_buff *skb, + struct nf_conn *(*get_ct)(const struct sk_buff *skb, enum ip_conntrack_info *ctinfo); size_t (*build_size)(const struct nf_conn *ct); int (*build)(struct sk_buff *skb, struct nf_conn *ct, -- cgit v1.2.3 From 7d8d05d11473a169ab4d53bc7fc23d1fe3f1959f Mon Sep 17 00:00:00 2001 From: Boris Brezillon Date: Sun, 16 Aug 2015 11:23:46 +0200 Subject: misc: atmel_tclib: get and use slow clock Commit dca1a4b5ff6e ("clk: at91: keep slow clk enabled to prevent system hang") added a workaround for the slow clock as it is not properly handled by its users. Get and use the slow clock as it is necessary for the timer counters. Signed-off-by: Boris Brezillon Signed-off-by: Alexandre Belloni Acked-by: Greg Kroah-Hartman Acked-by: Daniel Lezcano Acked-by: Thierry Reding --- include/linux/atmel_tc.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/atmel_tc.h b/include/linux/atmel_tc.h index b87c1c7c242a..468fdfa643f0 100644 --- a/include/linux/atmel_tc.h +++ b/include/linux/atmel_tc.h @@ -67,6 +67,7 @@ struct atmel_tc { const struct atmel_tcb_config *tcb_config; int irq[3]; struct clk *clk[3]; + struct clk *slow_clk; struct list_head node; bool allocated; }; -- cgit v1.2.3 From 21c4c073f14509d685ed219aa3c76362a7bfa0ac Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 6 Oct 2015 06:25:43 -0700 Subject: Revert "regmap: Allow installing custom reg_update_bits function" This reverts commit 7741c373cf3ea1f5383fa97fb7a640a429d3dd7c. --- include/linux/regmap.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 4d3a3b1680bb..8fc0bfd8edc4 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,9 +296,6 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); -typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, - unsigned int mask, unsigned int val, - bool *change, bool force_write); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -338,7 +335,6 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; - regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From 87dcbc0610cb580c8eaf289f52aca3620af825f0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 17:45:40 +0200 Subject: sched/core: Simplify INIT_PREEMPT_COUNT As per the following commit: d86ee4809d03 ("sched: optimize cond_resched()") we need PREEMPT_ACTIVE to avoid cond_resched() from working before the scheduler is set up. However, keeping preemption disabled should do the same thing already, making the PREEMPT_ACTIVE part entirely redundant. The only complication is !PREEMPT_COUNT kernels, where PREEMPT_DISABLED ends up being 0. Instead we use an unconditional PREEMPT_OFFSET to set preempt_count() even on !PREEMPT_COUNT kernels. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt Reviewed-by: Thomas Gleixner Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index d086cf0ca2c7..e5b8cbc4b8d6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -606,19 +606,18 @@ struct task_cputime_atomic { #endif /* - * Disable preemption until the scheduler is running. - * Reset by start_kernel()->sched_init()->init_idle(). + * Disable preemption until the scheduler is running -- use an unconditional + * value so that it also works on !PREEMPT_COUNT kernels. * - * We include PREEMPT_ACTIVE to avoid cond_resched() from working - * before the scheduler is active -- see should_resched(). + * Reset by start_kernel()->sched_init()->init_idle()->init_idle_preempt_count(). */ -#define INIT_PREEMPT_COUNT (PREEMPT_DISABLED + PREEMPT_ACTIVE) +#define INIT_PREEMPT_COUNT PREEMPT_OFFSET /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. * @running: non-zero when there are timers running and - * @cputime receives updates. + * @cputime receives updates. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. -- cgit v1.2.3 From 609ca066386b2e64d4c0b0f55da327654962a0c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 17:52:18 +0200 Subject: sched/core: Create preempt_count invariant Assuming units of PREEMPT_DISABLE_OFFSET for preempt_count() numbers. Now that TASK_DEAD no longer results in preempt_count() == 3 during scheduling, we will always call context_switch() with preempt_count() == 2. However, we don't always end up with preempt_count() == 2 in finish_task_switch() because new tasks get created with preempt_count() == 1. Create FORK_PREEMPT_COUNT and set it to 2 and use that in the right places. Note that we cannot use INIT_PREEMPT_COUNT as that serves another purpose (boot). After this, preempt_count() is invariant across the context switch, with exception of PREEMPT_ACTIVE. Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/sched.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index e5b8cbc4b8d6..23ca455d9582 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -599,11 +599,7 @@ struct task_cputime_atomic { .sum_exec_runtime = ATOMIC64_INIT(0), \ } -#ifdef CONFIG_PREEMPT_COUNT -#define PREEMPT_DISABLED (1 + PREEMPT_ENABLED) -#else -#define PREEMPT_DISABLED PREEMPT_ENABLED -#endif +#define PREEMPT_DISABLED (PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) /* * Disable preemption until the scheduler is running -- use an unconditional @@ -613,6 +609,17 @@ struct task_cputime_atomic { */ #define INIT_PREEMPT_COUNT PREEMPT_OFFSET +/* + * Initial preempt_count value; reflects the preempt_count schedule invariant + * which states that during context switches: + * + * preempt_count() == 2*PREEMPT_DISABLE_OFFSET + * + * Note: PREEMPT_DISABLE_OFFSET is 0 for !PREEMPT_COUNT kernels. + * Note: See finish_task_switch(). + */ +#define FORK_PREEMPT_COUNT (2*PREEMPT_DISABLE_OFFSET + PREEMPT_ENABLED) + /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. -- cgit v1.2.3 From 3d8f74dd4ca1da8a1a464bbafcf679e40c2fc10f Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:09:19 +0200 Subject: sched/core: Stop setting PREEMPT_ACTIVE Now that nothing tests for PREEMPT_ACTIVE anymore, stop setting it. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index bea8dd8ff5e0..448dfd0b2ea6 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -146,18 +146,6 @@ extern void preempt_count_sub(int val); #define preempt_count_inc() preempt_count_add(1) #define preempt_count_dec() preempt_count_sub(1) -#define preempt_active_enter() \ -do { \ - preempt_count_add(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ - barrier(); \ -} while (0) - -#define preempt_active_exit() \ -do { \ - barrier(); \ - preempt_count_sub(PREEMPT_ACTIVE + PREEMPT_DISABLE_OFFSET); \ -} while (0) - #ifdef CONFIG_PREEMPT_COUNT #define preempt_disable() \ -- cgit v1.2.3 From da7142e2ed735e1c1bef5a757dc55de35c65fbd6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:11:45 +0200 Subject: sched/core: Simplify preempt_count tests Since we stopped setting PREEMPT_ACTIVE, there is no need to mask it out of preempt_count() tests. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index 448dfd0b2ea6..b2676a16cfbe 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -126,8 +126,7 @@ * Check whether we were atomic before we did preempt_disable(): * (used by the scheduler) */ -#define in_atomic_preempt_off() \ - ((preempt_count() & ~PREEMPT_ACTIVE) != PREEMPT_DISABLE_OFFSET) +#define in_atomic_preempt_off() (preempt_count() != PREEMPT_DISABLE_OFFSET) #if defined(CONFIG_DEBUG_PREEMPT) || defined(CONFIG_PREEMPT_TRACER) extern void preempt_count_add(int val); -- cgit v1.2.3 From e61bf1e43b6f8e687ce93e5d0ce85bca7e481600 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 28 Sep 2015 18:11:35 +0200 Subject: sched/core: Kill PREEMPT_ACTIVE Its unused, kill the definition. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Reviewed-by: Steven Rostedt Reviewed-by: Frederic Weisbecker Cc: Linus Torvalds Cc: Mike Galbraith Cc: Peter Zijlstra Cc: linux-kernel@vger.kernel.org Signed-off-by: Ingo Molnar --- include/linux/preempt.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/preempt.h b/include/linux/preempt.h index b2676a16cfbe..75e4e30677f1 100644 --- a/include/linux/preempt.h +++ b/include/linux/preempt.h @@ -26,7 +26,6 @@ * SOFTIRQ_MASK: 0x0000ff00 * HARDIRQ_MASK: 0x000f0000 * NMI_MASK: 0x00100000 - * PREEMPT_ACTIVE: 0x00200000 * PREEMPT_NEED_RESCHED: 0x80000000 */ #define PREEMPT_BITS 8 @@ -53,10 +52,6 @@ #define SOFTIRQ_DISABLE_OFFSET (2 * SOFTIRQ_OFFSET) -#define PREEMPT_ACTIVE_BITS 1 -#define PREEMPT_ACTIVE_SHIFT (NMI_SHIFT + NMI_BITS) -#define PREEMPT_ACTIVE (__IRQ_MASK(PREEMPT_ACTIVE_BITS) << PREEMPT_ACTIVE_SHIFT) - /* We use the MSB mostly because its available */ #define PREEMPT_NEED_RESCHED 0x80000000 -- cgit v1.2.3 From 77792b11409c9270d98e604b4314b85ce886ac7d Mon Sep 17 00:00:00 2001 From: Jon Ringle Date: Thu, 1 Oct 2015 12:38:07 -0400 Subject: regmap: Allow installing custom reg_update_bits function This commit allows installing a custom reg_update_bits function for cases where the hardware provides a mechanism to set or clear register bits without a read/modify/write cycle. Such is the case with the Microchip ENCX24J600. If a custom reg_update_bits function is provided, it will only be used against volatile registers. Signed-off-by: Jon Ringle Signed-off-by: Mark Brown --- include/linux/regmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regmap.h b/include/linux/regmap.h index 8fc0bfd8edc4..b49d4133750e 100644 --- a/include/linux/regmap.h +++ b/include/linux/regmap.h @@ -296,6 +296,8 @@ typedef int (*regmap_hw_reg_read)(void *context, unsigned int reg, unsigned int *val); typedef int (*regmap_hw_reg_write)(void *context, unsigned int reg, unsigned int val); +typedef int (*regmap_hw_reg_update_bits)(void *context, unsigned int reg, + unsigned int mask, unsigned int val); typedef struct regmap_async *(*regmap_hw_async_alloc)(void); typedef void (*regmap_hw_free_context)(void *context); @@ -335,6 +337,7 @@ struct regmap_bus { regmap_hw_gather_write gather_write; regmap_hw_async_write async_write; regmap_hw_reg_write reg_write; + regmap_hw_reg_update_bits reg_update_bits; regmap_hw_read read; regmap_hw_reg_read reg_read; regmap_hw_free_context free_context; -- cgit v1.2.3 From 63ab7bd0d450b726b88fa4b932f151b98cee2557 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Wed, 30 Sep 2015 13:03:11 -0700 Subject: locking/asm-generic: Add _{relaxed|acquire|release}() variants for inc/dec atomics Similar to what we have for regular add/sub calls. For now, no actual arch implements them, so everyone falls back to the default atomics... iow, nothing changes. These will be used in future primitives. Signed-off-by: Davidlohr Bueso Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Thomas Gleixner Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Paul E.McKenney Cc: Peter Zijlstra Cc: Will Deacon Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1443643395-17016-2-git-send-email-dave@stgolabs.net Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 97 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 97 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index e326469bb9d6..27e580d232ca 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -90,6 +90,30 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic_add_return_relaxed */ +/* atomic_inc_return_relaxed */ +#ifndef atomic_inc_return_relaxed +#define atomic_inc_return_relaxed atomic_inc_return +#define atomic_inc_return_acquire atomic_inc_return +#define atomic_inc_return_release atomic_inc_return + +#else /* atomic_inc_return_relaxed */ + +#ifndef atomic_inc_return_acquire +#define atomic_inc_return_acquire(...) \ + __atomic_op_acquire(atomic_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic_inc_return_release +#define atomic_inc_return_release(...) \ + __atomic_op_release(atomic_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic_inc_return +#define atomic_inc_return(...) \ + __atomic_op_fence(atomic_inc_return, __VA_ARGS__) +#endif +#endif /* atomic_inc_return_relaxed */ + /* atomic_sub_return_relaxed */ #ifndef atomic_sub_return_relaxed #define atomic_sub_return_relaxed atomic_sub_return @@ -114,6 +138,30 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic_sub_return_relaxed */ +/* atomic_dec_return_relaxed */ +#ifndef atomic_dec_return_relaxed +#define atomic_dec_return_relaxed atomic_dec_return +#define atomic_dec_return_acquire atomic_dec_return +#define atomic_dec_return_release atomic_dec_return + +#else /* atomic_dec_return_relaxed */ + +#ifndef atomic_dec_return_acquire +#define atomic_dec_return_acquire(...) \ + __atomic_op_acquire(atomic_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic_dec_return_release +#define atomic_dec_return_release(...) \ + __atomic_op_release(atomic_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic_dec_return +#define atomic_dec_return(...) \ + __atomic_op_fence(atomic_dec_return, __VA_ARGS__) +#endif +#endif /* atomic_dec_return_relaxed */ + /* atomic_xchg_relaxed */ #ifndef atomic_xchg_relaxed #define atomic_xchg_relaxed atomic_xchg @@ -194,6 +242,31 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic64_add_return_relaxed */ +/* atomic64_inc_return_relaxed */ +#ifndef atomic64_inc_return_relaxed +#define atomic64_inc_return_relaxed atomic64_inc_return +#define atomic64_inc_return_acquire atomic64_inc_return +#define atomic64_inc_return_release atomic64_inc_return + +#else /* atomic64_inc_return_relaxed */ + +#ifndef atomic64_inc_return_acquire +#define atomic64_inc_return_acquire(...) \ + __atomic_op_acquire(atomic64_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic64_inc_return_release +#define atomic64_inc_return_release(...) \ + __atomic_op_release(atomic64_inc_return, __VA_ARGS__) +#endif + +#ifndef atomic64_inc_return +#define atomic64_inc_return(...) \ + __atomic_op_fence(atomic64_inc_return, __VA_ARGS__) +#endif +#endif /* atomic64_inc_return_relaxed */ + + /* atomic64_sub_return_relaxed */ #ifndef atomic64_sub_return_relaxed #define atomic64_sub_return_relaxed atomic64_sub_return @@ -218,6 +291,30 @@ static inline int atomic_read_ctrl(const atomic_t *v) #endif #endif /* atomic64_sub_return_relaxed */ +/* atomic64_dec_return_relaxed */ +#ifndef atomic64_dec_return_relaxed +#define atomic64_dec_return_relaxed atomic64_dec_return +#define atomic64_dec_return_acquire atomic64_dec_return +#define atomic64_dec_return_release atomic64_dec_return + +#else /* atomic64_dec_return_relaxed */ + +#ifndef atomic64_dec_return_acquire +#define atomic64_dec_return_acquire(...) \ + __atomic_op_acquire(atomic64_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic64_dec_return_release +#define atomic64_dec_return_release(...) \ + __atomic_op_release(atomic64_dec_return, __VA_ARGS__) +#endif + +#ifndef atomic64_dec_return +#define atomic64_dec_return(...) \ + __atomic_op_fence(atomic64_dec_return, __VA_ARGS__) +#endif +#endif /* atomic64_dec_return_relaxed */ + /* atomic64_xchg_relaxed */ #ifndef atomic64_xchg_relaxed #define atomic64_xchg_relaxed atomic64_xchg -- cgit v1.2.3 From b6a4ae766e3133a4db73fabc81e522d1601cb623 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Wed, 29 Jul 2015 13:29:38 +0800 Subject: rcu: Use rcu_callback_t in call_rcu*() and friends As we now have rcu_callback_t typedefs as the type of rcu callbacks, we should use it in call_rcu*() and friends as the type of parameters. This could save us a few lines of code and make it clear which function requires an rcu callbacks rather than other callbacks as its argument. Besides, this can also help cscope to generate a better database for code reading. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 10 +++++----- include/linux/rcutiny.h | 2 +- include/linux/rcutree.h | 2 +- 3 files changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 581abf848566..d63bb77dab35 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -160,7 +160,7 @@ void do_trace_rcu_torture_read(const char *rcutorturename, * more than one CPU). */ void call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); #else /* #ifdef CONFIG_PREEMPT_RCU */ @@ -191,7 +191,7 @@ void call_rcu(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_bh(struct rcu_head *head, - void (*func)(struct rcu_head *head)); + rcu_callback_t func); /** * call_rcu_sched() - Queue an RCU for invocation after sched grace period. @@ -213,7 +213,7 @@ void call_rcu_bh(struct rcu_head *head, * memory ordering guarantees. */ void call_rcu_sched(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)); + rcu_callback_t func); void synchronize_sched(void); @@ -274,7 +274,7 @@ do { \ * See the description of call_rcu() for more detailed information on * memory ordering guarantees. */ -void call_rcu_tasks(struct rcu_head *head, void (*func)(struct rcu_head *head)); +void call_rcu_tasks(struct rcu_head *head, rcu_callback_t func); void synchronize_rcu_tasks(void); void rcu_barrier_tasks(void); @@ -1065,7 +1065,7 @@ static inline notrace void rcu_read_unlock_sched_notrace(void) #define __kfree_rcu(head, offset) \ do { \ BUILD_BUG_ON(!__is_kfree_rcu_offset(offset)); \ - kfree_call_rcu(head, (void (*)(struct rcu_head *))(unsigned long)(offset)); \ + kfree_call_rcu(head, (rcu_callback_t)(unsigned long)(offset)); \ } while (0) /** diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index ff968b7af3a4..c8a0722f77ea 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -83,7 +83,7 @@ static inline void synchronize_sched_expedited(void) } static inline void kfree_call_rcu(struct rcu_head *head, - void (*func)(struct rcu_head *rcu)) + rcu_callback_t func) { call_rcu(head, func); } diff --git a/include/linux/rcutree.h b/include/linux/rcutree.h index 5abec82f325e..60d15a080d7c 100644 --- a/include/linux/rcutree.h +++ b/include/linux/rcutree.h @@ -48,7 +48,7 @@ void synchronize_rcu_bh(void); void synchronize_sched_expedited(void); void synchronize_rcu_expedited(void); -void kfree_call_rcu(struct rcu_head *head, void (*func)(struct rcu_head *rcu)); +void kfree_call_rcu(struct rcu_head *head, rcu_callback_t func); /** * synchronize_rcu_bh_expedited - Brute-force RCU-bh grace period -- cgit v1.2.3 From bb73c52bad3666997ed2ec83c0c80c3f8ef55008 Mon Sep 17 00:00:00 2001 From: Boqun Feng Date: Thu, 30 Jul 2015 16:55:38 -0700 Subject: rcu: Don't disable preemption for Tiny and Tree RCU readers Because preempt_disable() maps to barrier() for non-debug builds, it forces the compiler to spill and reload registers. Because Tree RCU and Tiny RCU now only appear in CONFIG_PREEMPT=n builds, these barrier() instances generate needless extra code for each instance of rcu_read_lock() and rcu_read_unlock(). This extra code slows down Tree RCU and bloats Tiny RCU. This commit therefore removes the preempt_disable() and preempt_enable() from the non-preemptible implementations of __rcu_read_lock() and __rcu_read_unlock(), respectively. However, for debug purposes, preempt_disable() and preempt_enable() are still invoked if CONFIG_PREEMPT_COUNT=y, because this allows detection of sleeping inside atomic sections in non-preemptible kernels. However, Tiny and Tree RCU operates by coalescing all RCU read-side critical sections on a given CPU that lie between successive quiescent states. It is therefore necessary to compensate for removing barriers from __rcu_read_lock() and __rcu_read_unlock() by adding them to a couple of the RCU functions invoked during quiescent states, namely to rcu_all_qs() and rcu_note_context_switch(). However, note that the latter is more paranoia than necessity, at least until link-time optimizations become more aggressive. This is based on an earlier patch by Paul E. McKenney, fixing a bug encountered in kernels built with CONFIG_PREEMPT=n and CONFIG_PREEMPT_COUNT=y. Signed-off-by: Boqun Feng Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 6 ++++-- include/linux/rcutiny.h | 1 + 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index d63bb77dab35..6c3ceceb6148 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -297,12 +297,14 @@ void synchronize_rcu(void); static inline void __rcu_read_lock(void) { - preempt_disable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_disable(); } static inline void __rcu_read_unlock(void) { - preempt_enable(); + if (IS_ENABLED(CONFIG_PREEMPT_COUNT)) + preempt_enable(); } static inline void synchronize_rcu(void) diff --git a/include/linux/rcutiny.h b/include/linux/rcutiny.h index c8a0722f77ea..4c1aaf9cce7b 100644 --- a/include/linux/rcutiny.h +++ b/include/linux/rcutiny.h @@ -216,6 +216,7 @@ static inline bool rcu_is_watching(void) static inline void rcu_all_qs(void) { + barrier(); /* Avoid RCU read-side critical sections leaking across. */ } #endif /* __LINUX_RCUTINY_H */ -- cgit v1.2.3 From 49f5903b473c5f63f3b57856d1bd4593db0a2eef Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 1 Sep 2015 00:42:57 -0700 Subject: rcu: Move preemption disabling out of __srcu_read_lock() Currently, __srcu_read_lock() cannot be invoked from restricted environments because it contains calls to preempt_disable() and preempt_enable(), both of which can invoke lockdep, which is a bad idea in some restricted execution modes. This commit therefore moves the preempt_disable() and preempt_enable() from __srcu_read_lock() to srcu_read_lock(). It also inserts the preempt_disable() and preempt_enable() around the call to __srcu_read_lock() in do_exit(). Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/srcu.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/srcu.h b/include/linux/srcu.h index bdeb4567b71e..f5f80c5643ac 100644 --- a/include/linux/srcu.h +++ b/include/linux/srcu.h @@ -215,8 +215,11 @@ static inline int srcu_read_lock_held(struct srcu_struct *sp) */ static inline int srcu_read_lock(struct srcu_struct *sp) __acquires(sp) { - int retval = __srcu_read_lock(sp); + int retval; + preempt_disable(); + retval = __srcu_read_lock(sp); + preempt_enable(); rcu_lock_acquire(&(sp)->dep_map); return retval; } -- cgit v1.2.3 From c3ac7cf1847a4e68c909984f60d36adef2088e35 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 10 Sep 2015 16:29:02 -0700 Subject: rcu: Add rcu_pointer_handoff() This commit adds an rcu_pointer_handoff() that is intended to mark situations where a structure's protection transitions from RCU to some other mechanism (locking, reference counting, whatever). These markings should allow external tools to more easily spot bugs involving leaking pointers out of RCU read-side critical sections. Signed-off-by: Paul E. McKenney --- include/linux/rcupdate.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 6c3ceceb6148..587eb057e2fa 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -812,6 +812,28 @@ static inline void rcu_preempt_sleep_check(void) */ #define rcu_dereference_sched(p) rcu_dereference_sched_check(p, 0) +/** + * rcu_pointer_handoff() - Hand off a pointer from RCU to other mechanism + * @p: The pointer to hand off + * + * This is simply an identity function, but it documents where a pointer + * is handed off from RCU to some other synchronization mechanism, for + * example, reference counting or locking. In C11, it would map to + * kill_dependency(). It could be used as follows: + * + * rcu_read_lock(); + * p = rcu_dereference(gp); + * long_lived = is_long_lived(p); + * if (long_lived) { + * if (!atomic_inc_not_zero(p->refcnt)) + * long_lived = false; + * else + * p = rcu_pointer_handoff(p); + * } + * rcu_read_unlock(); + */ +#define rcu_pointer_handoff(p) (p) + /** * rcu_read_lock() - mark the beginning of an RCU read-side critical section * -- cgit v1.2.3 From 8db70b132dd57696cfc7560203a72e90c51bfdda Mon Sep 17 00:00:00 2001 From: Patrick Marlier Date: Fri, 11 Sep 2015 15:50:35 -0700 Subject: rculist: Make list_entry_rcu() use lockless_dereference() The current list_entry_rcu() implementation copies the pointer to a stack variable, then invokes rcu_dereference_raw() on it. This results in an additional store-load pair. Now, most compilers will emit normal store and load instructions, which might seem to be of negligible overhead, but this results in a load-hit-store situation that can cause surprisingly long pipeline stalls, even on modern microprocessors. The problem is that it takes time for the store to get the store buffer updated, which can delay the subsequent load, which immediately follows. This commit therefore switches to the lockless_dereference() primitive, which does not expect the __rcu annotations (that are anyway not present in the list_head structure) and which, like rcu_dereference_raw(), does not check for an enclosing RCU read-side critical section. Most importantly, it does not copy the pointer, thus avoiding the load-hit-store overhead. Signed-off-by: Patrick Marlier [ paulmck: Switched to lockless_dereference() to suppress sparse warnings. ] Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rculist.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rculist.h b/include/linux/rculist.h index 17c6b1f84a77..5ed540986019 100644 --- a/include/linux/rculist.h +++ b/include/linux/rculist.h @@ -247,10 +247,7 @@ static inline void list_splice_init_rcu(struct list_head *list, * primitives such as list_add_rcu() as long as it's guarded by rcu_read_lock(). */ #define list_entry_rcu(ptr, type, member) \ -({ \ - typeof(*ptr) __rcu *__ptr = (typeof(*ptr) __rcu __force *)ptr; \ - container_of((typeof(ptr))rcu_dereference_raw(__ptr), type, member); \ -}) + container_of(lockless_dereference(ptr), type, member) /** * Where are list_empty_rcu() and list_first_entry_rcu()? -- cgit v1.2.3 From e62e3f620ba8d437f4998441fc11cf3dc9d466d1 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Mon, 14 Sep 2015 12:23:01 -0700 Subject: rcu: Remove deprecated rcu_lockdep_assert() The old rcu_lockdep_assert() was retained to ease handling of incoming patches, but any use will result in deprecated warnings. However, its replacement, RCU_LOCKDEP_WARN(), is now upstream. It is therefore time to remove rcu_lockdep_assert(), which this commit does. Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcupdate.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcupdate.h b/include/linux/rcupdate.h index 587eb057e2fa..a0189ba67fde 100644 --- a/include/linux/rcupdate.h +++ b/include/linux/rcupdate.h @@ -537,28 +537,8 @@ static inline int rcu_read_lock_sched_held(void) #endif /* #else #ifdef CONFIG_DEBUG_LOCK_ALLOC */ -/* Deprecate rcu_lockdep_assert(): Use RCU_LOCKDEP_WARN() instead. */ -static inline void __attribute((deprecated)) deprecate_rcu_lockdep_assert(void) -{ -} - #ifdef CONFIG_PROVE_RCU -/** - * rcu_lockdep_assert - emit lockdep splat if specified condition not met - * @c: condition to check - * @s: informative message - */ -#define rcu_lockdep_assert(c, s) \ - do { \ - static bool __section(.data.unlikely) __warned; \ - deprecate_rcu_lockdep_assert(); \ - if (debug_lockdep_rcu_enabled() && !__warned && !(c)) { \ - __warned = true; \ - lockdep_rcu_suspicious(__FILE__, __LINE__, s); \ - } \ - } while (0) - /** * RCU_LOCKDEP_WARN - emit lockdep splat if specified condition is met * @c: condition to check @@ -596,7 +576,6 @@ static inline void rcu_preempt_sleep_check(void) #else /* #ifdef CONFIG_PROVE_RCU */ -#define rcu_lockdep_assert(c, s) deprecate_rcu_lockdep_assert() #define RCU_LOCKDEP_WARN(c, s) do { } while (0) #define rcu_sleep_check() do { } while (0) -- cgit v1.2.3 From 7f5f873c6a0772970d5fee1f364231207051ecd8 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 18 Sep 2015 08:45:22 -0700 Subject: rculist: Use WRITE_ONCE() when deleting from reader-visible list The various RCU list-deletion macros (list_del_rcu(), hlist_del_init_rcu(), hlist_del_rcu(), hlist_bl_del_init_rcu(), hlist_bl_del_rcu(), hlist_nulls_del_init_rcu(), and hlist_nulls_del_rcu()) do plain stores into the ->next pointer of the preceding list elemment. Unfortunately, the compiler is within its rights to (for example) use byte-at-a-time writes to update the pointer, which would fatally confuse concurrent readers. This patch therefore adds the needed WRITE_ONCE() macros. KernelThreadSanitizer (KTSAN) reported the __hlist_del() issue, which is a problem when __hlist_del() is invoked by hlist_del_rcu(). Reported-by: Dmitry Vyukov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/list.h | 5 +++-- include/linux/list_bl.h | 5 +++-- include/linux/list_nulls.h | 3 ++- 3 files changed, 8 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/list.h b/include/linux/list.h index 3e3e64a61002..993395a2e55c 100644 --- a/include/linux/list.h +++ b/include/linux/list.h @@ -87,7 +87,7 @@ static inline void list_add_tail(struct list_head *new, struct list_head *head) static inline void __list_del(struct list_head * prev, struct list_head * next) { next->prev = prev; - prev->next = next; + WRITE_ONCE(prev->next, next); } /** @@ -615,7 +615,8 @@ static inline void __hlist_del(struct hlist_node *n) { struct hlist_node *next = n->next; struct hlist_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (next) next->pprev = pprev; } diff --git a/include/linux/list_bl.h b/include/linux/list_bl.h index 2eb88556c5c5..8132214e8efd 100644 --- a/include/linux/list_bl.h +++ b/include/linux/list_bl.h @@ -93,9 +93,10 @@ static inline void __hlist_bl_del(struct hlist_bl_node *n) LIST_BL_BUG_ON((unsigned long)n & LIST_BL_LOCKMASK); /* pprev may be `first`, so be careful not to lose the lock bit */ - *pprev = (struct hlist_bl_node *) + WRITE_ONCE(*pprev, + (struct hlist_bl_node *) ((unsigned long)next | - ((unsigned long)*pprev & LIST_BL_LOCKMASK)); + ((unsigned long)*pprev & LIST_BL_LOCKMASK))); if (next) next->pprev = pprev; } diff --git a/include/linux/list_nulls.h b/include/linux/list_nulls.h index f266661d2666..444d2b1313bd 100644 --- a/include/linux/list_nulls.h +++ b/include/linux/list_nulls.h @@ -76,7 +76,8 @@ static inline void __hlist_nulls_del(struct hlist_nulls_node *n) { struct hlist_nulls_node *next = n->next; struct hlist_nulls_node **pprev = n->pprev; - *pprev = next; + + WRITE_ONCE(*pprev, next); if (!is_a_nulls(next)) next->pprev = pprev; } -- cgit v1.2.3 From cc44ca848f5e517aeca9f5eabbe13609a3f71450 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:44 +0200 Subject: rcu: Create rcu_sync infrastructure The rcu_sync infrastructure can be thought of as infrastructure to be used to implement reader-writer primitives having extremely lightweight readers during times when there are no writers. The first use is in the percpu_rwsem used by the VFS subsystem. This infrastructure is functionally equivalent to struct rcu_sync_struct { atomic_t counter; }; /* Check possibility of fast-path read-side operations. */ static inline bool rcu_sync_is_idle(struct rcu_sync_struct *rss) { return atomic_read(&rss->counter) == 0; } /* Tell readers to use slowpaths. */ static inline void rcu_sync_enter(struct rcu_sync_struct *rss) { atomic_inc(&rss->counter); synchronize_sched(); } /* Allow readers to once again use fastpaths. */ static inline void rcu_sync_exit(struct rcu_sync_struct *rss) { synchronize_sched(); atomic_dec(&rss->counter); } The main difference is that it records the state and only calls synchronize_sched() if required. At least some of the calls to synchronize_sched() will be optimized away when rcu_sync_enter() and rcu_sync_exit() are invoked repeatedly in quick succession. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 94 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 94 insertions(+) create mode 100644 include/linux/rcu_sync.h (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h new file mode 100644 index 000000000000..cb044df2e21c --- /dev/null +++ b/include/linux/rcu_sync.h @@ -0,0 +1,94 @@ +/* + * RCU-based infrastructure for lightweight reader-writer locking + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, you can access it online at + * http://www.gnu.org/licenses/gpl-2.0.html. + * + * Copyright (c) 2015, Red Hat, Inc. + * + * Author: Oleg Nesterov + */ + +#ifndef _LINUX_RCU_SYNC_H_ +#define _LINUX_RCU_SYNC_H_ + +#include +#include + +/* Structure to mediate between updaters and fastpath-using readers. */ +struct rcu_sync { + int gp_state; + int gp_count; + wait_queue_head_t gp_wait; + + int cb_state; + struct rcu_head cb_head; + + void (*sync)(void); + void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); +}; + +#define ___RCU_SYNC_INIT(name) \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0 + +#define __RCU_SCHED_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_sched, \ + .call = call_rcu_sched, \ +} + +#define __RCU_BH_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu_bh, \ + .call = call_rcu_bh, \ +} + +#define __RCU_SYNC_INIT(name) { \ + ___RCU_SYNC_INIT(name), \ + .sync = synchronize_rcu, \ + .call = call_rcu, \ +} + +#define DEFINE_RCU_SCHED_SYNC(name) \ + struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) + +#define DEFINE_RCU_BH_SYNC(name) \ + struct rcu_sync name = __RCU_BH_SYNC_INIT(name) + +#define DEFINE_RCU_SYNC(name) \ + struct rcu_sync name = __RCU_SYNC_INIT(name) + +/** + * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? + * @rsp: Pointer to rcu_sync structure to use for synchronization + * + * Returns true if readers are permitted to use their fastpaths. + * Must be invoked within an RCU read-side critical section whose + * flavor matches that of the rcu_sync struture. + */ +static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) +{ + return !rsp->gp_state; /* GP_IDLE */ +} + +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + +extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); +extern void rcu_sync_enter(struct rcu_sync *); +extern void rcu_sync_exit(struct rcu_sync *); + +#endif /* _LINUX_RCU_SYNC_H_ */ -- cgit v1.2.3 From 82e8c565be8a72957570d7da8dd9b441db7bb648 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:47 +0200 Subject: rcu_sync: Simplify rcu_sync using new rcu_sync_ops structure This commit adds the new struct rcu_sync_ops which holds sync/call methods, and turns the function pointers in rcu_sync_struct into an array of struct rcu_sync_ops. This simplifies the "init" helpers by collapsing a switch statement and explicit multiple definitions into a simple assignment and a helper macro, respectively. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 60 +++++++++++++++++++----------------------------- 1 file changed, 23 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index cb044df2e21c..c6d2272c4459 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -26,6 +26,8 @@ #include #include +enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; + /* Structure to mediate between updaters and fastpath-using readers. */ struct rcu_sync { int gp_state; @@ -35,43 +37,9 @@ struct rcu_sync { int cb_state; struct rcu_head cb_head; - void (*sync)(void); - void (*call)(struct rcu_head *, void (*)(struct rcu_head *)); + enum rcu_sync_type gp_type; }; -#define ___RCU_SYNC_INIT(name) \ - .gp_state = 0, \ - .gp_count = 0, \ - .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ - .cb_state = 0 - -#define __RCU_SCHED_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_sched, \ - .call = call_rcu_sched, \ -} - -#define __RCU_BH_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu_bh, \ - .call = call_rcu_bh, \ -} - -#define __RCU_SYNC_INIT(name) { \ - ___RCU_SYNC_INIT(name), \ - .sync = synchronize_rcu, \ - .call = call_rcu, \ -} - -#define DEFINE_RCU_SCHED_SYNC(name) \ - struct rcu_sync name = __RCU_SCHED_SYNC_INIT(name) - -#define DEFINE_RCU_BH_SYNC(name) \ - struct rcu_sync name = __RCU_BH_SYNC_INIT(name) - -#define DEFINE_RCU_SYNC(name) \ - struct rcu_sync name = __RCU_SYNC_INIT(name) - /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -85,10 +53,28 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) return !rsp->gp_state; /* GP_IDLE */ } -enum rcu_sync_type { RCU_SYNC, RCU_SCHED_SYNC, RCU_BH_SYNC }; - extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +#define __RCU_SYNC_INITIALIZER(name, type) { \ + .gp_state = 0, \ + .gp_count = 0, \ + .gp_wait = __WAIT_QUEUE_HEAD_INITIALIZER(name.gp_wait), \ + .cb_state = 0, \ + .gp_type = type, \ + } + +#define __DEFINE_RCU_SYNC(name, type) \ + struct rcu_sync_struct name = __RCU_SYNC_INITIALIZER(name, type) + +#define DEFINE_RCU_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SYNC) + +#define DEFINE_RCU_SCHED_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_SCHED_SYNC) + +#define DEFINE_RCU_BH_SYNC(name) \ + __DEFINE_RCU_SYNC(name, RCU_BH_SYNC) + #endif /* _LINUX_RCU_SYNC_H_ */ -- cgit v1.2.3 From 3a518b76af7bb411efe6dd090fbf098e29accb2e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:50 +0200 Subject: rcu_sync: Add CONFIG_PROVE_RCU checks This commit validates that the caller of rcu_sync_is_idle() holds the corresponding type of RCU read-side lock, but only in kernels built with CONFIG_PROVE_RCU=y. This validation is carried out via a new rcu_sync_ops->held() method that is checked within rcu_sync_is_idle(). Note that although this does add code to the fast path, it only does so in kernels built with CONFIG_PROVE_RCU=y. Suggested-by: "Paul E. McKenney" Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index c6d2272c4459..1f2d4fc30b04 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,6 +40,8 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; +extern bool __rcu_sync_is_idle(struct rcu_sync *); + /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? * @rsp: Pointer to rcu_sync structure to use for synchronization @@ -50,7 +52,11 @@ struct rcu_sync { */ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { +#ifdef CONFIG_PROVE_RCU + return __rcu_sync_is_idle(rsp); +#else return !rsp->gp_state; /* GP_IDLE */ +#endif } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); -- cgit v1.2.3 From 07899a6e5f56136028c44a57ad0451e797365ac3 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:52 +0200 Subject: rcu_sync: Introduce rcu_sync_dtor() This commit allows rcu_sync structures to be safely deallocated, The trick is to add a new ->wait field to the gp_ops array. This field is a pointer to the rcu_barrier() function corresponding to the flavor of RCU in question. This allows a new rcu_sync_dtor() to wait for any outstanding callbacks before freeing the rcu_sync structure. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 1f2d4fc30b04..8069d6468bc4 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -62,6 +62,7 @@ static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); extern void rcu_sync_enter(struct rcu_sync *); extern void rcu_sync_exit(struct rcu_sync *); +extern void rcu_sync_dtor(struct rcu_sync *); #define __RCU_SYNC_INITIALIZER(name, type) { \ .gp_state = 0, \ -- cgit v1.2.3 From 001dac627ff37433d5528ffb0d897cd19c2b1e43 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 21 Aug 2015 19:42:57 +0200 Subject: locking/percpu-rwsem: Make use of the rcu_sync infrastructure Currently down_write/up_write calls synchronize_sched_expedited() twice, which is evil. Change this code to rely on rcu-sync primitives. This avoids the _expedited "big hammer", and this can be faster in the contended case or even in the case when a single thread does down_write/up_write in a loop. Of course, a single down_write() will take more time, but otoh it will be much more friendly to the whole system. To simplify the review this patch doesn't update the comments, fixed by the next change. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/percpu-rwsem.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/percpu-rwsem.h b/include/linux/percpu-rwsem.h index 834c4e52cb2d..c2fa3ecb0dce 100644 --- a/include/linux/percpu-rwsem.h +++ b/include/linux/percpu-rwsem.h @@ -5,11 +5,12 @@ #include #include #include +#include #include struct percpu_rw_semaphore { + struct rcu_sync rss; unsigned int __percpu *fast_read_ctr; - atomic_t write_ctr; struct rw_semaphore rw_sem; atomic_t slow_read_ctr; wait_queue_head_t write_waitq; -- cgit v1.2.3 From 4bace7344d6dbd7a1b0b801abf24ea9878064317 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 11 Sep 2015 17:59:18 +0200 Subject: rcu_sync: Cleanup the CONFIG_PROVE_RCU checks 1. Rename __rcu_sync_is_idle() to rcu_sync_lockdep_assert() and change it to use rcu_lockdep_assert(). 2. Change rcu_sync_is_idle() to return rsp->gp_state == GP_IDLE unconditonally, this way we can remove the same check from rcu_sync_lockdep_assert() and clearly isolate the debugging code. Note: rcu_sync_enter()->wait_event(gp_state == GP_PASSED) needs another CONFIG_PROVE_RCU check, the same as is done in ->sync(); but this needs some simple preparations in the core RCU code to avoid the code duplication. Signed-off-by: Oleg Nesterov Signed-off-by: Paul E. McKenney Reviewed-by: Josh Triplett --- include/linux/rcu_sync.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rcu_sync.h b/include/linux/rcu_sync.h index 8069d6468bc4..a63a33e6196e 100644 --- a/include/linux/rcu_sync.h +++ b/include/linux/rcu_sync.h @@ -40,7 +40,7 @@ struct rcu_sync { enum rcu_sync_type gp_type; }; -extern bool __rcu_sync_is_idle(struct rcu_sync *); +extern void rcu_sync_lockdep_assert(struct rcu_sync *); /** * rcu_sync_is_idle() - Are readers permitted to use their fastpaths? @@ -53,10 +53,9 @@ extern bool __rcu_sync_is_idle(struct rcu_sync *); static inline bool rcu_sync_is_idle(struct rcu_sync *rsp) { #ifdef CONFIG_PROVE_RCU - return __rcu_sync_is_idle(rsp); -#else - return !rsp->gp_state; /* GP_IDLE */ + rcu_sync_lockdep_assert(rsp); #endif + return !rsp->gp_state; /* GP_IDLE */ } extern void rcu_sync_init(struct rcu_sync *, enum rcu_sync_type); -- cgit v1.2.3 From b4eb6cdbbd13698704863f680c643c569909e1c2 Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Tue, 25 Aug 2015 20:25:37 -0400 Subject: PCI: Add builtin_pci_driver() to avoid registration boilerplate In f309d4443130 ("platform_device: better support builtin boilerplate avoidance"), we introduced the builtin_driver() macro. Here we use that support and extend it to PCI driver registration, so where a driver is clearly non-modular and builtin-only, we can register it in a similar fashion. Existing code that is clearly non-modular can be updated with the simple mapping of module_pci_driver(...) ---> builtin_pci_driver(...) We've essentially cloned the former to make the latter, and taken out the remove/module_exit parts since those never get used in a non-modular build of the code. Signed-off-by: Paul Gortmaker Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index e90eb22de628..b54fbf1571b1 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1192,6 +1192,17 @@ void pci_unregister_driver(struct pci_driver *dev); module_driver(__pci_driver, pci_register_driver, \ pci_unregister_driver) +/** + * builtin_pci_driver() - Helper macro for registering a PCI driver + * @__pci_driver: pci_driver struct + * + * Helper macro for PCI drivers which do not do anything special in their + * init code. This eliminates a lot of boilerplate. Each driver may only + * use this macro once, and calling it replaces device_initcall(...) + */ +#define builtin_pci_driver(__pci_driver) \ + builtin_driver(__pci_driver, pci_register_driver) + struct pci_driver *pci_dev_driver(const struct pci_dev *dev); int pci_add_dynid(struct pci_driver *drv, unsigned int vendor, unsigned int device, -- cgit v1.2.3 From fee6d4c777a125e56de9370db3b2bf359bf958d6 Mon Sep 17 00:00:00 2001 From: David Ahern Date: Mon, 5 Oct 2015 08:51:24 -0700 Subject: net: Add netif_is_l3_slave IPv6 addrconf keys off of IFF_SLAVE so can not use it for L3 slave. Add a new private flag and add netif_is_l3_slave function for checking it. Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b9450784ae06..b3374402c1ea 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1261,6 +1261,7 @@ struct net_device_ops { * @IFF_L3MDEV_MASTER: device is an L3 master device * @IFF_NO_QUEUE: device can run without qdisc attached * @IFF_OPENVSWITCH: device is a Open vSwitch master + * @IFF_L3MDEV_SLAVE: device is enslaved to an L3 master device */ enum netdev_priv_flags { IFF_802_1Q_VLAN = 1<<0, @@ -1286,6 +1287,7 @@ enum netdev_priv_flags { IFF_L3MDEV_MASTER = 1<<20, IFF_NO_QUEUE = 1<<21, IFF_OPENVSWITCH = 1<<22, + IFF_L3MDEV_SLAVE = 1<<23, }; #define IFF_802_1Q_VLAN IFF_802_1Q_VLAN @@ -3830,6 +3832,11 @@ static inline bool netif_is_l3_master(const struct net_device *dev) return dev->priv_flags & IFF_L3MDEV_MASTER; } +static inline bool netif_is_l3_slave(const struct net_device *dev) +{ + return dev->priv_flags & IFF_L3MDEV_SLAVE; +} + static inline bool netif_is_bridge_master(const struct net_device *dev) { return dev->priv_flags & IFF_EBRIDGE; -- cgit v1.2.3 From 1d27ff5aba2d36b152542e9d62158eb6584bcbd3 Mon Sep 17 00:00:00 2001 From: Mathieu Poirier Date: Wed, 7 Oct 2015 09:26:39 -0600 Subject: coresight: fixing typographical error Tracing gets enabled _for_ a source rather than _from_ a source. Signed-off-by: Mathieu Poirier Signed-off-by: Greg Kroah-Hartman --- include/linux/coresight.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/coresight.h b/include/linux/coresight.h index c69e1b932809..a7cabfa23b55 100644 --- a/include/linux/coresight.h +++ b/include/linux/coresight.h @@ -207,7 +207,7 @@ struct coresight_ops_link { * Operations available for sources. * @trace_id: returns the value of the component's trace ID as known to the HW. - * @enable: enables tracing from a source. + * @enable: enables tracing for a source. * @disable: disables tracing for a source. */ struct coresight_ops_source { -- cgit v1.2.3 From 6a8c3be7ec8eb3c1197766f9245e0d65a4e5aff8 Mon Sep 17 00:00:00 2001 From: Alan Tull Date: Wed, 7 Oct 2015 16:36:28 +0100 Subject: add FPGA manager core API to support programming FPGA's. The following functions are exported as GPL: * fpga_mgr_buf_load Load fpga from image in buffer * fpga_mgr_firmware_load Request firmware and load it to the FPGA. * fpga_mgr_register * fpga_mgr_unregister FPGA device drivers can be added by calling fpga_mgr_register() to register a set of fpga_manager_ops to do device specific stuff. * of_fpga_mgr_get * fpga_mgr_put Get/put a reference to a fpga manager. The following sysfs files are created: * /sys/class/fpga_manager//name Name of low level driver. * /sys/class/fpga_manager//state State of fpga manager Signed-off-by: Alan Tull Acked-by: Michal Simek Signed-off-by: Greg Kroah-Hartman --- include/linux/fpga/fpga-mgr.h | 127 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 127 insertions(+) create mode 100644 include/linux/fpga/fpga-mgr.h (limited to 'include/linux') diff --git a/include/linux/fpga/fpga-mgr.h b/include/linux/fpga/fpga-mgr.h new file mode 100644 index 000000000000..0940bf45e2f2 --- /dev/null +++ b/include/linux/fpga/fpga-mgr.h @@ -0,0 +1,127 @@ +/* + * FPGA Framework + * + * Copyright (C) 2013-2015 Altera Corporation + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + * + * You should have received a copy of the GNU General Public License along with + * this program. If not, see . + */ +#include +#include + +#ifndef _LINUX_FPGA_MGR_H +#define _LINUX_FPGA_MGR_H + +struct fpga_manager; + +/** + * enum fpga_mgr_states - fpga framework states + * @FPGA_MGR_STATE_UNKNOWN: can't determine state + * @FPGA_MGR_STATE_POWER_OFF: FPGA power is off + * @FPGA_MGR_STATE_POWER_UP: FPGA reports power is up + * @FPGA_MGR_STATE_RESET: FPGA in reset state + * @FPGA_MGR_STATE_FIRMWARE_REQ: firmware request in progress + * @FPGA_MGR_STATE_FIRMWARE_REQ_ERR: firmware request failed + * @FPGA_MGR_STATE_WRITE_INIT: preparing FPGA for programming + * @FPGA_MGR_STATE_WRITE_INIT_ERR: Error during WRITE_INIT stage + * @FPGA_MGR_STATE_WRITE: writing image to FPGA + * @FPGA_MGR_STATE_WRITE_ERR: Error while writing FPGA + * @FPGA_MGR_STATE_WRITE_COMPLETE: Doing post programming steps + * @FPGA_MGR_STATE_WRITE_COMPLETE_ERR: Error during WRITE_COMPLETE + * @FPGA_MGR_STATE_OPERATING: FPGA is programmed and operating + */ +enum fpga_mgr_states { + /* default FPGA states */ + FPGA_MGR_STATE_UNKNOWN, + FPGA_MGR_STATE_POWER_OFF, + FPGA_MGR_STATE_POWER_UP, + FPGA_MGR_STATE_RESET, + + /* getting an image for loading */ + FPGA_MGR_STATE_FIRMWARE_REQ, + FPGA_MGR_STATE_FIRMWARE_REQ_ERR, + + /* write sequence: init, write, complete */ + FPGA_MGR_STATE_WRITE_INIT, + FPGA_MGR_STATE_WRITE_INIT_ERR, + FPGA_MGR_STATE_WRITE, + FPGA_MGR_STATE_WRITE_ERR, + FPGA_MGR_STATE_WRITE_COMPLETE, + FPGA_MGR_STATE_WRITE_COMPLETE_ERR, + + /* fpga is programmed and operating */ + FPGA_MGR_STATE_OPERATING, +}; + +/* + * FPGA Manager flags + * FPGA_MGR_PARTIAL_RECONFIG: do partial reconfiguration if supported + */ +#define FPGA_MGR_PARTIAL_RECONFIG BIT(0) + +/** + * struct fpga_manager_ops - ops for low level fpga manager drivers + * @state: returns an enum value of the FPGA's state + * @write_init: prepare the FPGA to receive confuration data + * @write: write count bytes of configuration data to the FPGA + * @write_complete: set FPGA to operating state after writing is done + * @fpga_remove: optional: Set FPGA into a specific state during driver remove + * + * fpga_manager_ops are the low level functions implemented by a specific + * fpga manager driver. The optional ones are tested for NULL before being + * called, so leaving them out is fine. + */ +struct fpga_manager_ops { + enum fpga_mgr_states (*state)(struct fpga_manager *mgr); + int (*write_init)(struct fpga_manager *mgr, u32 flags, + const char *buf, size_t count); + int (*write)(struct fpga_manager *mgr, const char *buf, size_t count); + int (*write_complete)(struct fpga_manager *mgr, u32 flags); + void (*fpga_remove)(struct fpga_manager *mgr); +}; + +/** + * struct fpga_manager - fpga manager structure + * @name: name of low level fpga manager + * @dev: fpga manager device + * @ref_mutex: only allows one reference to fpga manager + * @state: state of fpga manager + * @mops: pointer to struct of fpga manager ops + * @priv: low level driver private date + */ +struct fpga_manager { + const char *name; + struct device dev; + struct mutex ref_mutex; + enum fpga_mgr_states state; + const struct fpga_manager_ops *mops; + void *priv; +}; + +#define to_fpga_manager(d) container_of(d, struct fpga_manager, dev) + +int fpga_mgr_buf_load(struct fpga_manager *mgr, u32 flags, + const char *buf, size_t count); + +int fpga_mgr_firmware_load(struct fpga_manager *mgr, u32 flags, + const char *image_name); + +struct fpga_manager *of_fpga_mgr_get(struct device_node *node); + +void fpga_mgr_put(struct fpga_manager *mgr); + +int fpga_mgr_register(struct device *dev, const char *name, + const struct fpga_manager_ops *mops, void *priv); + +void fpga_mgr_unregister(struct device *dev); + +#endif /*_LINUX_FPGA_MGR_H */ -- cgit v1.2.3 From 7f5028cf6190407b7a632b0f30b83187577824cc Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20L=C3=B3pez?= Date: Mon, 21 Sep 2015 10:38:20 -0300 Subject: sysfs: Support is_visible() on binary attributes MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit According to the sysfs header file: "The returned value will replace static permissions defined in struct attribute or struct bin_attribute." but this isn't the case, as is_visible is only called on struct attribute only. This patch introduces a new is_bin_visible() function to implement the same functionality for binary attributes, and updates documentation accordingly. Note that to keep functionality and code similar to that of normal attributes, the mode is now checked as well to ensure it contains only read/write permissions or SYSFS_PREALLOC. Reviewed-by: Guenter Roeck Signed-off-by: Emilio López Acked-by: Greg Kroah-Hartman Signed-off-by: Olof Johansson --- include/linux/sysfs.h | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9f65758311a4..2f66050d073b 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -64,10 +64,18 @@ do { \ * a new subdirectory with this name. * @is_visible: Optional: Function to return permissions associated with an * attribute of the group. Will be called repeatedly for each - * attribute in the group. Only read/write permissions as well as - * SYSFS_PREALLOC are accepted. Must return 0 if an attribute is - * not visible. The returned value will replace static permissions - * defined in struct attribute or struct bin_attribute. + * non-binary attribute in the group. Only read/write + * permissions as well as SYSFS_PREALLOC are accepted. Must + * return 0 if an attribute is not visible. The returned value + * will replace static permissions defined in struct attribute. + * @is_bin_visible: + * Optional: Function to return permissions associated with a + * binary attribute of the group. Will be called repeatedly + * for each binary attribute in the group. Only read/write + * permissions as well as SYSFS_PREALLOC are accepted. Must + * return 0 if a binary attribute is not visible. The returned + * value will replace static permissions defined in + * struct bin_attribute. * @attrs: Pointer to NULL terminated list of attributes. * @bin_attrs: Pointer to NULL terminated list of binary attributes. * Either attrs or bin_attrs or both must be provided. @@ -76,6 +84,8 @@ struct attribute_group { const char *name; umode_t (*is_visible)(struct kobject *, struct attribute *, int); + umode_t (*is_bin_visible)(struct kobject *, + struct bin_attribute *, int); struct attribute **attrs; struct bin_attribute **bin_attrs; }; -- cgit v1.2.3 From 18800fc7a04e7df8a345e7ef4fc3064368276f83 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Emilio=20L=C3=B3pez?= Date: Mon, 21 Sep 2015 10:38:22 -0300 Subject: platform/chrome: Support reading/writing the vboot context MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some EC implementations include a small nvram space used to store verified boot context data. This patch offers a way to expose this data to userspace. Reviewed-by: Javier Martinez Canillas Signed-off-by: Emilio López Signed-off-by: Olof Johansson --- include/linux/mfd/cros_ec.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/cros_ec.h b/include/linux/mfd/cros_ec.h index da72671a42fa..494682ce4bf3 100644 --- a/include/linux/mfd/cros_ec.h +++ b/include/linux/mfd/cros_ec.h @@ -255,5 +255,6 @@ int cros_ec_query_all(struct cros_ec_device *ec_dev); /* sysfs stuff */ extern struct attribute_group cros_ec_attr_group; extern struct attribute_group cros_ec_lightbar_attr_group; +extern struct attribute_group cros_ec_vbc_attr_group; #endif /* __LINUX_MFD_CROS_EC_H */ -- cgit v1.2.3 From 02ef3c4a2aae65a1632b27770bfea3f83ca06772 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Fri, 7 Aug 2015 15:14:30 -0700 Subject: cpu: Remove try_get_online_cpus() Now that synchronize_sched_expedited() no longer uses it, there are no users of try_get_online_cpus() in mainline. This commit therefore removes it. Signed-off-by: Paul E. McKenney Cc: Peter Zijlstra Cc: Thomas Gleixner --- include/linux/cpu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 23c30bdcca86..d2ca8c38f9c4 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -228,7 +228,6 @@ extern struct bus_type cpu_subsys; extern void cpu_hotplug_begin(void); extern void cpu_hotplug_done(void); extern void get_online_cpus(void); -extern bool try_get_online_cpus(void); extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); @@ -246,7 +245,6 @@ int cpu_down(unsigned int cpu); static inline void cpu_hotplug_begin(void) {} static inline void cpu_hotplug_done(void) {} #define get_online_cpus() do { } while (0) -#define try_get_online_cpus() true #define put_online_cpus() do { } while (0) #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) -- cgit v1.2.3 From a1cba5613edf50c2a213fa90c30aa10500b241b7 Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 6 Oct 2015 12:25:48 -0700 Subject: net: phy: Add Broadcom phy library for common interfaces This patch adds the Broadcom phy library to consolidate common interfaces shared by Broadcom phy's. Moved the common interfaces to the 'bcm-phy-lib.c' and updated the Broadcom PHY drivers to use the new APIs. Signed-off-by: Arun Parameswaran Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 22 +++------------------- 1 file changed, 3 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 697ca7795bd9..6a53ab91407c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -138,7 +138,10 @@ /* 01010: Auto Power-Down */ #define BCM54XX_SHD_APD 0x0a +#define BCM_APD_CLR_MASK 0xFE9F /* clear bits 5, 6 & 8 */ #define BCM54XX_SHD_APD_EN 0x0020 +#define BCM_NO_ANEG_APD_EN 0x0060 /* bits 5 & 6 */ +#define BCM_APD_SINGLELP_EN 0x0100 /* Bit 8 */ #define BCM5482_SHD_LEDS1 0x0d /* 01101: LED Selector 1 */ /* LED3 / ~LINKSPD[2] selector */ @@ -209,25 +212,6 @@ #define MII_BRCM_FET_SHDW_AUXSTAT2 0x1b /* Auxiliary status 2 */ #define MII_BRCM_FET_SHDW_AS2_APDE 0x0020 /* Auto power down enable */ -/* - * Indirect register access functions for the 1000BASE-T/100BASE-TX/10BASE-T - * 0x1c shadow registers. - */ -static inline int bcm54xx_shadow_read(struct phy_device *phydev, u16 shadow) -{ - phy_write(phydev, MII_BCM54XX_SHD, MII_BCM54XX_SHD_VAL(shadow)); - return MII_BCM54XX_SHD_DATA(phy_read(phydev, MII_BCM54XX_SHD)); -} - -static inline int bcm54xx_shadow_write(struct phy_device *phydev, u16 shadow, - u16 val) -{ - return phy_write(phydev, MII_BCM54XX_SHD, - MII_BCM54XX_SHD_WRITE | - MII_BCM54XX_SHD_VAL(shadow) | - MII_BCM54XX_SHD_DATA(val)); -} - #define BRCM_CL45VEN_EEE_CONTROL 0x803d #define LPI_FEATURE_EN 0x8000 #define LPI_FEATURE_EN_DIG1000X 0x4000 -- cgit v1.2.3 From 8e185d6997bb67068f0ca8f062a50caa2608cf1b Mon Sep 17 00:00:00 2001 From: Arun Parameswaran Date: Tue, 6 Oct 2015 12:25:49 -0700 Subject: net: phy: Broadcom Cygnus internal Etherent PHY driver Add support for the Broadcom Cygnus SoCs internal PHY's. The PHYs are 1000M/100M/10M capable with support for 'EEE' and 'APD' (Auto Power Down). This driver supports the following Broadcom Cygnus SoCs: - BCM583XX (BCM58300, BCM58302, BCM58303, BCM58305) - BCM113XX (BCM11300, BCM11320, BCM11350, BCM11360) The PHY's on these SoC's require some workarounds for stable operation, both during configuration time and during suspend/resume. This driver handles the application of the workarounds. Signed-off-by: Arun Parameswaran Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index 6a53ab91407c..59f4a7304419 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -30,6 +30,8 @@ #define PHY_ID_BCM7439_2 0xae025080 #define PHY_ID_BCM7445 0x600d8510 +#define PHY_ID_BCM_CYGNUS 0xae025200 + #define PHY_BCM_OUI_MASK 0xfffffc00 #define PHY_BCM_OUI_1 0x00206000 #define PHY_BCM_OUI_2 0x0143bc00 @@ -216,4 +218,9 @@ #define LPI_FEATURE_EN 0x8000 #define LPI_FEATURE_EN_DIG1000X 0x4000 +/* Core register definitions*/ +#define MII_BRCM_CORE_BASE1E 0x1E +#define MII_BRCM_CORE_EXPB0 0xB0 +#define MII_BRCM_CORE_EXPB1 0xB1 + #endif /* _LINUX_BRCMPHY_H */ -- cgit v1.2.3 From 46234253b9363894a254844a6550b4cc5f3edfe8 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 01:20:35 +0200 Subject: net: move net_get_random_once to lib There's no good reason why users outside of networking should not be using this facility, f.e. for initializing their seeds. Therefore, make it accessible from there as get_random_once(). Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/net.h | 21 ++++----------------- include/linux/once.h | 24 ++++++++++++++++++++++++ 2 files changed, 28 insertions(+), 17 deletions(-) create mode 100644 include/linux/once.h (limited to 'include/linux') diff --git a/include/linux/net.h b/include/linux/net.h index 049d4b03c4c4..70ac5e28e6b7 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -24,7 +24,8 @@ #include /* For O_CLOEXEC and O_NONBLOCK */ #include #include -#include +#include + #include struct poll_table_struct; @@ -250,22 +251,8 @@ do { \ } while (0) #endif -bool __net_get_random_once(void *buf, int nbytes, bool *done, - struct static_key *done_key); - -#define net_get_random_once(buf, nbytes) \ - ({ \ - bool ___ret = false; \ - static bool ___done = false; \ - static struct static_key ___once_key = \ - STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) \ - ___ret = __net_get_random_once(buf, \ - nbytes, \ - &___done, \ - &___once_key); \ - ___ret; \ - }) +#define net_get_random_once(buf, nbytes) \ + get_random_once((buf), (nbytes)) int kernel_sendmsg(struct socket *sock, struct msghdr *msg, struct kvec *vec, size_t num, size_t len); diff --git a/include/linux/once.h b/include/linux/once.h new file mode 100644 index 000000000000..2a83b538dd6a --- /dev/null +++ b/include/linux/once.h @@ -0,0 +1,24 @@ +#ifndef _LINUX_ONCE_H +#define _LINUX_ONCE_H + +#include +#include + +bool __get_random_once(void *buf, int nbytes, bool *done, + struct static_key *once_key); + +#define get_random_once(buf, nbytes) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___once_key = \ + STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) \ + ___ret = __get_random_once((buf), \ + (nbytes), \ + &___done, \ + &___once_key); \ + ___ret; \ + }) + +#endif /* _LINUX_ONCE_H */ -- cgit v1.2.3 From c90aeb948222a7b3d3391d232ec4f50fd8322ad3 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Thu, 8 Oct 2015 01:20:36 +0200 Subject: once: make helper generic for calling functions once Make the get_random_once() helper generic enough, so that functions in general would only be called once, where one user of this is then net_get_random_once(). The only implementation specific call is to get_random_bytes(), all the rest of this *_once() facility would be duplicated among different subsystems otherwise. The new DO_ONCE() helper will be used by prandom() later on, but might also be useful for other scenarios/subsystems as well where a one-time initialization in often-called, possibly fast path code could occur. Signed-off-by: Hannes Frederic Sowa Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/once.h | 61 ++++++++++++++++++++++++++++++++++++++++------------ 1 file changed, 47 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/once.h b/include/linux/once.h index 2a83b538dd6a..285f12cb40e6 100644 --- a/include/linux/once.h +++ b/include/linux/once.h @@ -4,21 +4,54 @@ #include #include -bool __get_random_once(void *buf, int nbytes, bool *done, - struct static_key *once_key); +bool __do_once_start(bool *done, unsigned long *flags); +void __do_once_done(bool *done, struct static_key *once_key, + unsigned long *flags); -#define get_random_once(buf, nbytes) \ - ({ \ - bool ___ret = false; \ - static bool ___done = false; \ - static struct static_key ___once_key = \ - STATIC_KEY_INIT_TRUE; \ - if (static_key_true(&___once_key)) \ - ___ret = __get_random_once((buf), \ - (nbytes), \ - &___done, \ - &___once_key); \ - ___ret; \ +/* Call a function exactly once. The idea of DO_ONCE() is to perform + * a function call such as initialization of random seeds, etc, only + * once, where DO_ONCE() can live in the fast-path. After @func has + * been called with the passed arguments, the static key will patch + * out the condition into a nop. DO_ONCE() guarantees type safety of + * arguments! + * + * Not that the following is not equivalent ... + * + * DO_ONCE(func, arg); + * DO_ONCE(func, arg); + * + * ... to this version: + * + * void foo(void) + * { + * DO_ONCE(func, arg); + * } + * + * foo(); + * foo(); + * + * In case the one-time invocation could be triggered from multiple + * places, then a common helper function must be defined, so that only + * a single static key will be placed there! + */ +#define DO_ONCE(func, ...) \ + ({ \ + bool ___ret = false; \ + static bool ___done = false; \ + static struct static_key ___once_key = STATIC_KEY_INIT_TRUE; \ + if (static_key_true(&___once_key)) { \ + unsigned long ___flags; \ + ___ret = __do_once_start(&___done, &___flags); \ + if (unlikely(___ret)) { \ + func(__VA_ARGS__); \ + __do_once_done(&___done, &___once_key, \ + &___flags); \ + } \ + } \ + ___ret; \ }) +#define get_random_once(buf, nbytes) \ + DO_ONCE(get_random_bytes, (buf), (nbytes)) + #endif /* _LINUX_ONCE_H */ -- cgit v1.2.3 From 897ece56e714a2cc64e6914cb89a362d7021b36e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Oct 2015 01:20:38 +0200 Subject: random32: add prandom_init_once helper for own rngs Add a prandom_init_once() facility that works on the rnd_state, so that users that are keeping their own state independent from prandom_u32() can initialize their taus113 per cpu states. The motivation here is similar to net_get_random_once(): initialize the state as late as possible in the hope that enough entropy has been collected for the seeding. prandom_init_once() makes use of the recently introduced prandom_seed_full_state() helper and is generic enough so that it could also be used on fast-paths due to the DO_ONCE(). Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/random.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/random.h b/include/linux/random.h index e651874df2c9..a75840c1aa71 100644 --- a/include/linux/random.h +++ b/include/linux/random.h @@ -7,6 +7,8 @@ #define _LINUX_RANDOM_H #include +#include + #include struct random_ready_callback { @@ -45,6 +47,10 @@ struct rnd_state { u32 prandom_u32_state(struct rnd_state *state); void prandom_bytes_state(struct rnd_state *state, void *buf, size_t nbytes); +void prandom_seed_full_state(struct rnd_state __percpu *pcpu_state); + +#define prandom_init_once(pcpu_state) \ + DO_ONCE(prandom_seed_full_state, (pcpu_state)) /** * prandom_u32_max - returns a pseudo-random number in interval [0, ep_ro) -- cgit v1.2.3 From 3ad0040573b0c00f88488bc31958acd07a55ee2e Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 8 Oct 2015 01:20:39 +0200 Subject: bpf: split state from prandom_u32() and consolidate {c, e}BPF prngs While recently arguing on a seccomp discussion that raw prandom_u32() access shouldn't be exposed to unpriviledged user space, I forgot the fact that SKF_AD_RANDOM extension actually already does it for some time in cBPF via commit 4cd3675ebf74 ("filter: added BPF random opcode"). Since prandom_u32() is being used in a lot of critical networking code, lets be more conservative and split their states. Furthermore, consolidate eBPF and cBPF prandom handlers to use the new internal PRNG. For eBPF, bpf_get_prandom_u32() was only accessible for priviledged users, but should that change one day, we also don't want to leak raw sequences through things like eBPF maps. One thought was also to have own per bpf_prog states, but due to ABI reasons this is not easily possible, i.e. the program code currently cannot access bpf_prog itself, and copying the rnd_state to/from the stack scratch space whenever a program uses the prng seems not really worth the trouble and seems too hacky. If needed, taus113 could in such cases be implemented within eBPF using a map entry to keep the state space, or get_random_bytes() could become a second helper in cases where performance would not be critical. Both sides can trigger a one-time late init via prandom_init_once() on the shared state. Performance-wise, there should even be a tiny gain as bpf_user_rnd_u32() saves one function call. The PRNG needs to live inside the BPF core since kernels could have a NET-less config as well. Signed-off-by: Daniel Borkmann Acked-by: Hannes Frederic Sowa Acked-by: Alexei Starovoitov Cc: Chema Gonzalez Signed-off-by: David S. Miller --- include/linux/bpf.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index c915a6b54570..3697ad563899 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -200,4 +200,8 @@ extern const struct bpf_func_proto bpf_get_current_comm_proto; extern const struct bpf_func_proto bpf_skb_vlan_push_proto; extern const struct bpf_func_proto bpf_skb_vlan_pop_proto; +/* Shared helpers among cBPF and eBPF. */ +void bpf_user_rnd_init_once(void); +u64 bpf_user_rnd_u32(u64 r1, u64 r2, u64 r3, u64 r4, u64 r5); + #endif /* _LINUX_BPF_H */ -- cgit v1.2.3 From edc1b01cd3b20a5fff049e98f82a2b0d24a34c89 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 5 Oct 2015 10:53:49 -0400 Subject: SUNRPC: Move TCP receive data path into a workqueue context Stream protocols such as TCP can often build up a backlog of data to be read due to ordering. Combine this with the fact that some workloads such as NFS read()-intensive workloads need to receive a lot of data per RPC call, and it turns out that receiving the data from inside a softirq context can cause starvation. The following patch moves the TCP data receive into a workqueue context. We still end up calling tcp_read_sock(), but we do so from a process context, meaning that softirqs are enabled for most of the time. With this patch, I see a doubling of read bandwidth when running a multi-threaded iozone workload between a virtual client and server setup. Signed-off-by: Trond Myklebust --- include/linux/sunrpc/xprtsock.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprtsock.h b/include/linux/sunrpc/xprtsock.h index 357e44c1a46b..0ece4ba06f06 100644 --- a/include/linux/sunrpc/xprtsock.h +++ b/include/linux/sunrpc/xprtsock.h @@ -44,6 +44,8 @@ struct sock_xprt { */ unsigned long sock_state; struct delayed_work connect_worker; + struct work_struct recv_worker; + struct mutex recv_mutex; struct sockaddr_storage srcaddr; unsigned short srcport; -- cgit v1.2.3 From 516285ebe0efadc40b914a0e61a913a390604810 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sun, 20 Sep 2015 16:15:24 -0400 Subject: NFSv4: nfs4_async_handle_error should take a non-const nfs_server For symmetry with the synchronous handler, and so that we can potentially handle errors such as NFS4ERR_BADNAME. Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..53f2acc68baf 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -528,7 +528,7 @@ struct nfs4_delegreturnargs { struct nfs4_delegreturnres { struct nfs4_sequence_res seq_res; struct nfs_fattr * fattr; - const struct nfs_server *server; + struct nfs_server *server; }; /* @@ -601,7 +601,7 @@ struct nfs_removeargs { struct nfs_removeres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs_fattr *dir_attr; struct nfs4_change_info cinfo; }; @@ -619,7 +619,7 @@ struct nfs_renameargs { struct nfs_renameres { struct nfs4_sequence_res seq_res; - const struct nfs_server *server; + struct nfs_server *server; struct nfs4_change_info old_cinfo; struct nfs_fattr *old_fattr; struct nfs4_change_info new_cinfo; -- cgit v1.2.3 From 38a1bdc9ff9f6c8cfad228eac5c1ce31ce038b25 Mon Sep 17 00:00:00 2001 From: Punit Agrawal Date: Fri, 19 Jun 2015 15:31:46 +0100 Subject: firmware: arm_scpi: Extend to support sensors ARM System Control Processor (SCP) provides an API to query and use the sensors available in the system. Extend the SCPI driver to support sensor messages. Signed-off-by: Punit Agrawal Acked-by: Sudeep Holla --- include/linux/scpi_protocol.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/scpi_protocol.h b/include/linux/scpi_protocol.h index e7169cd54e19..80af3cd35ae4 100644 --- a/include/linux/scpi_protocol.h +++ b/include/linux/scpi_protocol.h @@ -28,6 +28,20 @@ struct scpi_dvfs_info { struct scpi_opp *opps; }; +enum scpi_sensor_class { + TEMPERATURE, + VOLTAGE, + CURRENT, + POWER, +}; + +struct scpi_sensor_info { + u16 sensor_id; + u8 class; + u8 trigger_type; + char name[20]; +} __packed; + /** * struct scpi_ops - represents the various operations provided * by SCP through SCPI message protocol @@ -52,6 +66,9 @@ struct scpi_ops { int (*dvfs_get_idx)(u8); int (*dvfs_set_idx)(u8, u8); struct scpi_dvfs_info *(*dvfs_get_info)(u8); + int (*sensor_get_capability)(u16 *sensors); + int (*sensor_get_info)(u16 sensor_id, struct scpi_sensor_info *); + int (*sensor_get_value)(u16, u32 *); }; #if IS_ENABLED(CONFIG_ARM_SCPI_PROTOCOL) -- cgit v1.2.3 From 020446e01eebc9dbe7eda038e570ab9c7ab13586 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Oct 2015 17:13:58 +0300 Subject: net/mlx5_core: Prepare cmd interface to system errors handling In preparation to handling system errors at the mlx5_core level, change the interface of cmd_work_handler to accept a 64 bit argument for the vector. This allows to encode a flag that signifies when the handler is called as a result of a driver logic that wishes to terminate commands that the hardware may not be able to terminate. Such command completions are detected at the handler and proper return status is encoded. To be able to terminate page handler commands, we make sure to set the corresponding bit in the bitmask. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 8b6d6f2154a4..aa899559eec0 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -731,7 +731,7 @@ void mlx5_eq_pagefault(struct mlx5_core_dev *dev, struct mlx5_eqe *eqe); #endif void mlx5_srq_event(struct mlx5_core_dev *dev, u32 srqn, int event_type); struct mlx5_core_srq *mlx5_core_get_srq(struct mlx5_core_dev *dev, u32 srqn); -void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, unsigned long vector); +void mlx5_cmd_comp_handler(struct mlx5_core_dev *dev, u64 vec); void mlx5_cq_event(struct mlx5_core_dev *dev, u32 cqn, int event_type); int mlx5_create_map_eq(struct mlx5_core_dev *dev, struct mlx5_eq *eq, u8 vecidx, int nent, u64 mask, const char *name, struct mlx5_uar *uar); @@ -865,4 +865,8 @@ static inline int mlx5_get_gid_table_len(u16 param) return 8 * (1 << param); } +enum { + MLX5_TRIGGERED_CMD_COMP = (u64)1 << 32, +}; + #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From ac6ea6e81a80172612e0c9ef93720f371b198918 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 8 Oct 2015 17:14:00 +0300 Subject: net/mlx5_core: Use private health thread for each device Use a single threaded work queue for each device in the system instead of using one thread for any device. This is required so we can concurrently process system error handling for all the devices that need that. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index aa899559eec0..41a32873f608 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -391,9 +391,10 @@ struct mlx5_core_health { struct health_buffer __iomem *health; __be32 __iomem *health_counter; struct timer_list timer; - struct list_head list; u32 prev; int miss_counter; + struct workqueue_struct *wq; + struct work_struct work; }; struct mlx5_cq_table { @@ -676,8 +677,8 @@ int mlx5_alloc_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_free_uuars(struct mlx5_core_dev *dev, struct mlx5_uuar_info *uuari); int mlx5_alloc_map_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); void mlx5_unmap_free_uar(struct mlx5_core_dev *mdev, struct mlx5_uar *uar); -void mlx5_health_cleanup(void); -void __init mlx5_health_init(void); +void mlx5_health_cleanup(struct mlx5_core_dev *dev); +int mlx5_health_init(struct mlx5_core_dev *dev); void mlx5_start_health_poll(struct mlx5_core_dev *dev); void mlx5_stop_health_poll(struct mlx5_core_dev *dev); int mlx5_buf_alloc_node(struct mlx5_core_dev *dev, int size, -- cgit v1.2.3 From 61d03535e4be3a46c1e171a25458237e343195e3 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:54 +0800 Subject: net/netlink: lockdep_genl_is_held can be boolean This patch makes lockdep_genl_is_held return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/genetlink.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genetlink.h b/include/linux/genetlink.h index 09460d6d6682..a4c61cbce777 100644 --- a/include/linux/genetlink.h +++ b/include/linux/genetlink.h @@ -8,7 +8,7 @@ extern void genl_lock(void); extern void genl_unlock(void); #ifdef CONFIG_LOCKDEP -extern int lockdep_genl_is_held(void); +extern bool lockdep_genl_is_held(void); #endif /* for synchronisation between af_netlink and genetlink */ -- cgit v1.2.3 From 35498edc6481d588feadee7e76220884d5bbca48 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:55 +0800 Subject: net/ieee80211: ieee80211_is_* can be boolean This patch makes ieee80211_is_* return bool to improve readability due to these particular functions only using either one or zero as their return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/ieee80211.h | 76 +++++++++++++++++++++++------------------------ 1 file changed, 38 insertions(+), 38 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index f79a02a69d26..dcfb2f43d316 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -121,7 +121,7 @@ #define IEEE80211_MAX_SN IEEE80211_SN_MASK #define IEEE80211_SN_MODULO (IEEE80211_MAX_SN + 1) -static inline int ieee80211_sn_less(u16 sn1, u16 sn2) +static inline bool ieee80211_sn_less(u16 sn1, u16 sn2) { return ((sn1 - sn2) & IEEE80211_SN_MASK) > (IEEE80211_SN_MODULO >> 1); } @@ -250,7 +250,7 @@ struct ieee80211_qos_hdr { * ieee80211_has_tods - check if IEEE80211_FCTL_TODS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_tods(__le16 fc) +static inline bool ieee80211_has_tods(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_TODS)) != 0; } @@ -259,7 +259,7 @@ static inline int ieee80211_has_tods(__le16 fc) * ieee80211_has_fromds - check if IEEE80211_FCTL_FROMDS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_fromds(__le16 fc) +static inline bool ieee80211_has_fromds(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FROMDS)) != 0; } @@ -268,7 +268,7 @@ static inline int ieee80211_has_fromds(__le16 fc) * ieee80211_has_a4 - check if IEEE80211_FCTL_TODS and IEEE80211_FCTL_FROMDS are set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_a4(__le16 fc) +static inline bool ieee80211_has_a4(__le16 fc) { __le16 tmp = cpu_to_le16(IEEE80211_FCTL_TODS | IEEE80211_FCTL_FROMDS); return (fc & tmp) == tmp; @@ -278,7 +278,7 @@ static inline int ieee80211_has_a4(__le16 fc) * ieee80211_has_morefrags - check if IEEE80211_FCTL_MOREFRAGS is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_morefrags(__le16 fc) +static inline bool ieee80211_has_morefrags(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREFRAGS)) != 0; } @@ -287,7 +287,7 @@ static inline int ieee80211_has_morefrags(__le16 fc) * ieee80211_has_retry - check if IEEE80211_FCTL_RETRY is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_retry(__le16 fc) +static inline bool ieee80211_has_retry(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_RETRY)) != 0; } @@ -296,7 +296,7 @@ static inline int ieee80211_has_retry(__le16 fc) * ieee80211_has_pm - check if IEEE80211_FCTL_PM is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_pm(__le16 fc) +static inline bool ieee80211_has_pm(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PM)) != 0; } @@ -305,7 +305,7 @@ static inline int ieee80211_has_pm(__le16 fc) * ieee80211_has_moredata - check if IEEE80211_FCTL_MOREDATA is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_moredata(__le16 fc) +static inline bool ieee80211_has_moredata(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_MOREDATA)) != 0; } @@ -314,7 +314,7 @@ static inline int ieee80211_has_moredata(__le16 fc) * ieee80211_has_protected - check if IEEE80211_FCTL_PROTECTED is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_protected(__le16 fc) +static inline bool ieee80211_has_protected(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_PROTECTED)) != 0; } @@ -323,7 +323,7 @@ static inline int ieee80211_has_protected(__le16 fc) * ieee80211_has_order - check if IEEE80211_FCTL_ORDER is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_has_order(__le16 fc) +static inline bool ieee80211_has_order(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_ORDER)) != 0; } @@ -332,7 +332,7 @@ static inline int ieee80211_has_order(__le16 fc) * ieee80211_is_mgmt - check if type is IEEE80211_FTYPE_MGMT * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_mgmt(__le16 fc) +static inline bool ieee80211_is_mgmt(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT); @@ -342,7 +342,7 @@ static inline int ieee80211_is_mgmt(__le16 fc) * ieee80211_is_ctl - check if type is IEEE80211_FTYPE_CTL * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_ctl(__le16 fc) +static inline bool ieee80211_is_ctl(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL); @@ -352,7 +352,7 @@ static inline int ieee80211_is_ctl(__le16 fc) * ieee80211_is_data - check if type is IEEE80211_FTYPE_DATA * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data(__le16 fc) +static inline bool ieee80211_is_data(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA); @@ -362,7 +362,7 @@ static inline int ieee80211_is_data(__le16 fc) * ieee80211_is_data_qos - check if type is IEEE80211_FTYPE_DATA and IEEE80211_STYPE_QOS_DATA is set * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data_qos(__le16 fc) +static inline bool ieee80211_is_data_qos(__le16 fc) { /* * mask with QOS_DATA rather than IEEE80211_FCTL_STYPE as we just need @@ -376,7 +376,7 @@ static inline int ieee80211_is_data_qos(__le16 fc) * ieee80211_is_data_present - check if type is IEEE80211_FTYPE_DATA and has data * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_data_present(__le16 fc) +static inline bool ieee80211_is_data_present(__le16 fc) { /* * mask with 0x40 and test that that bit is clear to only return true @@ -390,7 +390,7 @@ static inline int ieee80211_is_data_present(__le16 fc) * ieee80211_is_assoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_assoc_req(__le16 fc) +static inline bool ieee80211_is_assoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_REQ); @@ -400,7 +400,7 @@ static inline int ieee80211_is_assoc_req(__le16 fc) * ieee80211_is_assoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ASSOC_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_assoc_resp(__le16 fc) +static inline bool ieee80211_is_assoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ASSOC_RESP); @@ -410,7 +410,7 @@ static inline int ieee80211_is_assoc_resp(__le16 fc) * ieee80211_is_reassoc_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_reassoc_req(__le16 fc) +static inline bool ieee80211_is_reassoc_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_REQ); @@ -420,7 +420,7 @@ static inline int ieee80211_is_reassoc_req(__le16 fc) * ieee80211_is_reassoc_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_REASSOC_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_reassoc_resp(__le16 fc) +static inline bool ieee80211_is_reassoc_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_REASSOC_RESP); @@ -430,7 +430,7 @@ static inline int ieee80211_is_reassoc_resp(__le16 fc) * ieee80211_is_probe_req - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_probe_req(__le16 fc) +static inline bool ieee80211_is_probe_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_REQ); @@ -440,7 +440,7 @@ static inline int ieee80211_is_probe_req(__le16 fc) * ieee80211_is_probe_resp - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_PROBE_RESP * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_probe_resp(__le16 fc) +static inline bool ieee80211_is_probe_resp(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_PROBE_RESP); @@ -450,7 +450,7 @@ static inline int ieee80211_is_probe_resp(__le16 fc) * ieee80211_is_beacon - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_BEACON * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_beacon(__le16 fc) +static inline bool ieee80211_is_beacon(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_BEACON); @@ -460,7 +460,7 @@ static inline int ieee80211_is_beacon(__le16 fc) * ieee80211_is_atim - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ATIM * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_atim(__le16 fc) +static inline bool ieee80211_is_atim(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ATIM); @@ -470,7 +470,7 @@ static inline int ieee80211_is_atim(__le16 fc) * ieee80211_is_disassoc - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DISASSOC * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_disassoc(__le16 fc) +static inline bool ieee80211_is_disassoc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DISASSOC); @@ -480,7 +480,7 @@ static inline int ieee80211_is_disassoc(__le16 fc) * ieee80211_is_auth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_AUTH * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_auth(__le16 fc) +static inline bool ieee80211_is_auth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_AUTH); @@ -490,7 +490,7 @@ static inline int ieee80211_is_auth(__le16 fc) * ieee80211_is_deauth - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_DEAUTH * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_deauth(__le16 fc) +static inline bool ieee80211_is_deauth(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_DEAUTH); @@ -500,7 +500,7 @@ static inline int ieee80211_is_deauth(__le16 fc) * ieee80211_is_action - check if IEEE80211_FTYPE_MGMT && IEEE80211_STYPE_ACTION * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_action(__le16 fc) +static inline bool ieee80211_is_action(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_MGMT | IEEE80211_STYPE_ACTION); @@ -510,7 +510,7 @@ static inline int ieee80211_is_action(__le16 fc) * ieee80211_is_back_req - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK_REQ * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_back_req(__le16 fc) +static inline bool ieee80211_is_back_req(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK_REQ); @@ -520,7 +520,7 @@ static inline int ieee80211_is_back_req(__le16 fc) * ieee80211_is_back - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_BACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_back(__le16 fc) +static inline bool ieee80211_is_back(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_BACK); @@ -530,7 +530,7 @@ static inline int ieee80211_is_back(__le16 fc) * ieee80211_is_pspoll - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_PSPOLL * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_pspoll(__le16 fc) +static inline bool ieee80211_is_pspoll(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_PSPOLL); @@ -540,7 +540,7 @@ static inline int ieee80211_is_pspoll(__le16 fc) * ieee80211_is_rts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_RTS * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_rts(__le16 fc) +static inline bool ieee80211_is_rts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_RTS); @@ -550,7 +550,7 @@ static inline int ieee80211_is_rts(__le16 fc) * ieee80211_is_cts - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CTS * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cts(__le16 fc) +static inline bool ieee80211_is_cts(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CTS); @@ -560,7 +560,7 @@ static inline int ieee80211_is_cts(__le16 fc) * ieee80211_is_ack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_ACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_ack(__le16 fc) +static inline bool ieee80211_is_ack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_ACK); @@ -570,7 +570,7 @@ static inline int ieee80211_is_ack(__le16 fc) * ieee80211_is_cfend - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFEND * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cfend(__le16 fc) +static inline bool ieee80211_is_cfend(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFEND); @@ -580,7 +580,7 @@ static inline int ieee80211_is_cfend(__le16 fc) * ieee80211_is_cfendack - check if IEEE80211_FTYPE_CTL && IEEE80211_STYPE_CFENDACK * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_cfendack(__le16 fc) +static inline bool ieee80211_is_cfendack(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_CTL | IEEE80211_STYPE_CFENDACK); @@ -590,7 +590,7 @@ static inline int ieee80211_is_cfendack(__le16 fc) * ieee80211_is_nullfunc - check if frame is a regular (non-QoS) nullfunc frame * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_nullfunc(__le16 fc) +static inline bool ieee80211_is_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_NULLFUNC); @@ -600,7 +600,7 @@ static inline int ieee80211_is_nullfunc(__le16 fc) * ieee80211_is_qos_nullfunc - check if frame is a QoS nullfunc frame * @fc: frame control bytes in little-endian byteorder */ -static inline int ieee80211_is_qos_nullfunc(__le16 fc) +static inline bool ieee80211_is_qos_nullfunc(__le16 fc) { return (fc & cpu_to_le16(IEEE80211_FCTL_FTYPE | IEEE80211_FCTL_STYPE)) == cpu_to_le16(IEEE80211_FTYPE_DATA | IEEE80211_STYPE_QOS_NULLFUNC); @@ -624,7 +624,7 @@ static inline bool ieee80211_is_bufferable_mmpdu(__le16 fc) * ieee80211_is_first_frag - check if IEEE80211_SCTL_FRAG is not set * @seq_ctrl: frame sequence control bytes in little-endian byteorder */ -static inline int ieee80211_is_first_frag(__le16 seq_ctrl) +static inline bool ieee80211_is_first_frag(__le16 seq_ctrl) { return (seq_ctrl & cpu_to_le16(IEEE80211_SCTL_FRAG)) == 0; } -- cgit v1.2.3 From 875e08294911b3cb8c60416d64d990809421de29 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:56 +0800 Subject: net/nfnetlink: lockdep_nfnl_is_held can be boolean This patch makes lockdep_nfnl_is_held return bool to improve readability due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/netfilter/nfnetlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index e955d4730625..249d1bb01e03 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -45,11 +45,11 @@ int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, void nfnl_lock(__u8 subsys_id); void nfnl_unlock(__u8 subsys_id); #ifdef CONFIG_PROVE_LOCKING -int lockdep_nfnl_is_held(__u8 subsys_id); +bool lockdep_nfnl_is_held(__u8 subsys_id); #else -static inline int lockdep_nfnl_is_held(__u8 subsys_id) +static inline bool lockdep_nfnl_is_held(__u8 subsys_id) { - return 1; + return true; } #endif /* CONFIG_PROVE_LOCKING */ -- cgit v1.2.3 From d6fbaea5f635216c9861587c4e658086cf3b1b6b Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:57 +0800 Subject: net/can: can_dropped_invalid_skb can be boolean This patch makes can_dropped_invalid_skb return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Acked-by: Marc Kleine-Budde Signed-off-by: David S. Miller --- include/linux/can/dev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/can/dev.h b/include/linux/can/dev.h index 56dcadd83716..735f9f8c4e43 100644 --- a/include/linux/can/dev.h +++ b/include/linux/can/dev.h @@ -78,7 +78,7 @@ struct can_priv { #define get_canfd_dlc(i) (min_t(__u8, (i), CANFD_MAX_DLC)) /* Drop a given socketbuffer if it does not contain a valid CAN frame. */ -static inline int can_dropped_invalid_skb(struct net_device *dev, +static inline bool can_dropped_invalid_skb(struct net_device *dev, struct sk_buff *skb) { const struct canfd_frame *cfd = (struct canfd_frame *)skb->data; @@ -94,12 +94,12 @@ static inline int can_dropped_invalid_skb(struct net_device *dev, } else goto inval_skb; - return 0; + return false; inval_skb: kfree_skb(skb); dev->stats.tx_dropped++; - return 1; + return true; } static inline bool can_is_canfd_skb(const struct sk_buff *skb) -- cgit v1.2.3 From 0c6119d99bf5df9403a688d267537284e9cc8bcb Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:28:58 +0800 Subject: net/dccp: dccp_list_has_service can be boolean This patch makes dccp_list_has_service return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/dccp.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dccp.h b/include/linux/dccp.h index 221025423e6c..61d042bbbf60 100644 --- a/include/linux/dccp.h +++ b/include/linux/dccp.h @@ -202,16 +202,16 @@ struct dccp_service_list { #define DCCP_SERVICE_INVALID_VALUE htonl((__u32)-1) #define DCCP_SERVICE_CODE_IS_ABSENT 0 -static inline int dccp_list_has_service(const struct dccp_service_list *sl, +static inline bool dccp_list_has_service(const struct dccp_service_list *sl, const __be32 service) { if (likely(sl != NULL)) { u32 i = sl->dccpsl_nr; while (i--) if (sl->dccpsl_list[i] == service) - return 1; + return true; } - return 0; + return false; } struct dccp_ackvec; -- cgit v1.2.3 From c3225164cf60ccecce2459dcb5813dd798233f2d Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:00 +0800 Subject: net/inetdevice: inet_ifa_match can be boolean This patch makes inet_ifa_match return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index a4328cea376a..3b0999e0260f 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -171,7 +171,7 @@ __be32 inet_confirm_addr(struct net *net, struct in_device *in_dev, __be32 dst, __be32 local, int scope); struct in_ifaddr *inet_ifa_byprefix(struct in_device *in_dev, __be32 prefix, __be32 mask); -static __inline__ int inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) +static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) { return !((addr^ifa->ifa_address)&ifa->ifa_mask); } -- cgit v1.2.3 From f06cc7b284f3dfb2c5decbf9fde711b50a530050 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:01 +0800 Subject: net/inetdevice: bad_mask can be boolean This patch makes bad_mask return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/inetdevice.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/inetdevice.h b/include/linux/inetdevice.h index 3b0999e0260f..ee971f335a8b 100644 --- a/include/linux/inetdevice.h +++ b/include/linux/inetdevice.h @@ -180,15 +180,15 @@ static __inline__ bool inet_ifa_match(__be32 addr, struct in_ifaddr *ifa) * Check if a mask is acceptable. */ -static __inline__ int bad_mask(__be32 mask, __be32 addr) +static __inline__ bool bad_mask(__be32 mask, __be32 addr) { __u32 hmask; if (addr & (mask = ~mask)) - return 1; + return true; hmask = ntohl(mask); if (hmask & (hmask+1)) - return 1; - return 0; + return true; + return false; } #define for_primary_ifa(in_dev) { struct in_ifaddr *ifa; \ -- cgit v1.2.3 From 0cbf334376d5e82d7a2f5cd234ca4f5d0843f3ea Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 8 Oct 2015 21:29:02 +0800 Subject: net/core: lockdep_rtnl_is_held can be boolean This patch makes lockdep_rtnl_is_held return bool due to this particular function only using either one or zero as its return value. In another patch lockdep_is_held is also made return bool. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: David S. Miller --- include/linux/rtnetlink.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rtnetlink.h b/include/linux/rtnetlink.h index 39adaa9529eb..4be5048b1fbe 100644 --- a/include/linux/rtnetlink.h +++ b/include/linux/rtnetlink.h @@ -33,11 +33,11 @@ extern wait_queue_head_t netdev_unregistering_wq; extern struct mutex net_mutex; #ifdef CONFIG_PROVE_LOCKING -extern int lockdep_rtnl_is_held(void); +extern bool lockdep_rtnl_is_held(void); #else -static inline int lockdep_rtnl_is_held(void) +static inline bool lockdep_rtnl_is_held(void) { - return 1; + return true; } #endif /* #ifdef CONFIG_PROVE_LOCKING */ -- cgit v1.2.3 From 10abc7df9277a81971924a6c03f74e86d799daf1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 9 Oct 2015 15:50:11 +0100 Subject: irqdomain: Add an accessor for the of_node field As we're about to remove the of_node field from the irqdomain structure, introduce an accessor for it. Subsequent patches will take care of the actual repainting. Signed-off-by: Marc Zyngier Cc: Jiang Liu Cc: Jason Cooper Link: http://lkml.kernel.org/r/1444402211-1141-1-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index d3ca79236fb0..f644fdb06dd6 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -161,6 +161,11 @@ enum { IRQ_DOMAIN_FLAG_NONCORE = (1 << 16), }; +static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) +{ + return d->of_node; +} + #ifdef CONFIG_IRQ_DOMAIN struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, irq_hw_number_t hwirq_max, int direct_max, -- cgit v1.2.3 From 188c3568f814fea965947ed24739987ba9c5a87e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 1 Oct 2015 17:14:10 -0600 Subject: NVMe: Reference count open namespaces Dynamic namespace attachment means the namespace may be removed at any time, so the namespace reference count can not be tied to the device reference count. This fixes a NULL dereference if an opened namespace is detached from a controller. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b5812c395351..992b9c118678 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -135,6 +135,7 @@ struct nvme_ns { struct nvme_dev *dev; struct request_queue *queue; struct gendisk *disk; + struct kref kref; unsigned ns_id; int lba_shift; -- cgit v1.2.3 From 0a7385ad69f0f210c5cfbfd334b42423a6e05e5a Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 2 Oct 2015 10:37:29 -0600 Subject: NVMe: Simplify device resume on io queue failure Releasing IO queues and disks was done in a work queue outside the controller resume context to delete namespaces if the controller failed after a resume from suspend. This is unnecessary since we can resume a device asynchronously. This patch makes resume use probe_work so it can directly remove namespaces if the device is manageable but not IO capable. Since the deleting disks was the only reason we had the convoluted "reset_workfn", this patch removes that unnecessary indirection. Signed-off-by: Keith Busch Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 992b9c118678..7725b4c8b718 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,7 +104,6 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct device *device; - work_func_t reset_workfn; struct work_struct reset_work; struct work_struct probe_work; struct work_struct scan_work; -- cgit v1.2.3 From f11bb3e244c4b14e2d0a3b9d7e41895752997170 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:46:41 +0200 Subject: nvme: add a local nvme.h header Add a new drivers/block/nvme.h which contains all the driver internal interface. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 114 --------------------------------------------------- 1 file changed, 114 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7725b4c8b718..364cb9adbbbc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -16,9 +16,6 @@ #define _LINUX_NVME_H #include -#include -#include -#include struct nvme_bar { __u64 cap; /* Controller Capabilities */ @@ -76,115 +73,4 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; -extern unsigned char nvme_io_timeout; -#define NVME_IO_TIMEOUT (nvme_io_timeout * HZ) - -/* - * Represents an NVM Express device. Each nvme_dev is a PCI function. - */ -struct nvme_dev { - struct list_head node; - struct nvme_queue **queues; - struct request_queue *admin_q; - struct blk_mq_tag_set tagset; - struct blk_mq_tag_set admin_tagset; - u32 __iomem *dbs; - struct device *dev; - struct dma_pool *prp_page_pool; - struct dma_pool *prp_small_pool; - int instance; - unsigned queue_count; - unsigned online_queues; - unsigned max_qid; - int q_depth; - u32 db_stride; - u32 ctrl_config; - struct msix_entry *entry; - struct nvme_bar __iomem *bar; - struct list_head namespaces; - struct kref kref; - struct device *device; - struct work_struct reset_work; - struct work_struct probe_work; - struct work_struct scan_work; - char name[12]; - char serial[20]; - char model[40]; - char firmware_rev[8]; - bool subsystem; - u32 max_hw_sectors; - u32 stripe_size; - u32 page_size; - void __iomem *cmb; - dma_addr_t cmb_dma_addr; - u64 cmb_size; - u32 cmbsz; - u16 oncs; - u16 abort_limit; - u8 event_limit; - u8 vwc; -}; - -/* - * An NVM Express namespace is equivalent to a SCSI LUN - */ -struct nvme_ns { - struct list_head list; - - struct nvme_dev *dev; - struct request_queue *queue; - struct gendisk *disk; - struct kref kref; - - unsigned ns_id; - int lba_shift; - u16 ms; - bool ext; - u8 pi_type; - u64 mode_select_num_blocks; - u32 mode_select_block_len; -}; - -/* - * The nvme_iod describes the data in an I/O, including the list of PRP - * entries. You can't see it in this data structure because C doesn't let - * me express that. Use nvme_alloc_iod to ensure there's enough space - * allocated to store the PRP list. - */ -struct nvme_iod { - unsigned long private; /* For the use of the submitter of the I/O */ - int npages; /* In the PRP list. 0 means small pool in use */ - int offset; /* Of PRP list */ - int nents; /* Used in scatterlist */ - int length; /* Of data, in bytes */ - dma_addr_t first_dma; - struct scatterlist meta_sg[1]; /* metadata requires single contiguous buffer */ - struct scatterlist sg[0]; -}; - -static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) -{ - return (sector >> (ns->lba_shift - 9)); -} - -int nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buf, unsigned bufflen); -int __nvme_submit_sync_cmd(struct request_queue *q, struct nvme_command *cmd, - void *buffer, void __user *ubuffer, unsigned bufflen, - u32 *result, unsigned timeout); -int nvme_identify_ctrl(struct nvme_dev *dev, struct nvme_id_ctrl **id); -int nvme_identify_ns(struct nvme_dev *dev, unsigned nsid, - struct nvme_id_ns **id); -int nvme_get_log_page(struct nvme_dev *dev, struct nvme_smart_log **log); -int nvme_get_features(struct nvme_dev *dev, unsigned fid, unsigned nsid, - dma_addr_t dma_addr, u32 *result); -int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, - dma_addr_t dma_addr, u32 *result); - -struct sg_io_hdr; - -int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); -int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); -int nvme_sg_get_version_num(int __user *ip); - #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 9d99a8dda154f38307d43d9c9aa504bd3703d596 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 15:25:49 +0200 Subject: nvme: move hardware structures out of the uapi version of nvme.h Currently all NVMe command and completion structures are exposed to userspace through the uapi version of nvme.h. They are not an ABI between the kernel and userspace, and will change in C-incompatible way for future versions of the spec. Move them to the kernel version of the file and rename the uapi header to nvme_ioctl.h so that userspace can easily detect the presence of the new clean header. Nvme-cli already carries a local copy of the header, so it won't be affected by this move. Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 526 ++++++++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 524 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 364cb9adbbbc..91a805437876 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,8 +15,6 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H -#include - struct nvme_bar { __u64 cap; /* Controller Capabilities */ __u32 vs; /* Version */ @@ -73,4 +71,528 @@ enum { NVME_CSTS_SHST_MASK = 3 << 2, }; +struct nvme_id_power_state { + __le16 max_power; /* centiwatts */ + __u8 rsvd2; + __u8 flags; + __le32 entry_lat; /* microseconds */ + __le32 exit_lat; /* microseconds */ + __u8 read_tput; + __u8 read_lat; + __u8 write_tput; + __u8 write_lat; + __le16 idle_power; + __u8 idle_scale; + __u8 rsvd19; + __le16 active_power; + __u8 active_work_scale; + __u8 rsvd23[9]; +}; + +enum { + NVME_PS_FLAGS_MAX_POWER_SCALE = 1 << 0, + NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, +}; + +struct nvme_id_ctrl { + __le16 vid; + __le16 ssvid; + char sn[20]; + char mn[40]; + char fr[8]; + __u8 rab; + __u8 ieee[3]; + __u8 mic; + __u8 mdts; + __u16 cntlid; + __u32 ver; + __u8 rsvd84[172]; + __le16 oacs; + __u8 acl; + __u8 aerl; + __u8 frmw; + __u8 lpa; + __u8 elpe; + __u8 npss; + __u8 avscc; + __u8 apsta; + __le16 wctemp; + __le16 cctemp; + __u8 rsvd270[242]; + __u8 sqes; + __u8 cqes; + __u8 rsvd514[2]; + __le32 nn; + __le16 oncs; + __le16 fuses; + __u8 fna; + __u8 vwc; + __le16 awun; + __le16 awupf; + __u8 nvscc; + __u8 rsvd531; + __le16 acwu; + __u8 rsvd534[2]; + __le32 sgls; + __u8 rsvd540[1508]; + struct nvme_id_power_state psd[32]; + __u8 vs[1024]; +}; + +enum { + NVME_CTRL_ONCS_COMPARE = 1 << 0, + NVME_CTRL_ONCS_WRITE_UNCORRECTABLE = 1 << 1, + NVME_CTRL_ONCS_DSM = 1 << 2, + NVME_CTRL_VWC_PRESENT = 1 << 0, +}; + +struct nvme_lbaf { + __le16 ms; + __u8 ds; + __u8 rp; +}; + +struct nvme_id_ns { + __le64 nsze; + __le64 ncap; + __le64 nuse; + __u8 nsfeat; + __u8 nlbaf; + __u8 flbas; + __u8 mc; + __u8 dpc; + __u8 dps; + __u8 nmic; + __u8 rescap; + __u8 fpi; + __u8 rsvd33; + __le16 nawun; + __le16 nawupf; + __le16 nacwu; + __le16 nabsn; + __le16 nabo; + __le16 nabspf; + __u16 rsvd46; + __le64 nvmcap[2]; + __u8 rsvd64[40]; + __u8 nguid[16]; + __u8 eui64[8]; + struct nvme_lbaf lbaf[16]; + __u8 rsvd192[192]; + __u8 vs[3712]; +}; + +enum { + NVME_NS_FEAT_THIN = 1 << 0, + NVME_NS_FLBAS_LBA_MASK = 0xf, + NVME_NS_FLBAS_META_EXT = 0x10, + NVME_LBAF_RP_BEST = 0, + NVME_LBAF_RP_BETTER = 1, + NVME_LBAF_RP_GOOD = 2, + NVME_LBAF_RP_DEGRADED = 3, + NVME_NS_DPC_PI_LAST = 1 << 4, + NVME_NS_DPC_PI_FIRST = 1 << 3, + NVME_NS_DPC_PI_TYPE3 = 1 << 2, + NVME_NS_DPC_PI_TYPE2 = 1 << 1, + NVME_NS_DPC_PI_TYPE1 = 1 << 0, + NVME_NS_DPS_PI_FIRST = 1 << 3, + NVME_NS_DPS_PI_MASK = 0x7, + NVME_NS_DPS_PI_TYPE1 = 1, + NVME_NS_DPS_PI_TYPE2 = 2, + NVME_NS_DPS_PI_TYPE3 = 3, +}; + +struct nvme_smart_log { + __u8 critical_warning; + __u8 temperature[2]; + __u8 avail_spare; + __u8 spare_thresh; + __u8 percent_used; + __u8 rsvd6[26]; + __u8 data_units_read[16]; + __u8 data_units_written[16]; + __u8 host_reads[16]; + __u8 host_writes[16]; + __u8 ctrl_busy_time[16]; + __u8 power_cycles[16]; + __u8 power_on_hours[16]; + __u8 unsafe_shutdowns[16]; + __u8 media_errors[16]; + __u8 num_err_log_entries[16]; + __le32 warning_temp_time; + __le32 critical_comp_time; + __le16 temp_sensor[8]; + __u8 rsvd216[296]; +}; + +enum { + NVME_SMART_CRIT_SPARE = 1 << 0, + NVME_SMART_CRIT_TEMPERATURE = 1 << 1, + NVME_SMART_CRIT_RELIABILITY = 1 << 2, + NVME_SMART_CRIT_MEDIA = 1 << 3, + NVME_SMART_CRIT_VOLATILE_MEMORY = 1 << 4, +}; + +enum { + NVME_AER_NOTICE_NS_CHANGED = 0x0002, +}; + +struct nvme_lba_range_type { + __u8 type; + __u8 attributes; + __u8 rsvd2[14]; + __u64 slba; + __u64 nlb; + __u8 guid[16]; + __u8 rsvd48[16]; +}; + +enum { + NVME_LBART_TYPE_FS = 0x01, + NVME_LBART_TYPE_RAID = 0x02, + NVME_LBART_TYPE_CACHE = 0x03, + NVME_LBART_TYPE_SWAP = 0x04, + + NVME_LBART_ATTRIB_TEMP = 1 << 0, + NVME_LBART_ATTRIB_HIDE = 1 << 1, +}; + +struct nvme_reservation_status { + __le32 gen; + __u8 rtype; + __u8 regctl[2]; + __u8 resv5[2]; + __u8 ptpls; + __u8 resv10[13]; + struct { + __le16 cntlid; + __u8 rcsts; + __u8 resv3[5]; + __le64 hostid; + __le64 rkey; + } regctl_ds[]; +}; + +/* I/O commands */ + +enum nvme_opcode { + nvme_cmd_flush = 0x00, + nvme_cmd_write = 0x01, + nvme_cmd_read = 0x02, + nvme_cmd_write_uncor = 0x04, + nvme_cmd_compare = 0x05, + nvme_cmd_write_zeroes = 0x08, + nvme_cmd_dsm = 0x09, + nvme_cmd_resv_register = 0x0d, + nvme_cmd_resv_report = 0x0e, + nvme_cmd_resv_acquire = 0x11, + nvme_cmd_resv_release = 0x15, +}; + +struct nvme_common_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __le32 cdw2[2]; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le32 cdw10[6]; +}; + +struct nvme_rw_command { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2; + __le64 metadata; + __le64 prp1; + __le64 prp2; + __le64 slba; + __le16 length; + __le16 control; + __le32 dsmgmt; + __le32 reftag; + __le16 apptag; + __le16 appmask; +}; + +enum { + NVME_RW_LR = 1 << 15, + NVME_RW_FUA = 1 << 14, + NVME_RW_DSM_FREQ_UNSPEC = 0, + NVME_RW_DSM_FREQ_TYPICAL = 1, + NVME_RW_DSM_FREQ_RARE = 2, + NVME_RW_DSM_FREQ_READS = 3, + NVME_RW_DSM_FREQ_WRITES = 4, + NVME_RW_DSM_FREQ_RW = 5, + NVME_RW_DSM_FREQ_ONCE = 6, + NVME_RW_DSM_FREQ_PREFETCH = 7, + NVME_RW_DSM_FREQ_TEMP = 8, + NVME_RW_DSM_LATENCY_NONE = 0 << 4, + NVME_RW_DSM_LATENCY_IDLE = 1 << 4, + NVME_RW_DSM_LATENCY_NORM = 2 << 4, + NVME_RW_DSM_LATENCY_LOW = 3 << 4, + NVME_RW_DSM_SEQ_REQ = 1 << 6, + NVME_RW_DSM_COMPRESSED = 1 << 7, + NVME_RW_PRINFO_PRCHK_REF = 1 << 10, + NVME_RW_PRINFO_PRCHK_APP = 1 << 11, + NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, + NVME_RW_PRINFO_PRACT = 1 << 13, +}; + +struct nvme_dsm_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 nr; + __le32 attributes; + __u32 rsvd12[4]; +}; + +enum { + NVME_DSMGMT_IDR = 1 << 0, + NVME_DSMGMT_IDW = 1 << 1, + NVME_DSMGMT_AD = 1 << 2, +}; + +struct nvme_dsm_range { + __le32 cattr; + __le32 nlb; + __le64 slba; +}; + +/* Admin commands */ + +enum nvme_admin_opcode { + nvme_admin_delete_sq = 0x00, + nvme_admin_create_sq = 0x01, + nvme_admin_get_log_page = 0x02, + nvme_admin_delete_cq = 0x04, + nvme_admin_create_cq = 0x05, + nvme_admin_identify = 0x06, + nvme_admin_abort_cmd = 0x08, + nvme_admin_set_features = 0x09, + nvme_admin_get_features = 0x0a, + nvme_admin_async_event = 0x0c, + nvme_admin_activate_fw = 0x10, + nvme_admin_download_fw = 0x11, + nvme_admin_format_nvm = 0x80, + nvme_admin_security_send = 0x81, + nvme_admin_security_recv = 0x82, +}; + +enum { + NVME_QUEUE_PHYS_CONTIG = (1 << 0), + NVME_CQ_IRQ_ENABLED = (1 << 1), + NVME_SQ_PRIO_URGENT = (0 << 1), + NVME_SQ_PRIO_HIGH = (1 << 1), + NVME_SQ_PRIO_MEDIUM = (2 << 1), + NVME_SQ_PRIO_LOW = (3 << 1), + NVME_FEAT_ARBITRATION = 0x01, + NVME_FEAT_POWER_MGMT = 0x02, + NVME_FEAT_LBA_RANGE = 0x03, + NVME_FEAT_TEMP_THRESH = 0x04, + NVME_FEAT_ERR_RECOVERY = 0x05, + NVME_FEAT_VOLATILE_WC = 0x06, + NVME_FEAT_NUM_QUEUES = 0x07, + NVME_FEAT_IRQ_COALESCE = 0x08, + NVME_FEAT_IRQ_CONFIG = 0x09, + NVME_FEAT_WRITE_ATOMIC = 0x0a, + NVME_FEAT_ASYNC_EVENT = 0x0b, + NVME_FEAT_AUTO_PST = 0x0c, + NVME_FEAT_SW_PROGRESS = 0x80, + NVME_FEAT_HOST_ID = 0x81, + NVME_FEAT_RESV_MASK = 0x82, + NVME_FEAT_RESV_PERSIST = 0x83, + NVME_LOG_ERROR = 0x01, + NVME_LOG_SMART = 0x02, + NVME_LOG_FW_SLOT = 0x03, + NVME_LOG_RESERVATION = 0x80, + NVME_FWACT_REPL = (0 << 3), + NVME_FWACT_REPL_ACTV = (1 << 3), + NVME_FWACT_ACTV = (2 << 3), +}; + +struct nvme_identify { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 cns; + __u32 rsvd11[5]; +}; + +struct nvme_features { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[2]; + __le64 prp1; + __le64 prp2; + __le32 fid; + __le32 dword11; + __u32 rsvd12[4]; +}; + +struct nvme_create_cq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 cqid; + __le16 qsize; + __le16 cq_flags; + __le16 irq_vector; + __u32 rsvd12[4]; +}; + +struct nvme_create_sq { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __u64 rsvd8; + __le16 sqid; + __le16 qsize; + __le16 sq_flags; + __le16 cqid; + __u32 rsvd12[4]; +}; + +struct nvme_delete_queue { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 qid; + __u16 rsvd10; + __u32 rsvd11[5]; +}; + +struct nvme_abort_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[9]; + __le16 sqid; + __u16 cid; + __u32 rsvd11[5]; +}; + +struct nvme_download_firmware { + __u8 opcode; + __u8 flags; + __u16 command_id; + __u32 rsvd1[5]; + __le64 prp1; + __le64 prp2; + __le32 numd; + __le32 offset; + __u32 rsvd12[4]; +}; + +struct nvme_format_cmd { + __u8 opcode; + __u8 flags; + __u16 command_id; + __le32 nsid; + __u64 rsvd2[4]; + __le32 cdw10; + __u32 rsvd11[5]; +}; + +struct nvme_command { + union { + struct nvme_common_command common; + struct nvme_rw_command rw; + struct nvme_identify identify; + struct nvme_features features; + struct nvme_create_cq create_cq; + struct nvme_create_sq create_sq; + struct nvme_delete_queue delete_queue; + struct nvme_download_firmware dlfw; + struct nvme_format_cmd format; + struct nvme_dsm_cmd dsm; + struct nvme_abort_cmd abort; + }; +}; + +enum { + NVME_SC_SUCCESS = 0x0, + NVME_SC_INVALID_OPCODE = 0x1, + NVME_SC_INVALID_FIELD = 0x2, + NVME_SC_CMDID_CONFLICT = 0x3, + NVME_SC_DATA_XFER_ERROR = 0x4, + NVME_SC_POWER_LOSS = 0x5, + NVME_SC_INTERNAL = 0x6, + NVME_SC_ABORT_REQ = 0x7, + NVME_SC_ABORT_QUEUE = 0x8, + NVME_SC_FUSED_FAIL = 0x9, + NVME_SC_FUSED_MISSING = 0xa, + NVME_SC_INVALID_NS = 0xb, + NVME_SC_CMD_SEQ_ERROR = 0xc, + NVME_SC_SGL_INVALID_LAST = 0xd, + NVME_SC_SGL_INVALID_COUNT = 0xe, + NVME_SC_SGL_INVALID_DATA = 0xf, + NVME_SC_SGL_INVALID_METADATA = 0x10, + NVME_SC_SGL_INVALID_TYPE = 0x11, + NVME_SC_LBA_RANGE = 0x80, + NVME_SC_CAP_EXCEEDED = 0x81, + NVME_SC_NS_NOT_READY = 0x82, + NVME_SC_RESERVATION_CONFLICT = 0x83, + NVME_SC_CQ_INVALID = 0x100, + NVME_SC_QID_INVALID = 0x101, + NVME_SC_QUEUE_SIZE = 0x102, + NVME_SC_ABORT_LIMIT = 0x103, + NVME_SC_ABORT_MISSING = 0x104, + NVME_SC_ASYNC_LIMIT = 0x105, + NVME_SC_FIRMWARE_SLOT = 0x106, + NVME_SC_FIRMWARE_IMAGE = 0x107, + NVME_SC_INVALID_VECTOR = 0x108, + NVME_SC_INVALID_LOG_PAGE = 0x109, + NVME_SC_INVALID_FORMAT = 0x10a, + NVME_SC_FIRMWARE_NEEDS_RESET = 0x10b, + NVME_SC_INVALID_QUEUE = 0x10c, + NVME_SC_FEATURE_NOT_SAVEABLE = 0x10d, + NVME_SC_FEATURE_NOT_CHANGEABLE = 0x10e, + NVME_SC_FEATURE_NOT_PER_NS = 0x10f, + NVME_SC_FW_NEEDS_RESET_SUBSYS = 0x110, + NVME_SC_BAD_ATTRIBUTES = 0x180, + NVME_SC_INVALID_PI = 0x181, + NVME_SC_READ_ONLY = 0x182, + NVME_SC_WRITE_FAULT = 0x280, + NVME_SC_READ_ERROR = 0x281, + NVME_SC_GUARD_CHECK = 0x282, + NVME_SC_APPTAG_CHECK = 0x283, + NVME_SC_REFTAG_CHECK = 0x284, + NVME_SC_COMPARE_FAILED = 0x285, + NVME_SC_ACCESS_DENIED = 0x286, + NVME_SC_DNR = 0x4000, +}; + +struct nvme_completion { + __le32 result; /* Used by admin commands to return data */ + __u32 rsvd; + __le16 sq_head; /* how much of this queue may be reclaimed */ + __le16 sq_id; /* submission queue that generated this entry */ + __u16 command_id; /* of the command which completed */ + __le16 status; /* did the command fail, and if so, why? */ +}; + +#define NVME_VS(major, minor) (((major) << 16) | ((minor) << 8)) + #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 08c69640cfcbdcc7aaed31c05bbfaf03bb60611c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 2 Oct 2015 15:27:16 +0200 Subject: nvme.h: add missing nvme_id_ctrl endianess annotations Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 91a805437876..9668d3571497 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -104,8 +104,8 @@ struct nvme_id_ctrl { __u8 ieee[3]; __u8 mic; __u8 mdts; - __u16 cntlid; - __u32 ver; + __le16 cntlid; + __le32 ver; __u8 rsvd84[172]; __le16 oacs; __u8 acl; -- cgit v1.2.3 From 2812dfe370516ef958b5c9e2eca1b2f002236d2d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Oct 2015 18:19:20 +0200 Subject: =?UTF-8?q?nvme:=20include=20=20in=20?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The buildbot complains about this even if it doesn't generate a a build warning. But it's an easy fix, so here we go: Reported-by: kbuild test robot Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 9668d3571497..3af5f454c04a 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -15,6 +15,8 @@ #ifndef _LINUX_NVME_H #define _LINUX_NVME_H +#include + struct nvme_bar { __u64 cap; /* Controller Capabilities */ __u32 vs; /* Version */ -- cgit v1.2.3 From 9a764234eee689ea800424ab99b08ff07a8bdbcd Mon Sep 17 00:00:00 2001 From: Florian Fainelli Date: Mon, 14 Sep 2015 12:09:34 -0700 Subject: soc: brcmstb: Add Bus Interface Unit control setup Broadcom STB SoCs (brcmstb) require an early setup of their Bus Interface Unit control register, this needs to happen before SMP is brought up because it affects how the CPU complex will be interfaced to the memory controller. Add support code which properly initializes the BIU registers based on whether "brcm,write-pairing" is present in Device Tree, and take care of saving and restoring credit register settings during system-wide suspend/resume operations. Acked-by: Gregory Fong Signed-off-by: Florian Fainelli --- include/linux/soc/brcmstb/brcmstb.h | 10 ++++++++++ 1 file changed, 10 insertions(+) create mode 100644 include/linux/soc/brcmstb/brcmstb.h (limited to 'include/linux') diff --git a/include/linux/soc/brcmstb/brcmstb.h b/include/linux/soc/brcmstb/brcmstb.h new file mode 100644 index 000000000000..337ce414e898 --- /dev/null +++ b/include/linux/soc/brcmstb/brcmstb.h @@ -0,0 +1,10 @@ +#ifndef __BRCMSTB_SOC_H +#define __BRCMSTB_SOC_H + +/* + * Bus Interface Unit control register setup, must happen early during boot, + * before SMP is brought up, called by machine entry point. + */ +void brcmstb_biuctrl_init(void); + +#endif /* __BRCMSTB_SOC_H */ -- cgit v1.2.3 From a639315d6c536c806724c9328941a2517507e3e3 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 15 Sep 2015 02:14:03 -0400 Subject: pmem: kill memremap_pmem() Now that the pmem-api is defined as "a set of apis that enables access to WB mapped pmem", the mapping type is implied. Remove the wrapper and push the functionality down into the pmem driver in preparation for adding support for direct-mapped pmem. Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/pmem.h | 26 +------------------------- 1 file changed, 1 insertion(+), 25 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 85f810b33917..acfea8ce4a07 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -65,11 +65,6 @@ static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t si memcpy(dst, (void __force const *) src, size); } -static inline void memunmap_pmem(struct device *dev, void __pmem *addr) -{ - devm_memunmap(dev, (void __force *) addr); -} - static inline bool arch_has_pmem_api(void) { return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); @@ -93,7 +88,7 @@ static inline bool arch_has_wmb_pmem(void) * These defaults seek to offer decent performance and minimize the * window between i/o completion and writes being durable on media. * However, it is undefined / architecture specific whether - * default_memremap_pmem + default_memcpy_to_pmem is sufficient for + * ARCH_MEMREMAP_PMEM + default_memcpy_to_pmem is sufficient for * making data durable relative to i/o completion. */ static inline void default_memcpy_to_pmem(void __pmem *dst, const void *src, @@ -116,25 +111,6 @@ static inline void default_clear_pmem(void __pmem *addr, size_t size) memset((void __force *)addr, 0, size); } -/** - * memremap_pmem - map physical persistent memory for pmem api - * @offset: physical address of persistent memory - * @size: size of the mapping - * - * Establish a mapping of the architecture specific memory type expected - * by memcpy_to_pmem() and wmb_pmem(). For example, it may be - * the case that an uncacheable or writethrough mapping is sufficient, - * or a writeback mapping provided memcpy_to_pmem() and - * wmb_pmem() arrange for the data to be written through the - * cache to persistent media. - */ -static inline void __pmem *memremap_pmem(struct device *dev, - resource_size_t offset, unsigned long size) -{ - return (void __pmem *) devm_memremap(dev, offset, size, - ARCH_MEMREMAP_PMEM); -} - /** * memcpy_to_pmem - copy data to persistent memory * @dst: destination buffer for the copy -- cgit v1.2.3 From 7c683941f30a977c10ec6be174ec5f16939c7ce5 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Mon, 5 Oct 2015 20:35:55 -0400 Subject: devm: make allocations numa aware by default Given we already have a device just use dev_to_node() to provide hint allocations for devres. However, current devres_alloc() users will need to explicitly opt-in with devres_alloc_node(). Reviewed-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Dan Williams --- include/linux/device.h | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device.h b/include/linux/device.h index 5d7bc6349930..b8f411b57dcb 100644 --- a/include/linux/device.h +++ b/include/linux/device.h @@ -604,13 +604,21 @@ typedef void (*dr_release_t)(struct device *dev, void *res); typedef int (*dr_match_t)(struct device *dev, void *res, void *match_data); #ifdef CONFIG_DEBUG_DEVRES -extern void *__devres_alloc(dr_release_t release, size_t size, gfp_t gfp, - const char *name); +extern void *__devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid, const char *name); #define devres_alloc(release, size, gfp) \ - __devres_alloc(release, size, gfp, #release) + __devres_alloc_node(release, size, gfp, NUMA_NO_NODE, #release) +#define devres_alloc_node(release, size, gfp, nid) \ + __devres_alloc_node(release, size, gfp, nid, #release) #else -extern void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp); +extern void *devres_alloc_node(dr_release_t release, size_t size, gfp_t gfp, + int nid); +static inline void *devres_alloc(dr_release_t release, size_t size, gfp_t gfp) +{ + return devres_alloc_node(release, size, gfp, NUMA_NO_NODE); +} #endif + extern void devres_for_each_res(struct device *dev, dr_release_t release, dr_match_t match, void *match_data, void (*fn)(struct device *, void *, void *), -- cgit v1.2.3 From 7cabd0086acd8f204d9b11a9b0aca90d6a9fcc5b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Wed, 30 Sep 2015 11:48:01 +0100 Subject: irqchip/gic-v3: Make gic_enable_sre an inline function In order for gic_enable_sre to be used by the arm64 core code, move it to arm-gic-v3.h. As a bonus, we now also check if system registers have been already enabled, and return early if they have. In all cases, the function now returns a boolean indicating if the enabling has been successful. Reviewed-by: Catalin Marinas Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index c0c8a2ef9d90..9001b0bbe878 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -398,6 +398,22 @@ static inline void gic_write_dir(u64 irq) isb(); } +static inline bool gic_enable_sre(void) +{ + u64 val; + + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + if (val & ICC_SRE_EL1_SRE) + return true; + + val |= ICC_SRE_EL1_SRE; + asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); + isb(); + asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + + return !!(val & ICC_SRE_EL1_SRE); +} + struct irq_domain; int its_cpu_init(void); int its_init(struct device_node *node, struct rdists *rdists, -- cgit v1.2.3 From 7936e914f7b0827c2dcfe63fbefdc21de2d61dcb Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Oct 2015 13:47:14 +0100 Subject: irqchip/gic-v3: Refactor the arm64 specific parts This patch moves the GICv3 system register access helpers to arch/arm64/. Their 32bit counterparts will need to use mrc/mcr accesses instead of mrs_s/msr_s. [maz: fixed conflict with Cavium erratum handling] Reviewed-by: Marc Zyngier Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 88 ++++---------------------------------- 1 file changed, 9 insertions(+), 79 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index 9001b0bbe878..b4ee60076ff8 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -18,8 +18,6 @@ #ifndef __LINUX_IRQCHIP_ARM_GIC_V3_H #define __LINUX_IRQCHIP_ARM_GIC_V3_H -#include - /* * Distributor registers. We assume we're running non-secure, with ARE * being set. Secure-only and non-ARE registers are not described. @@ -293,19 +291,8 @@ #define ICH_VMCR_PMR_SHIFT 24 #define ICH_VMCR_PMR_MASK (0xffUL << ICH_VMCR_PMR_SHIFT) -#define ICC_EOIR1_EL1 sys_reg(3, 0, 12, 12, 1) -#define ICC_DIR_EL1 sys_reg(3, 0, 12, 11, 1) -#define ICC_IAR1_EL1 sys_reg(3, 0, 12, 12, 0) -#define ICC_SGI1R_EL1 sys_reg(3, 0, 12, 11, 5) -#define ICC_PMR_EL1 sys_reg(3, 0, 4, 6, 0) -#define ICC_CTLR_EL1 sys_reg(3, 0, 12, 12, 4) -#define ICC_SRE_EL1 sys_reg(3, 0, 12, 12, 5) -#define ICC_GRPEN1_EL1 sys_reg(3, 0, 12, 12, 7) - #define ICC_IAR1_EL1_SPURIOUS 0x3ff -#define ICC_SRE_EL2 sys_reg(3, 4, 12, 9, 5) - #define ICC_SRE_EL2_SRE (1 << 0) #define ICC_SRE_EL2_ENABLE (1 << 3) @@ -321,54 +308,10 @@ #define ICC_SGI1R_AFFINITY_3_SHIFT 48 #define ICC_SGI1R_AFFINITY_3_MASK (0xffULL << ICC_SGI1R_AFFINITY_1_SHIFT) -/* - * System register definitions - */ -#define ICH_VSEIR_EL2 sys_reg(3, 4, 12, 9, 4) -#define ICH_HCR_EL2 sys_reg(3, 4, 12, 11, 0) -#define ICH_VTR_EL2 sys_reg(3, 4, 12, 11, 1) -#define ICH_MISR_EL2 sys_reg(3, 4, 12, 11, 2) -#define ICH_EISR_EL2 sys_reg(3, 4, 12, 11, 3) -#define ICH_ELSR_EL2 sys_reg(3, 4, 12, 11, 5) -#define ICH_VMCR_EL2 sys_reg(3, 4, 12, 11, 7) - -#define __LR0_EL2(x) sys_reg(3, 4, 12, 12, x) -#define __LR8_EL2(x) sys_reg(3, 4, 12, 13, x) - -#define ICH_LR0_EL2 __LR0_EL2(0) -#define ICH_LR1_EL2 __LR0_EL2(1) -#define ICH_LR2_EL2 __LR0_EL2(2) -#define ICH_LR3_EL2 __LR0_EL2(3) -#define ICH_LR4_EL2 __LR0_EL2(4) -#define ICH_LR5_EL2 __LR0_EL2(5) -#define ICH_LR6_EL2 __LR0_EL2(6) -#define ICH_LR7_EL2 __LR0_EL2(7) -#define ICH_LR8_EL2 __LR8_EL2(0) -#define ICH_LR9_EL2 __LR8_EL2(1) -#define ICH_LR10_EL2 __LR8_EL2(2) -#define ICH_LR11_EL2 __LR8_EL2(3) -#define ICH_LR12_EL2 __LR8_EL2(4) -#define ICH_LR13_EL2 __LR8_EL2(5) -#define ICH_LR14_EL2 __LR8_EL2(6) -#define ICH_LR15_EL2 __LR8_EL2(7) - -#define __AP0Rx_EL2(x) sys_reg(3, 4, 12, 8, x) -#define ICH_AP0R0_EL2 __AP0Rx_EL2(0) -#define ICH_AP0R1_EL2 __AP0Rx_EL2(1) -#define ICH_AP0R2_EL2 __AP0Rx_EL2(2) -#define ICH_AP0R3_EL2 __AP0Rx_EL2(3) - -#define __AP1Rx_EL2(x) sys_reg(3, 4, 12, 9, x) -#define ICH_AP1R0_EL2 __AP1Rx_EL2(0) -#define ICH_AP1R1_EL2 __AP1Rx_EL2(1) -#define ICH_AP1R2_EL2 __AP1Rx_EL2(2) -#define ICH_AP1R3_EL2 __AP1Rx_EL2(3) +#include #ifndef __ASSEMBLY__ -#include -#include - /* * We need a value to serve as a irq-type for LPIs. Choose one that will * hopefully pique the interest of the reviewer. @@ -386,39 +329,26 @@ struct rdists { u64 flags; }; -static inline void gic_write_eoir(u64 irq) -{ - asm volatile("msr_s " __stringify(ICC_EOIR1_EL1) ", %0" : : "r" (irq)); - isb(); -} - -static inline void gic_write_dir(u64 irq) -{ - asm volatile("msr_s " __stringify(ICC_DIR_EL1) ", %0" : : "r" (irq)); - isb(); -} +struct irq_domain; +int its_cpu_init(void); +int its_init(struct device_node *node, struct rdists *rdists, + struct irq_domain *domain); static inline bool gic_enable_sre(void) { - u64 val; + u32 val; - asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + val = gic_read_sre(); if (val & ICC_SRE_EL1_SRE) return true; val |= ICC_SRE_EL1_SRE; - asm volatile("msr_s " __stringify(ICC_SRE_EL1) ", %0" : : "r" (val)); - isb(); - asm volatile("mrs_s %0, " __stringify(ICC_SRE_EL1) : "=r" (val)); + gic_write_sre(val); + val = gic_read_sre(); return !!(val & ICC_SRE_EL1_SRE); } -struct irq_domain; -int its_cpu_init(void); -int its_init(struct device_node *node, struct rdists *rdists, - struct irq_domain *domain); - #endif #endif -- cgit v1.2.3 From f6c86a41e1dc2214363b00cc0eadb8a5401c892d Mon Sep 17 00:00:00 2001 From: Jean-Philippe Brucker Date: Thu, 1 Oct 2015 13:47:15 +0100 Subject: irqchip/gic-v3: Change unsigned types for AArch32 compatibility This patch does a few simple compatibility-related changes: - change the system register access prototypes to their actual size, - homogenise mpidr accesses with unsigned long, - force the 64bit register values to unsigned long long. Note: the list registers are 64bit on GICv3, but the AArch32 vGIC driver will need to split their values into two 32bit registers: LRn and LRCn. Reviewed-by: Marc Zyngier Signed-off-by: Jean-Philippe Brucker Signed-off-by: Marc Zyngier --- include/linux/irqchip/arm-gic-v3.h | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic-v3.h b/include/linux/irqchip/arm-gic-v3.h index b4ee60076ff8..c9ae0c6ec050 100644 --- a/include/linux/irqchip/arm-gic-v3.h +++ b/include/linux/irqchip/arm-gic-v3.h @@ -265,16 +265,16 @@ /* * Hypervisor interface registers (SRE only) */ -#define ICH_LR_VIRTUAL_ID_MASK ((1UL << 32) - 1) - -#define ICH_LR_EOI (1UL << 41) -#define ICH_LR_GROUP (1UL << 60) -#define ICH_LR_HW (1UL << 61) -#define ICH_LR_STATE (3UL << 62) -#define ICH_LR_PENDING_BIT (1UL << 62) -#define ICH_LR_ACTIVE_BIT (1UL << 63) +#define ICH_LR_VIRTUAL_ID_MASK ((1ULL << 32) - 1) + +#define ICH_LR_EOI (1ULL << 41) +#define ICH_LR_GROUP (1ULL << 60) +#define ICH_LR_HW (1ULL << 61) +#define ICH_LR_STATE (3ULL << 62) +#define ICH_LR_PENDING_BIT (1ULL << 62) +#define ICH_LR_ACTIVE_BIT (1ULL << 63) #define ICH_LR_PHYS_ID_SHIFT 32 -#define ICH_LR_PHYS_ID_MASK (0x3ffUL << ICH_LR_PHYS_ID_SHIFT) +#define ICH_LR_PHYS_ID_MASK (0x3ffULL << ICH_LR_PHYS_ID_SHIFT) #define ICH_MISR_EOI (1 << 0) #define ICH_MISR_U (1 << 1) -- cgit v1.2.3 From e866a2e3950fe2f708d5cc67d641b1725ef7a708 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 1 Oct 2015 23:45:31 +0200 Subject: linux/thermal.h: rename KELVIN_TO_CELSIUS to DECI_KELVIN_TO_CELSIUS The macros KELVIN_TO_CELSIUS and CELSIUS_TO_KELVIN actually convert between deciKelvins and Celsius, so rename them to reflect that. While at it, use a statement expression in DECI_KELVIN_TO_CELSIUS to prevent expanding the argument multiple times and get rid of a few casts. Signed-off-by: Rasmus Villemoes Acked-by: Darren Hart Signed-off-by: Zhang Rui --- include/linux/thermal.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index 157d366e761b..4014a59828fc 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -44,9 +44,11 @@ #define THERMAL_WEIGHT_DEFAULT 0 /* Unit conversion macros */ -#define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ - ((long)t-2732+5)/10 : ((long)t-2732-5)/10) -#define CELSIUS_TO_KELVIN(t) ((t)*10+2732) +#define DECI_KELVIN_TO_CELSIUS(t) ({ \ + long _t = (t); \ + ((_t-2732 >= 0) ? (_t-2732+5)/10 : (_t-2732-5)/10); \ +}) +#define CELSIUS_TO_DECI_KELVIN(t) ((t)*10+2732) #define DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, off) (((t) - (off)) * 100) #define DECI_KELVIN_TO_MILLICELSIUS(t) DECI_KELVIN_TO_MILLICELSIUS_WITH_OFFSET(t, 2732) #define MILLICELSIUS_TO_DECI_KELVIN_WITH_OFFSET(t, off) (((t) / 100) + (off)) -- cgit v1.2.3 From e9849777d0e27cdd2902805be51da73e7c79578c Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 9 Oct 2015 23:28:58 +0200 Subject: genirq: Add flag to force mask in disable_irq[_nosync]() If an irq chip does not implement the irq_disable callback, then we use a lazy approach for disabling the interrupt. That means that the interrupt is marked disabled, but the interrupt line is not immediately masked in the interrupt chip. It only becomes masked if the interrupt is raised while it's marked disabled. We use this to avoid possibly expensive mask/unmask operations for common case operations. Unfortunately there are devices which do not allow the interrupt to be disabled easily at the device level. They are forced to use disable_irq_nosync(). This can result in taking each interrupt twice. Instead of enforcing the non lazy mode on all interrupts of a irq chip, provide a settings flag, which can be set by the driver for that particular interrupt line. Reported-and-tested-by: Duc Dang Signed-off-by: Thomas Gleixner Cc: Marc Zyngier Cc: Jason Cooper Link: http://lkml.kernel.org/r/alpine.DEB.2.11.1510092348370.6097@nanos --- include/linux/irq.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irq.h b/include/linux/irq.h index ba72b60b57b1..3c1c96786248 100644 --- a/include/linux/irq.h +++ b/include/linux/irq.h @@ -72,6 +72,7 @@ enum irqchip_irq_state; * IRQ_IS_POLLED - Always polled by another interrupt. Exclude * it from the spurious interrupt detection * mechanism and from core side polling. + * IRQ_DISABLE_UNLAZY - Disable lazy irq disable */ enum { IRQ_TYPE_NONE = 0x00000000, @@ -97,13 +98,14 @@ enum { IRQ_NOTHREAD = (1 << 16), IRQ_PER_CPU_DEVID = (1 << 17), IRQ_IS_POLLED = (1 << 18), + IRQ_DISABLE_UNLAZY = (1 << 19), }; #define IRQF_MODIFY_MASK \ (IRQ_TYPE_SENSE_MASK | IRQ_NOPROBE | IRQ_NOREQUEST | \ IRQ_NOAUTOEN | IRQ_MOVE_PCNTXT | IRQ_LEVEL | IRQ_NO_BALANCING | \ IRQ_PER_CPU | IRQ_NESTED_THREAD | IRQ_NOTHREAD | IRQ_PER_CPU_DEVID | \ - IRQ_IS_POLLED) + IRQ_IS_POLLED | IRQ_DISABLE_UNLAZY) #define IRQ_NO_BALANCING_MASK (IRQ_PER_CPU | IRQ_NO_BALANCING) -- cgit v1.2.3 From ff936a04e5f28b7e0455be0e7fa91334f89e4b44 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 10:55:41 -0700 Subject: bpf: fix cb access in socket filter programs eBPF socket filter programs may see junk in 'u32 cb[5]' area, since it could have been used by protocol layers earlier. For socket filter programs used in af_packet we need to clean 20 bytes of skb->cb area if it could be used by the program. For programs attached to TCP/UDP sockets we need to save/restore these 20 bytes, since it's used by protocol layers. Remove SK_RUN_FILTER macro, since it's no longer used. Long term we may move this bpf cb area to per-cpu scratch, but that requires addition of new 'per-cpu load/store' instructions, so not suitable as a short term fix. Fixes: d691f9e8d440 ("bpf: allow programs to write to certain skb fields") Reported-by: Eric Dumazet Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 6 +++--- include/linux/filter.h | 39 +++++++++++++++++++++++++++++++++++---- 2 files changed, 38 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 3697ad563899..b4fdee6cb686 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -100,6 +100,8 @@ enum bpf_access_type { BPF_WRITE = 2 }; +struct bpf_prog; + struct bpf_verifier_ops { /* return eBPF function prototype for verification */ const struct bpf_func_proto *(*get_func_proto)(enum bpf_func_id func_id); @@ -111,7 +113,7 @@ struct bpf_verifier_ops { u32 (*convert_ctx_access)(enum bpf_access_type type, int dst_reg, int src_reg, int ctx_off, - struct bpf_insn *insn); + struct bpf_insn *insn, struct bpf_prog *prog); }; struct bpf_prog_type_list { @@ -120,8 +122,6 @@ struct bpf_prog_type_list { enum bpf_prog_type type; }; -struct bpf_prog; - struct bpf_prog_aux { atomic_t refcnt; u32 used_map_cnt; diff --git a/include/linux/filter.h b/include/linux/filter.h index 1bbce14bcf17..4165e9ac9e36 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -13,6 +13,7 @@ #include #include #include +#include #include @@ -302,10 +303,6 @@ struct bpf_prog_aux; bpf_size; \ }) -/* Macro to invoke filter function. */ -#define SK_RUN_FILTER(filter, ctx) \ - (*filter->prog->bpf_func)(ctx, filter->prog->insnsi) - #ifdef CONFIG_COMPAT /* A struct sock_filter is architecture independent. */ struct compat_sock_fprog { @@ -329,6 +326,7 @@ struct bpf_prog { kmemcheck_bitfield_begin(meta); u16 jited:1, /* Is our filter JIT'ed? */ gpl_compatible:1, /* Is filter GPL compatible? */ + cb_access:1, /* Is control block accessed? */ dst_needed:1; /* Do we need dst entry? */ kmemcheck_bitfield_end(meta); u32 len; /* Number of filter blocks */ @@ -352,6 +350,39 @@ struct sk_filter { #define BPF_PROG_RUN(filter, ctx) (*filter->bpf_func)(ctx, filter->insnsi) +static inline u32 bpf_prog_run_save_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u8 *cb_data = qdisc_skb_cb(skb)->data; + u8 saved_cb[QDISC_CB_PRIV_LEN]; + u32 res; + + BUILD_BUG_ON(FIELD_SIZEOF(struct __sk_buff, cb) != + QDISC_CB_PRIV_LEN); + + if (unlikely(prog->cb_access)) { + memcpy(saved_cb, cb_data, sizeof(saved_cb)); + memset(cb_data, 0, sizeof(saved_cb)); + } + + res = BPF_PROG_RUN(prog, skb); + + if (unlikely(prog->cb_access)) + memcpy(cb_data, saved_cb, sizeof(saved_cb)); + + return res; +} + +static inline u32 bpf_prog_run_clear_cb(const struct bpf_prog *prog, + struct sk_buff *skb) +{ + u8 *cb_data = qdisc_skb_cb(skb)->data; + + if (unlikely(prog->cb_access)) + memset(cb_data, 0, QDISC_CB_PRIV_LEN); + return BPF_PROG_RUN(prog, skb); +} + static inline unsigned int bpf_prog_size(unsigned int proglen) { return max(sizeof(struct bpf_prog), -- cgit v1.2.3 From 7968c0e338085eba0ee2f0e0b0d833057a966679 Mon Sep 17 00:00:00 2001 From: Leif Lindholm Date: Wed, 26 Aug 2015 14:24:58 +0100 Subject: efi/arm64: Clean up efi_get_fdt_params() interface As we now have a common debug infrastructure between core and arm64 efi, drop the bit of the interface passing verbose output flags around. Signed-off-by: Leif Lindholm Acked-by: Ard Biesheuvel Tested-by: Ard Biesheuvel Cc: Mark Salter Cc: Catalin Marinas Cc: Will Deacon Signed-off-by: Matt Fleming --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 26ca9e2fd30e..4677d8a1bfd0 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -902,7 +902,7 @@ extern void efi_initialize_iomem_resources(struct resource *code_resource, struct resource *data_resource, struct resource *bss_resource); extern void efi_get_time(struct timespec *now); extern void efi_reserve_boot_services(void); -extern int efi_get_fdt_params(struct efi_fdt_params *params, int verbose); +extern int efi_get_fdt_params(struct efi_fdt_params *params); extern struct efi_memory_map memmap; extern struct kobject *efi_kobj; -- cgit v1.2.3 From bf924863c9445174c6e118f723dc477e2b6ccc7e Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 9 Sep 2015 10:08:15 +0200 Subject: efi: Add support for UEFIv2.5 Properties table Version 2.5 of the UEFI spec introduces a new configuration table called the 'EFI Properties table'. Currently, it is only used to convey whether the Memory Protection feature is enabled, which splits PE/COFF images into separate code and data memory regions. Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Acked-by: Dave Young Signed-off-by: Matt Fleming --- include/linux/efi.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4677d8a1bfd0..d6a9bee755f2 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -596,6 +596,9 @@ void efi_native_runtime_setup(void); #define DEVICE_TREE_GUID \ EFI_GUID( 0xb1b621d5, 0xf19c, 0x41a5, 0x83, 0x0b, 0xd9, 0x15, 0x2c, 0x69, 0xaa, 0xe0 ) +#define EFI_PROPERTIES_TABLE_GUID \ + EFI_GUID( 0x880aaca3, 0x4adc, 0x4a04, 0x90, 0x79, 0xb7, 0x47, 0x34, 0x08, 0x25, 0xe5 ) + typedef struct { efi_guid_t guid; u64 table; @@ -809,6 +812,15 @@ typedef struct _efi_file_io_interface { #define EFI_FILE_MODE_WRITE 0x0000000000000002 #define EFI_FILE_MODE_CREATE 0x8000000000000000 +typedef struct { + u32 version; + u32 length; + u64 memory_protection_attribute; +} efi_properties_table_t; + +#define EFI_PROPERTIES_TABLE_VERSION 0x00010000 +#define EFI_PROPERTIES_RUNTIME_MEMORY_PROTECTION_NON_EXECUTABLE_PE_DATA 0x1 + #define EFI_INVALID_TABLE_ADDR (~0UL) /* @@ -831,6 +843,7 @@ extern struct efi { unsigned long runtime; /* runtime table */ unsigned long config_table; /* config tables */ unsigned long esrt; /* ESRT table */ + unsigned long properties_table; /* properties table */ efi_get_time_t *get_time; efi_set_time_t *set_time; efi_get_wakeup_time_t *get_wakeup_time; -- cgit v1.2.3 From a1041713349d0b823b492d7b4ea4325d0b5666db Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Wed, 23 Sep 2015 07:29:34 -0700 Subject: efi: Introduce EFI_NX_PE_DATA bit and set it from properties table UEFI v2.5 introduces a runtime memory protection feature that splits PE/COFF runtime images into separate code and data regions. Since this may require special handling by the OS, allocate a EFI_xxx bit to keep track of whether this feature is currently active or not. Signed-off-by: Ard Biesheuvel Cc: Leif Lindholm Signed-off-by: Matt Fleming --- include/linux/efi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index d6a9bee755f2..fa5106c2f9f5 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -973,6 +973,7 @@ extern int __init efi_setup_pcdp_console(char *); #define EFI_PARAVIRT 6 /* Access is via a paravirt interface */ #define EFI_ARCH_1 7 /* First arch-specific bit */ #define EFI_DBG 8 /* Print additional debug info at runtime */ +#define EFI_NX_PE_DATA 9 /* Can runtime data regions be mapped non-executable? */ #ifdef CONFIG_EFI /* -- cgit v1.2.3 From 0f96a99dab366333439e110d6ad253bc7c557c09 Mon Sep 17 00:00:00 2001 From: Taku Izumi Date: Wed, 30 Sep 2015 23:01:56 +0900 Subject: efi: Add "efi_fake_mem" boot option This patch introduces new boot option named "efi_fake_mem". By specifying this parameter, you can add arbitrary attribute to specific memory range. This is useful for debugging of Address Range Mirroring feature. For example, if "efi_fake_mem=2G@4G:0x10000,2G@0x10a0000000:0x10000" is specified, the original (firmware provided) EFI memmap will be updated so that the specified memory regions have EFI_MEMORY_MORE_RELIABLE attribute (0x10000): efi: mem36: [Conventional Memory| | | | | | |WB|WT|WC|UC] range=[0x0000000100000000-0x00000020a0000000) (129536MB) efi: mem36: [Conventional Memory| |MR| | | | |WB|WT|WC|UC] range=[0x0000000100000000-0x0000000180000000) (2048MB) efi: mem37: [Conventional Memory| | | | | | |WB|WT|WC|UC] range=[0x0000000180000000-0x00000010a0000000) (61952MB) efi: mem38: [Conventional Memory| |MR| | | | |WB|WT|WC|UC] range=[0x00000010a0000000-0x0000001120000000) (2048MB) efi: mem39: [Conventional Memory| | | | | | |WB|WT|WC|UC] range=[0x0000001120000000-0x00000020a0000000) (63488MB) And you will find that the following message is output: efi: Memory: 4096M/131455M mirrored memory Signed-off-by: Taku Izumi Cc: Tony Luck Cc: Xishi Qiu Cc: Kamezawa Hiroyuki Cc: Ard Biesheuvel Signed-off-by: Matt Fleming --- include/linux/efi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index fa5106c2f9f5..4d01c1033fce 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -922,6 +922,12 @@ extern struct kobject *efi_kobj; extern int efi_reboot_quirk_mode; extern bool efi_poweroff_required(void); +#ifdef CONFIG_EFI_FAKE_MEMMAP +extern void __init efi_fake_memmap(void); +#else +static inline void efi_fake_memmap(void) { } +#endif + /* Iterate through an efi_memory_map */ #define for_each_efi_memory_desc(m, md) \ for ((md) = (m)->map; \ -- cgit v1.2.3 From b817525a4a80c04e4ca44192d97a1ffa9f2be572 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 2 Oct 2015 14:47:05 -0400 Subject: writeback: bdi_writeback iteration must not skip dying ones bdi_for_each_wb() is used in several places to wake up or issue writeback work items to all wb's (bdi_writeback's) on a given bdi. The iteration is performed by walking bdi->cgwb_tree; however, the tree only indexes wb's which are currently active. For example, when a memcg gets associated with a different blkcg, the old wb is removed from the tree so that the new one can be indexed. The old wb starts dying from then on but will linger till all its inodes are drained. As these dying wb's may still host dirty inodes, writeback operations which affect all wb's must include them. bdi_for_each_wb() skipping dying wb's led to sync(2) missing and failing to sync the inodes belonging to those wb's. This patch adds a RCU protected @bdi->wb_list which lists all wb's beloinging to that bdi. wb's are added on creation and removed on release rather than on the start of destruction. bdi_for_each_wb() usages are replaced with list_for_each[_continue]_rcu() iterations over @bdi->wb_list and bdi_for_each_wb() and its helpers are removed. v2: Updated as per Jan. last_wb ref leak in bdi_split_work_to_wbs() fixed and unnecessary list head severing in cgwb_bdi_destroy() removed. Signed-off-by: Tejun Heo Reported-and-tested-by: Artem Bityutskiy Fixes: ebe41ab0c79d ("writeback: implement bdi_for_each_wb()") Link: http://lkml.kernel.org/g/1443012552.19983.209.camel@gmail.com Cc: Jan Kara Signed-off-by: Jens Axboe --- include/linux/backing-dev-defs.h | 3 ++ include/linux/backing-dev.h | 63 ---------------------------------------- 2 files changed, 3 insertions(+), 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index a23209b43842..1b4d69f68c33 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -116,6 +116,8 @@ struct bdi_writeback { struct list_head work_list; struct delayed_work dwork; /* work item used for writeback */ + struct list_head bdi_node; /* anchored at bdi->wb_list */ + #ifdef CONFIG_CGROUP_WRITEBACK struct percpu_ref refcnt; /* used only for !root wb's */ struct fprop_local_percpu memcg_completions; @@ -150,6 +152,7 @@ struct backing_dev_info { atomic_long_t tot_write_bandwidth; struct bdi_writeback wb; /* the root writeback info for this bdi */ + struct list_head wb_list; /* list of all wbs */ #ifdef CONFIG_CGROUP_WRITEBACK struct radix_tree_root cgwb_tree; /* radix tree of active cgroup wbs */ struct rb_root cgwb_congested_tree; /* their congested states */ diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index d5eb4ad1c534..78677e5a65bf 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -408,61 +408,6 @@ static inline void unlocked_inode_to_wb_end(struct inode *inode, bool locked) rcu_read_unlock(); } -struct wb_iter { - int start_memcg_id; - struct radix_tree_iter tree_iter; - void **slot; -}; - -static inline struct bdi_writeback *__wb_iter_next(struct wb_iter *iter, - struct backing_dev_info *bdi) -{ - struct radix_tree_iter *titer = &iter->tree_iter; - - WARN_ON_ONCE(!rcu_read_lock_held()); - - if (iter->start_memcg_id >= 0) { - iter->slot = radix_tree_iter_init(titer, iter->start_memcg_id); - iter->start_memcg_id = -1; - } else { - iter->slot = radix_tree_next_slot(iter->slot, titer, 0); - } - - if (!iter->slot) - iter->slot = radix_tree_next_chunk(&bdi->cgwb_tree, titer, 0); - if (iter->slot) - return *iter->slot; - return NULL; -} - -static inline struct bdi_writeback *__wb_iter_init(struct wb_iter *iter, - struct backing_dev_info *bdi, - int start_memcg_id) -{ - iter->start_memcg_id = start_memcg_id; - - if (start_memcg_id) - return __wb_iter_next(iter, bdi); - else - return &bdi->wb; -} - -/** - * bdi_for_each_wb - walk all wb's of a bdi in ascending memcg ID order - * @wb_cur: cursor struct bdi_writeback pointer - * @bdi: bdi to walk wb's of - * @iter: pointer to struct wb_iter to be used as iteration buffer - * @start_memcg_id: memcg ID to start iteration from - * - * Iterate @wb_cur through the wb's (bdi_writeback's) of @bdi in ascending - * memcg ID order starting from @start_memcg_id. @iter is struct wb_iter - * to be used as temp storage during iteration. rcu_read_lock() must be - * held throughout iteration. - */ -#define bdi_for_each_wb(wb_cur, bdi, iter, start_memcg_id) \ - for ((wb_cur) = __wb_iter_init(iter, bdi, start_memcg_id); \ - (wb_cur); (wb_cur) = __wb_iter_next(iter, bdi)) - #else /* CONFIG_CGROUP_WRITEBACK */ static inline bool inode_cgwb_enabled(struct inode *inode) @@ -522,14 +467,6 @@ static inline void wb_blkcg_offline(struct blkcg *blkcg) { } -struct wb_iter { - int next_id; -}; - -#define bdi_for_each_wb(wb_cur, bdi, iter, start_blkcg_id) \ - for ((iter)->next_id = (start_blkcg_id); \ - ({ (wb_cur) = !(iter)->next_id++ ? &(bdi)->wb : NULL; }); ) - static inline int inode_congested(struct inode *inode, int cong_bits) { return wb_congested(&inode_to_bdi(inode)->wb, cong_bits); -- cgit v1.2.3 From c5edf9cdc4c483b9a94c03fc0b9f769bd090bf3e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 29 Sep 2015 13:04:26 -0400 Subject: writeback: fix incorrect calculation of available memory for memcg domains For memcg domains, the amount of available memory was calculated as min(the amount currently in use + headroom according to memcg, total clean memory) This isn't quite correct as what should be capped by the amount of clean memory is the headroom, not the sum of memory in use and headroom. For example, if a memcg domain has a significant amount of dirty memory, the above can lead to a value which is lower than the current amount in use which doesn't make much sense. In most circumstances, the above leads to a number which is somewhat but not drastically lower. As the amount of memory which can be readily allocated to the memcg domain is capped by the amount of system-wide clean memory which is not already assigned to the memcg itself, the number we want is the amount currently in use + min(headroom according to memcg, clean memory elsewhere in the system) This patch updates mem_cgroup_wb_stats() to return the number of filepages and headroom instead of the calculated available pages. mdtc_cap_avail() is renamed to mdtc_calc_avail() and performs the above calculation from file, headroom, dirty and globally clean pages. v2: Dummy mem_cgroup_wb_stats() implementation wasn't updated leading to build failure when !CGROUP_WRITEBACK. Fixed. Signed-off-by: Tejun Heo Fixes: c2aa723a6093 ("writeback: implement memcg writeback domain based throttling") Signed-off-by: Jens Axboe --- include/linux/memcontrol.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 6452ff4c463f..3e3318ddfc0e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -676,8 +676,9 @@ enum { struct list_head *mem_cgroup_cgwb_list(struct mem_cgroup *memcg); struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb); -void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pavail, - unsigned long *pdirty, unsigned long *pwriteback); +void mem_cgroup_wb_stats(struct bdi_writeback *wb, unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, + unsigned long *pwriteback); #else /* CONFIG_CGROUP_WRITEBACK */ @@ -687,7 +688,8 @@ static inline struct wb_domain *mem_cgroup_wb_domain(struct bdi_writeback *wb) } static inline void mem_cgroup_wb_stats(struct bdi_writeback *wb, - unsigned long *pavail, + unsigned long *pfilepages, + unsigned long *pheadroom, unsigned long *pdirty, unsigned long *pwriteback) { -- cgit v1.2.3 From f96b3c4f34b294a2293a5aa1d55e12e66aee055d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 29 Sep 2015 15:12:37 +0200 Subject: PM / Domains: Remove in_progress counter from struct generic_pm_domain Commit ba2bbfbf6307 ("PM / Domains: Remove intermediate states..") changed the power off sequence (pm_genpd_poweroff()), which from locking point of view means the genpd mutex is held throughout the sequence. The above change means the in_progress counter can't be updated while pm_genpd_poweroff() is executing, which allows us to remove the counter. Instead we inform pm_genpd_poweroff() via a bool parameter, to indicate whether we call it from the scheduled work or from the ->runtime_suspend() callback, since that all that matters. Signed-off-by: Ulf Hansson Reviewed-by: Lina Iyer Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 384b1d193707..0eaa730c890d 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -47,7 +47,6 @@ struct generic_pm_domain { struct dev_power_governor *gov; struct work_struct power_off_work; const char *name; - unsigned int in_progress; /* Number of devices being suspended now */ atomic_t sd_count; /* Number of subdomains with power "on" */ enum gpd_status status; /* Current state of the domain */ unsigned int device_count; /* Number of devices */ -- cgit v1.2.3 From bb4b72fc63d4c1c2ccd5e5af95e48b77d6cad80c Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 6 Oct 2015 14:27:42 +0200 Subject: PM / Domains: Remove pm_genpd_poweroff_unused() API As the last user of the pm_genpd_poweroff_unused() API has moved into relying on genpd to deal with this internally from a late_initcall, let's remove the API. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index 0eaa730c890d..a736f0f0ca66 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -125,9 +125,7 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); - extern int pm_genpd_poweron(struct generic_pm_domain *genpd); -extern void pm_genpd_poweroff_unused(void); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -170,7 +168,6 @@ static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) { return -ENOSYS; } -static inline void pm_genpd_poweroff_unused(void) {} #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, -- cgit v1.2.3 From ea823c7cbffa7bae311d78761866ac4db344c89b Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 6 Oct 2015 14:27:51 +0200 Subject: PM / Domains: Remove pm_genpd_poweron() API Once genpd could be configured to be built with CONFIG_PM_RUNTIME unset (nowadays CONFIG_PM), the pm_genpd_poweron() API served a purpose, since it allowed users to power on a PM domain. As such configuration no longer is supported, users shall solely rely on using some of the runtime PM APIs to power on a PM domain. Signed-off-by: Ulf Hansson Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index a736f0f0ca66..f4dd8105b024 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -125,7 +125,6 @@ extern int pm_genpd_remove_subdomain(struct generic_pm_domain *genpd, struct generic_pm_domain *target); extern void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off); -extern int pm_genpd_poweron(struct generic_pm_domain *genpd); extern struct dev_power_governor simple_qos_governor; extern struct dev_power_governor pm_domain_always_on_gov; @@ -164,10 +163,6 @@ static inline void pm_genpd_init(struct generic_pm_domain *genpd, struct dev_power_governor *gov, bool is_off) { } -static inline int pm_genpd_poweron(struct generic_pm_domain *genpd) -{ - return -ENOSYS; -} #endif static inline int pm_genpd_add_device(struct generic_pm_domain *genpd, -- cgit v1.2.3 From 0ad95472bf169a3501991f8f33f5147f792a8116 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 23 Sep 2015 15:49:29 +0300 Subject: lockd: create NSM handles per net namespace Commit cb7323fffa85 ("lockd: create and use per-net NSM RPC clients on MON/UNMON requests") introduced per-net NSM RPC clients. Unfortunately this doesn't make any sense without per-net nsm_handle. E.g. the following scenario could happen Two hosts (X and Y) in different namespaces (A and B) share the same nsm struct. 1. nsm_monitor(host_X) called => NSM rpc client created, nsm->sm_monitored bit set. 2. nsm_mointor(host-Y) called => nsm->sm_monitored already set, we just exit. Thus in namespace B ln->nsm_clnt == NULL. 3. host X destroyed => nsm->sm_count decremented to 1 4. host Y destroyed => nsm_unmonitor() => nsm_mon_unmon() => NULL-ptr dereference of *ln->nsm_clnt So this could be fixed by making per-net nsm_handles list, instead of global. Thus different net namespaces will not be able share the same nsm_handle. Signed-off-by: Andrey Ryabinin Cc: Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index ff82a32871b5..fd3b65bf51b5 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -235,7 +235,8 @@ void nlm_rebind_host(struct nlm_host *); struct nlm_host * nlm_get_host(struct nlm_host *); void nlm_shutdown_hosts(void); void nlm_shutdown_hosts_net(struct net *net); -void nlm_host_rebooted(const struct nlm_reboot *); +void nlm_host_rebooted(const struct net *net, + const struct nlm_reboot *); /* * Host monitoring @@ -243,11 +244,13 @@ void nlm_host_rebooted(const struct nlm_reboot *); int nsm_monitor(const struct nlm_host *host); void nsm_unmonitor(const struct nlm_host *host); -struct nsm_handle *nsm_get_handle(const struct sockaddr *sap, +struct nsm_handle *nsm_get_handle(const struct net *net, + const struct sockaddr *sap, const size_t salen, const char *hostname, const size_t hostname_len); -struct nsm_handle *nsm_reboot_lookup(const struct nlm_reboot *info); +struct nsm_handle *nsm_reboot_lookup(const struct net *net, + const struct nlm_reboot *info); void nsm_release(struct nsm_handle *nsm); /* -- cgit v1.2.3 From 1be7f75d1668d6296b80bf35dcf6762393530afc Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 22:23:21 -0700 Subject: bpf: enable non-root eBPF programs In order to let unprivileged users load and execute eBPF programs teach verifier to prevent pointer leaks. Verifier will prevent - any arithmetic on pointers (except R10+Imm which is used to compute stack addresses) - comparison of pointers (except if (map_value_ptr == 0) ... ) - passing pointers to helper functions - indirectly passing pointers in stack to helper functions - returning pointer from bpf program - storing pointers into ctx or maps Spill/fill of pointers into stack is allowed, but mangling of pointers stored in the stack or reading them byte by byte is not. Within bpf programs the pointers do exist, since programs need to be able to access maps, pass skb pointer to LD_ABS insns, etc but programs cannot pass such pointer values to the outside or obfuscate them. Only allow BPF_PROG_TYPE_SOCKET_FILTER unprivileged programs, so that socket filters (tcpdump), af_packet (quic acceleration) and future kcm can use it. tracing and tc cls/act program types still require root permissions, since tracing actually needs to be able to see all kernel pointers and tc is for root only. For example, the following unprivileged socket filter program is allowed: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += skb->len; return 0; } but the following program is not: int bpf_prog1(struct __sk_buff *skb) { u32 index = load_byte(skb, ETH_HLEN + offsetof(struct iphdr, protocol)); u64 *value = bpf_map_lookup_elem(&my_map, &index); if (value) *value += (u64) skb; return 0; } since it would leak the kernel address into the map. Unprivileged socket filter bpf programs have access to the following helper functions: - map lookup/update/delete (but they cannot store kernel pointers into them) - get_random (it's already exposed to unprivileged user space) - get_smp_processor_id - tail_call into another socket filter program - ktime_get_ns The feature is controlled by sysctl kernel.unprivileged_bpf_disabled. This toggle defaults to off (0), but can be set true (1). Once true, bpf programs and maps cannot be accessed from unprivileged process, and the toggle cannot be set back to false. Signed-off-by: Alexei Starovoitov Reviewed-by: Kees Cook Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index b4fdee6cb686..02fa3db3c1ec 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,6 +167,8 @@ void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); +extern int sysctl_unprivileged_bpf_disabled; + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else -- cgit v1.2.3 From aaac3ba95e4c8b496d22f68bd1bc01cfbf525eca Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 7 Oct 2015 22:23:22 -0700 Subject: bpf: charge user for creation of BPF maps and programs since eBPF programs and maps use kernel memory consider it 'locked' memory from user accounting point of view and charge it against RLIMIT_MEMLOCK limit. This limit is typically set to 64Kbytes by distros, so almost all bpf+tracing programs would need to increase it, since they use maps, but kernel charges maximum map size upfront. For example the hash map of 1024 elements will be charged as 64Kbyte. It's inconvenient for current users and changes current behavior for root, but probably worth doing to be consistent root vs non-root. Similar accounting logic is done by mmap of perf_event. Signed-off-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 +++ include/linux/sched.h | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 02fa3db3c1ec..e3a51b74e275 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -36,6 +36,8 @@ struct bpf_map { u32 key_size; u32 value_size; u32 max_entries; + u32 pages; + struct user_struct *user; const struct bpf_map_ops *ops; struct work_struct work; }; @@ -128,6 +130,7 @@ struct bpf_prog_aux { const struct bpf_verifier_ops *ops; struct bpf_map **used_maps; struct bpf_prog *prog; + struct user_struct *user; union { struct work_struct work; struct rcu_head rcu; diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..4817df5fffae 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -840,7 +840,7 @@ struct user_struct { struct hlist_node uidhash_node; kuid_t uid; -#ifdef CONFIG_PERF_EVENTS +#if defined(CONFIG_PERF_EVENTS) || defined(CONFIG_BPF_SYSCALL) atomic_long_t locked_vm; #endif }; -- cgit v1.2.3 From d475f090bf1c0dc2999e98bbf2e7cb2243358849 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 8 Oct 2015 19:33:24 -0700 Subject: tcp: shrink tcp_timewait_sock by 8 bytes Reducing tcp_timewait_sock from 280 bytes to 272 bytes allows SLAB to pack 15 objects per page instead of 14 (on x86) Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index e442e6e9a365..86a7edaa6797 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -356,8 +356,8 @@ static inline struct tcp_sock *tcp_sk(const struct sock *sk) struct tcp_timewait_sock { struct inet_timewait_sock tw_sk; - u32 tw_rcv_nxt; - u32 tw_snd_nxt; +#define tw_rcv_nxt tw_sk.__tw_common.skc_tw_rcv_nxt +#define tw_snd_nxt tw_sk.__tw_common.skc_tw_snd_nxt u32 tw_rcv_wnd; u32 tw_ts_offset; u32 tw_ts_recent; -- cgit v1.2.3 From a4288289f585d42a19145f266e214acb165fe9b3 Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Oct 2015 15:48:25 +0200 Subject: wireless: update robust action frame list Unprotected DMG and VHT action frames are not protected, reflect that in the list. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index dcfb2f43d316..0109f3847e9a 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -2397,6 +2397,8 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && *category != WLAN_CATEGORY_SELF_PROTECTED && + *category != WLAN_CATEGORY_UNPROT_DMG && + *category != WLAN_CATEGORY_VHT && *category != WLAN_CATEGORY_VENDOR_SPECIFIC; } -- cgit v1.2.3 From af61426187cd854bffe013ca8547bd8fa3c4dfbf Mon Sep 17 00:00:00 2001 From: Johannes Berg Date: Wed, 7 Oct 2015 15:48:26 +0200 Subject: wireless: add WNM action frame categories Add the WNM and unprotected WNM categories and mark the latter as not robust. Signed-off-by: Johannes Berg --- include/linux/ieee80211.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ieee80211.h b/include/linux/ieee80211.h index 0109f3847e9a..452c0b0d2f32 100644 --- a/include/linux/ieee80211.h +++ b/include/linux/ieee80211.h @@ -1932,6 +1932,8 @@ enum ieee80211_category { WLAN_CATEGORY_HT = 7, WLAN_CATEGORY_SA_QUERY = 8, WLAN_CATEGORY_PROTECTED_DUAL_OF_ACTION = 9, + WLAN_CATEGORY_WNM = 10, + WLAN_CATEGORY_WNM_UNPROTECTED = 11, WLAN_CATEGORY_TDLS = 12, WLAN_CATEGORY_MESH_ACTION = 13, WLAN_CATEGORY_MULTIHOP_ACTION = 14, @@ -2396,6 +2398,7 @@ static inline bool _ieee80211_is_robust_mgmt_frame(struct ieee80211_hdr *hdr) category = ((u8 *) hdr) + 24; return *category != WLAN_CATEGORY_PUBLIC && *category != WLAN_CATEGORY_HT && + *category != WLAN_CATEGORY_WNM_UNPROTECTED && *category != WLAN_CATEGORY_SELF_PROTECTED && *category != WLAN_CATEGORY_UNPROT_DMG && *category != WLAN_CATEGORY_VHT && -- cgit v1.2.3 From 1ac710e08a86e4723286873db73edb2a6e99f591 Mon Sep 17 00:00:00 2001 From: Adam Thomson Date: Wed, 7 Oct 2015 14:54:08 +0100 Subject: mfd: da9150: Add support for Fuel-Gauge Signed-off-by: Adam Thomson Signed-off-by: Lee Jones --- include/linux/mfd/da9150/core.h | 19 ++++++++++++++++++- 1 file changed, 18 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9150/core.h b/include/linux/mfd/da9150/core.h index 76e668933a77..1bf50caeb9fa 100644 --- a/include/linux/mfd/da9150/core.h +++ b/include/linux/mfd/da9150/core.h @@ -15,6 +15,7 @@ #define __DA9150_CORE_H #include +#include #include #include @@ -46,23 +47,39 @@ #define DA9150_IRQ_GPADC 19 #define DA9150_IRQ_WKUP 20 +/* I2C sub-device address */ +#define DA9150_QIF_I2C_ADDR_LSB 0x5 + +struct da9150_fg_pdata { + u32 update_interval; /* msecs */ + u8 warn_soc_lvl; /* % value */ + u8 crit_soc_lvl; /* % value */ +}; + struct da9150_pdata { int irq_base; + struct da9150_fg_pdata *fg_pdata; }; struct da9150 { struct device *dev; struct regmap *regmap; + struct i2c_client *core_qif; + struct regmap_irq_chip_data *regmap_irq_data; int irq; int irq_base; }; -/* Device I/O */ +/* Device I/O - Query Interface for FG and standard register access */ +void da9150_read_qif(struct da9150 *da9150, u8 addr, int count, u8 *buf); +void da9150_write_qif(struct da9150 *da9150, u8 addr, int count, const u8 *buf); + u8 da9150_reg_read(struct da9150 *da9150, u16 reg); void da9150_reg_write(struct da9150 *da9150, u16 reg, u8 val); void da9150_set_bits(struct da9150 *da9150, u16 reg, u8 mask, u8 val); void da9150_bulk_read(struct da9150 *da9150, u16 reg, int count, u8 *buf); void da9150_bulk_write(struct da9150 *da9150, u16 reg, int count, const u8 *buf); + #endif /* __DA9150_CORE_H */ -- cgit v1.2.3 From 42160a041db89807691b2a3fbf42e36a98b6019e Mon Sep 17 00:00:00 2001 From: Alexandre Belloni Date: Thu, 8 Oct 2015 16:56:07 +0200 Subject: can: at91: remove at91_can_data struct at91_can_data was used to pass a callback to the driver, allowing it to switch the transceiver on and off. As all at91 boards are now using DT, this is not used anymore, remove that structure. Signed-off-by: Alexandre Belloni Signed-off-by: Marc Kleine-Budde --- include/linux/platform_data/atmel.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/atmel.h b/include/linux/platform_data/atmel.h index 527a85c61924..c121ddf74f7f 100644 --- a/include/linux/platform_data/atmel.h +++ b/include/linux/platform_data/atmel.h @@ -74,11 +74,6 @@ struct atmel_uart_data { struct serial_rs485 rs485; /* rs485 settings */ }; -/* CAN */ -struct at91_can_data { - void (*transceiver_switch)(int on); -}; - /* FIXME: this needs a better location, but gets stuff building again */ extern int at91_suspend_entering_slow_clock(void); -- cgit v1.2.3 From f110711a6053f08731858aa91420104094188973 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:30 +0100 Subject: irqdomain: Convert irqdomain-%3Eof_node to fwnode Now that we have everyone accessing the of_node field via the irq_domain_get_of_node accessor, it is pretty easy to swap it for a pointer to a fwnode_handle. This translates into a few limited changes in __irq_domain_add, and an updated irq_domain_get_of_node. Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-3-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index f644fdb06dd6..2f508f4ac2ea 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -34,6 +34,7 @@ #include #include +#include #include struct device_node; @@ -130,7 +131,7 @@ struct irq_domain { unsigned int flags; /* Optional data */ - struct device_node *of_node; + struct fwnode_handle *fwnode; enum irq_domain_bus_token bus_token; struct irq_domain_chip_generic *gc; #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY @@ -163,7 +164,7 @@ enum { static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) { - return d->of_node; + return to_of_node(d->fwnode); } #ifdef CONFIG_IRQ_DOMAIN -- cgit v1.2.3 From 130b8c6c8d86075304952241bf2365cea6489df1 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:31 +0100 Subject: irqdomain: Allow irq domain lookup by fwnode So far, our irq domains are still looked up by device node. Let's change this and allow a domain to be looked up using a fwnode_handle pointer. The existing interfaces are preserved with a couple of helpers. Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-4-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2f508f4ac2ea..607c1856cc01 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -183,10 +183,17 @@ struct irq_domain *irq_domain_add_legacy(struct device_node *of_node, irq_hw_number_t first_hwirq, const struct irq_domain_ops *ops, void *host_data); -extern struct irq_domain *irq_find_matching_host(struct device_node *node, - enum irq_domain_bus_token bus_token); +extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, + enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +static inline struct irq_domain *irq_find_matching_host(struct device_node *node, + enum irq_domain_bus_token bus_token) +{ + return irq_find_matching_fwnode(node ? &node->fwnode : NULL, + bus_token); +} + static inline struct irq_domain *irq_find_host(struct device_node *node) { return irq_find_matching_host(node, DOMAIN_BUS_ANY); -- cgit v1.2.3 From 11e4438ee330fab0f216ee7cc1b651cb2ddceb5d Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:32 +0100 Subject: irqdomain: Introduce a firmware-specific IRQ specifier structure So far the closest thing to a generic IRQ specifier structure is of_phandle_args, which happens to be pretty OF specific (the of_node pointer in there is quite annoying). Let's introduce 'struct irq_fwspec' that can be used in place of of_phandle_args for OF, but also for other firmware implementations (that'd be ACPI). This is used together with a new 'translate' method that is the pendent of 'xlate'. We convert irq_create_of_mapping to use this new structure (with a small hack that will be removed later). Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-5-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 607c1856cc01..533c974b9d94 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -46,6 +46,24 @@ struct irq_data; /* Number of irqs reserved for a legacy isa controller */ #define NUM_ISA_INTERRUPTS 16 +#define IRQ_DOMAIN_IRQ_SPEC_PARAMS 16 + +/** + * struct irq_fwspec - generic IRQ specifier structure + * + * @fwnode: Pointer to a firmware-specific descriptor + * @param_count: Number of device-specific parameters + * @param: Device-specific parameters + * + * This structure, directly modeled after of_phandle_args, is used to + * pass a device-specific description of an interrupt. + */ +struct irq_fwspec { + struct fwnode_handle *fwnode; + int param_count; + u32 param[IRQ_DOMAIN_IRQ_SPEC_PARAMS]; +}; + /* * Should several domains have the same device node, but serve * different purposes (for example one domain is for PCI/MSI, and the @@ -92,6 +110,8 @@ struct irq_domain_ops { unsigned int nr_irqs); void (*activate)(struct irq_domain *d, struct irq_data *irq_data); void (*deactivate)(struct irq_domain *d, struct irq_data *irq_data); + int (*translate)(struct irq_domain *d, struct irq_fwspec *fwspec, + unsigned long *out_hwirq, unsigned int *out_type); #endif }; -- cgit v1.2.3 From c0131f09de8c2d301814cac86d78f643b8ee0574 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:34 +0100 Subject: irqdomain: Introduce irq_create_fwspec_mapping Just like we have irq_create_of_mapping, irq_create_fwspec_mapping creates a IRQ domain mapping for an interrupt described in a struct irq_fwspec. irq_create_of_mapping gets rewritten in terms of the new function, and the hack we introduced before gets removed (now that no stacked irqchip uses of_phandle_args anymore). Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-7-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 533c974b9d94..7e7842e90d40 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -267,6 +267,7 @@ extern void irq_domain_disassociate(struct irq_domain *domain, extern unsigned int irq_create_mapping(struct irq_domain *host, irq_hw_number_t hwirq); +extern unsigned int irq_create_fwspec_mapping(struct irq_fwspec *fwspec); extern void irq_dispose_mapping(unsigned int virq); /** -- cgit v1.2.3 From 1bf4ddc46c5d6123897a54cea4ffe3e90f30600b Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:35 +0100 Subject: irqdomain: Introduce irq_domain_create_{linear, tree} Just like we have irq_domain_add_{linear,tree} to create a irq domain identified by an of_node, introduce irq_domain_create_{linear,tree} that do the same thing, except that they take a struct fwnode_handle. Existing functions get rewritten in terms of the new ones so that everything keeps working as before (and __irq_domain_add is now fwnode_handle based as well). Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-8-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 31 +++++++++++++++++++++++++------ 1 file changed, 25 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 7e7842e90d40..995d4c5100d3 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -188,7 +188,7 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) } #ifdef CONFIG_IRQ_DOMAIN -struct irq_domain *__irq_domain_add(struct device_node *of_node, int size, +struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, void *host_data); @@ -207,11 +207,15 @@ extern struct irq_domain *irq_find_matching_fwnode(struct fwnode_handle *fwnode, enum irq_domain_bus_token bus_token); extern void irq_set_default_host(struct irq_domain *host); +static inline struct fwnode_handle *of_node_to_fwnode(struct device_node *node) +{ + return node ? &node->fwnode : NULL; +} + static inline struct irq_domain *irq_find_matching_host(struct device_node *node, enum irq_domain_bus_token bus_token) { - return irq_find_matching_fwnode(node ? &node->fwnode : NULL, - bus_token); + return irq_find_matching_fwnode(of_node_to_fwnode(node), bus_token); } static inline struct irq_domain *irq_find_host(struct device_node *node) @@ -231,14 +235,14 @@ static inline struct irq_domain *irq_domain_add_linear(struct device_node *of_no const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, size, size, 0, ops, host_data); + return __irq_domain_add(of_node_to_fwnode(of_node), size, size, 0, ops, host_data); } static inline struct irq_domain *irq_domain_add_nomap(struct device_node *of_node, unsigned int max_irq, const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, 0, max_irq, max_irq, ops, host_data); + return __irq_domain_add(of_node_to_fwnode(of_node), 0, max_irq, max_irq, ops, host_data); } static inline struct irq_domain *irq_domain_add_legacy_isa( struct device_node *of_node, @@ -252,7 +256,22 @@ static inline struct irq_domain *irq_domain_add_tree(struct device_node *of_node const struct irq_domain_ops *ops, void *host_data) { - return __irq_domain_add(of_node, 0, ~0, 0, ops, host_data); + return __irq_domain_add(of_node_to_fwnode(of_node), 0, ~0, 0, ops, host_data); +} + +static inline struct irq_domain *irq_domain_create_linear(struct fwnode_handle *fwnode, + unsigned int size, + const struct irq_domain_ops *ops, + void *host_data) +{ + return __irq_domain_add(fwnode, size, size, 0, ops, host_data); +} + +static inline struct irq_domain *irq_domain_create_tree(struct fwnode_handle *fwnode, + const struct irq_domain_ops *ops, + void *host_data) +{ + return __irq_domain_add(fwnode, 0, ~0, 0, ops, host_data); } extern void irq_domain_remove(struct irq_domain *host); -- cgit v1.2.3 From b145dcc45a6af0abfcf9b4de8006d40559c50fc6 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:36 +0100 Subject: irqdomain: Add a fwnode_handle allocator In order to be able to reference an irqdomain from ACPI, we need to be able to create an identifier, which is usually a struct device_node. This device node does't really fit the ACPI infrastructure, so we cunningly allocate a new structure containing a fwnode_handle, and return that. This structure doesn't really point to a device (interrupt controllers are not "real" devices in Linux), but as we cannot really deny that they exist, we create them with a new fwnode_type (FWNODE_IRQCHIP). Signed-off-by: Marc Zyngier Acked-by: Rafael J. Wysocki Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-9-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/fwnode.h | 1 + include/linux/irqdomain.h | 2 ++ 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fwnode.h b/include/linux/fwnode.h index 0408545bce42..37ec668546ab 100644 --- a/include/linux/fwnode.h +++ b/include/linux/fwnode.h @@ -17,6 +17,7 @@ enum fwnode_type { FWNODE_OF, FWNODE_ACPI, FWNODE_PDATA, + FWNODE_IRQCHIP, }; struct fwnode_handle { diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 995d4c5100d3..949caa728758 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -188,6 +188,8 @@ static inline struct device_node *irq_domain_get_of_node(struct irq_domain *d) } #ifdef CONFIG_IRQ_DOMAIN +struct fwnode_handle *irq_domain_alloc_fwnode(void *data); +void irq_domain_free_fwnode(struct fwnode_handle *fwnode); struct irq_domain *__irq_domain_add(struct fwnode_handle *fwnode, int size, irq_hw_number_t hwirq_max, int direct_max, const struct irq_domain_ops *ops, -- cgit v1.2.3 From 2bc6eba4a322e70eac8cde76442c4ac90699fb39 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:38 +0100 Subject: acpi/gsi: Add acpi_set_irq_model to initialize the GSI layer In order to start embrassing irqdomains at the GSI level, introduce a new initializer: void acpi_set_irq_model(enum acpi_irq_model_id model, struct fwnode_handle *fwnode); where: - model is the value assigned to acpi_irq_model - fwnode is the identifier for the irqdomain mapping GSI interrupts As nobody calls this code yet, the current code is (mostly) left in place. Signed-off-by: Marc Zyngier Acked-by: Rafael J. Wysocki Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-11-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/acpi.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..d863e12bbead 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -201,6 +201,9 @@ int acpi_register_gsi (struct device *dev, u32 gsi, int triggering, int polarity int acpi_gsi_to_irq (u32 gsi, unsigned int *irq); int acpi_isa_irq_to_gsi (unsigned isa_irq, u32 *gsi); +void acpi_set_irq_model(enum acpi_irq_model_id model, + struct fwnode_handle *fwnode); + #ifdef CONFIG_X86_IO_APIC extern int acpi_get_override_irq(u32 gsi, int *trigger, int *polarity); #else -- cgit v1.2.3 From e81a7cd96bd55bb57d92486c514b7b8f8c8cd8ce Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:39 +0100 Subject: irqchip/gic: Get rid of gic_init_bases() Since nobody is using gic_init_bases anymore outside of the GIC driver itself, let's do a bit of housekeeping and remove the now useless entry point. Only gic_init() is now exposed to the rest of the kernel for the benefit of legacy systems. Signed-off-by: Marc Zyngier Reviewed-and-tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-12-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqchip/arm-gic.h | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqchip/arm-gic.h b/include/linux/irqchip/arm-gic.h index b8901dfd9e95..bae69e5d693c 100644 --- a/include/linux/irqchip/arm-gic.h +++ b/include/linux/irqchip/arm-gic.h @@ -100,16 +100,11 @@ struct device_node; -void gic_init_bases(unsigned int, int, void __iomem *, void __iomem *, - u32 offset, struct device_node *); void gic_cascade_irq(unsigned int gic_nr, unsigned int irq); int gic_cpu_if_down(unsigned int gic_nr); -static inline void gic_init(unsigned int nr, int start, - void __iomem *dist , void __iomem *cpu) -{ - gic_init_bases(nr, start, dist, cpu, 0, NULL); -} +void gic_init(unsigned int nr, int start, + void __iomem *dist , void __iomem *cpu); int gicv2m_of_init(struct device_node *node, struct irq_domain *parent); -- cgit v1.2.3 From 2a5e9a072da6469a37d1f0b1577416f51223c280 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:43 +0100 Subject: irqdomain: Introduce irq_domain_create_hierarchy As we're about to start converting the various MSI layers to use fwnode_handle instead of device_node, add irq_domain_create_hierarchy as a directly equivalent of irq_domain_add_hierarchy (which still exists as a compatibility interface). Signed-off-by: Marc Zyngier Tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-16-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 949caa728758..2b3340ae915d 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -340,10 +340,23 @@ extern void irq_domain_set_info(struct irq_domain *domain, unsigned int virq, void *chip_data, irq_flow_handler_t handler, void *handler_data, const char *handler_name); #ifdef CONFIG_IRQ_DOMAIN_HIERARCHY -extern struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, +extern struct irq_domain *irq_domain_create_hierarchy(struct irq_domain *parent, unsigned int flags, unsigned int size, - struct device_node *node, + struct fwnode_handle *fwnode, const struct irq_domain_ops *ops, void *host_data); + +static inline struct irq_domain *irq_domain_add_hierarchy(struct irq_domain *parent, + unsigned int flags, + unsigned int size, + struct device_node *node, + const struct irq_domain_ops *ops, + void *host_data) +{ + return irq_domain_create_hierarchy(parent, flags, size, + of_node_to_fwnode(node), + ops, host_data); +} + extern int __irq_domain_alloc_irqs(struct irq_domain *domain, int irq_base, unsigned int nr_irqs, int node, void *arg, bool realloc); -- cgit v1.2.3 From be5436c83ac8921f33fe07323fab03c6644ce52e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:44 +0100 Subject: irqdomain/msi: Use fwnode instead of of_node As we continue to push of_node towards the outskirts of irq domains, let's start tackling the case of msi_create_irq_domain and its little friends. This has limited impact in both PCI/MSI, platform MSI, and a few drivers. Signed-off-by: Marc Zyngier Tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-17-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/msi.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index ad939d0ba816..32a24b9a9556 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -174,6 +174,7 @@ struct msi_controller { struct irq_domain; struct irq_chip; struct device_node; +struct fwnode_handle; struct msi_domain_info; /** @@ -262,7 +263,7 @@ enum { int msi_domain_set_affinity(struct irq_data *data, const struct cpumask *mask, bool force); -struct irq_domain *msi_create_irq_domain(struct device_node *of_node, +struct irq_domain *msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, @@ -270,7 +271,7 @@ int msi_domain_alloc_irqs(struct irq_domain *domain, struct device *dev, void msi_domain_free_irqs(struct irq_domain *domain, struct device *dev); struct msi_domain_info *msi_get_domain_info(struct irq_domain *domain); -struct irq_domain *platform_msi_create_irq_domain(struct device_node *np, +struct irq_domain *platform_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); int platform_msi_domain_alloc_irqs(struct device *dev, unsigned int nvec, @@ -280,13 +281,13 @@ void platform_msi_domain_free_irqs(struct device *dev); #ifdef CONFIG_PCI_MSI_IRQ_DOMAIN void pci_msi_domain_write_msg(struct irq_data *irq_data, struct msi_msg *msg); -struct irq_domain *pci_msi_create_irq_domain(struct device_node *node, +struct irq_domain *pci_msi_create_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); int pci_msi_domain_alloc_irqs(struct irq_domain *domain, struct pci_dev *dev, int nvec, int type); void pci_msi_domain_free_irqs(struct irq_domain *domain, struct pci_dev *dev); -struct irq_domain *pci_msi_create_default_irq_domain(struct device_node *node, +struct irq_domain *pci_msi_create_default_irq_domain(struct fwnode_handle *fwnode, struct msi_domain_info *info, struct irq_domain *parent); irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, -- cgit v1.2.3 From e7a46c818564329f977f8fa157b5e9e1d0d83012 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 13 Oct 2015 12:51:45 +0100 Subject: irqdomain: Documentation updates Update the IRQ domain documentation to reflect the changes made while divorcing the domain infrastructure from Device Tree. Signed-off-by: Marc Zyngier Tested-by: Hanjun Guo Tested-by: Lorenzo Pieralisi Cc: Cc: Tomasz Nowicki Cc: Suravee Suthikulpanit Cc: Graeme Gregory Cc: Jake Oshins Cc: Jiang Liu Cc: Jason Cooper Cc: Rafael J. Wysocki Link: http://lkml.kernel.org/r/1444737105-31573-18-git-send-email-marc.zyngier@arm.com Signed-off-by: Thomas Gleixner --- include/linux/irqdomain.h | 23 ++++++++++------------- 1 file changed, 10 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqdomain.h b/include/linux/irqdomain.h index 2b3340ae915d..d5e5c5bef28c 100644 --- a/include/linux/irqdomain.h +++ b/include/linux/irqdomain.h @@ -5,9 +5,10 @@ * helpful for interrupt controllers to implement mapping between hardware * irq numbers and the Linux irq number space. * - * irq_domains also have a hook for translating device tree interrupt - * representation into a hardware irq number that can be mapped back to a - * Linux irq number without any extra platform support code. + * irq_domains also have hooks for translating device tree or other + * firmware interrupt representations into a hardware irq number that + * can be mapped back to a Linux irq number without any extra platform + * support code. * * Interrupt controller "domain" data structure. This could be defined as a * irq domain controller. That is, it handles the mapping between hardware @@ -17,16 +18,12 @@ * model). It's the domain callbacks that are responsible for setting the * irq_chip on a given irq_desc after it's been mapped. * - * The host code and data structures are agnostic to whether or not - * we use an open firmware device-tree. We do have references to struct - * device_node in two places: in irq_find_host() to find the host matching - * a given interrupt controller node, and of course as an argument to its - * counterpart domain->ops->match() callback. However, those are treated as - * generic pointers by the core and the fact that it's actually a device-node - * pointer is purely a convention between callers and implementation. This - * code could thus be used on other architectures by replacing those two - * by some sort of arch-specific void * "token" used to identify interrupt - * controllers. + * The host code and data structures use a fwnode_handle pointer to + * identify the domain. In some cases, and in order to preserve source + * code compatibility, this fwnode pointer is "upgraded" to a DT + * device_node. For those firmware infrastructures that do not provide + * a unique identifier for an interrupt controller, the irq_domain + * code offers a fwnode allocator. */ #ifndef _LINUX_IRQDOMAIN_H -- cgit v1.2.3 From 17a7b0b4d9749f80d365d7baff5dec2f54b0e992 Mon Sep 17 00:00:00 2001 From: Lars Svensson Date: Wed, 7 Oct 2015 09:20:12 +0200 Subject: fb.h: Provide alternate screen_base pointer Some drivers use member screen_base of struct fb_info to store non- __iomem pointers, creating the need for ugly __force typecasts to avoid sparse warnings. This adds an alternate pointer without the __iomem qualifyer for this use. Signed-off-by: Lars Svensson Signed-off-by: Greg Kroah-Hartman --- include/linux/fb.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fb.h b/include/linux/fb.h index bc9afa74ee11..41a3b11f7796 100644 --- a/include/linux/fb.h +++ b/include/linux/fb.h @@ -483,7 +483,10 @@ struct fb_info { #ifdef CONFIG_FB_TILEBLITTING struct fb_tile_ops *tileops; /* Tile Blitting */ #endif - char __iomem *screen_base; /* Virtual address */ + union { + char __iomem *screen_base; /* Virtual address */ + char *screen_buffer; + }; unsigned long screen_size; /* Amount of ioremapped VRAM or 0 */ void *pseudo_palette; /* Fake palette of 16 colors */ #define FBINFO_STATE_RUNNING 0 -- cgit v1.2.3 From f9f9f11dcf0f3b757b282ce7cefea8696212a422 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 12 Oct 2015 14:43:22 +0200 Subject: of/irq: move of_msi_configure to the right guard and add a dummy of_msi_configure is part of of_irq.c, which is compiled in when OF_IRQ is enabled, not just OF. Also It is unconditionally called from of_platform_device_create_pdata, which does not depend on OF_IRQ, just OF_ADDRESS, so we need a dummy implementation in case of OF_ADDRESS=y but OF_IRQ=n. Fixes: c706c239 ("of/platform: Assign MSI domain to platform device") Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring --- include/linux/of_irq.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 4bcbd586a672..0088038d5ccd 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,7 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern void of_msi_configure(struct device *dev, struct device_node *np); #else static inline int of_irq_count(struct device_node *dev) { @@ -64,6 +65,9 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline void of_msi_configure(struct device *dev, struct device_node *np) +{ +} #endif #if defined(CONFIG_OF) @@ -74,7 +78,6 @@ static inline int of_irq_to_resource_table(struct device_node *dev, */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); extern struct device_node *of_irq_find_parent(struct device_node *child); -extern void of_msi_configure(struct device *dev, struct device_node *np); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, -- cgit v1.2.3 From 52493d446141b07c8ba28dd6a529513f8b2342bd Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 12 Oct 2015 14:43:23 +0200 Subject: of/irq: make of_irq_find_parent static of_irq_find_parent has no users outside of of_irq.c, so it does not make sense to expose it in of_irq.h. Therefore remove the prototype and dummy implmeentation and make the function static instead. Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring --- include/linux/of_irq.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 0088038d5ccd..a58e2e51eeb2 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -77,7 +77,6 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) * so declare it here regardless of the CONFIG_OF_IRQ setting. */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -extern struct device_node *of_irq_find_parent(struct device_node *child); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -85,11 +84,6 @@ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, { return 0; } - -static inline void *of_irq_find_parent(struct device_node *child) -{ - return NULL; -} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From 62ebf931935964230d6fe39026bc5fbcfac330d3 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Mon, 12 Oct 2015 14:43:24 +0200 Subject: of/irq: fix guards for irq_of_parse_and_map prototype Since OF is now a userselectable config symbol, having OF=y but OF_IRQ=n is a valid combination for non-OF platforms, and OF=y does not guarantee anymore that OF_IRQ is enabled (or we are building for SPARC). Fixes the following build error with OF=y, IRQ_DOMAIN=n and SPI=y: drivers/built-in.o: In function `spi_register_master': (.text+0xc3ae): undefined reference to `irq_of_parse_and_map' Makefile:935: recipe for target 'vmlinux' failed make: *** [vmlinux] Error 1 Signed-off-by: Jonas Gorski Signed-off-by: Rob Herring --- include/linux/of_irq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index a58e2e51eeb2..580818d90475 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -70,7 +70,7 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) } #endif -#if defined(CONFIG_OF) +#if defined(CONFIG_OF_IRQ) || defined(CONFIG_SPARC) /* * irq_of_parse_and_map() is used by all OF enabled platforms; but SPARC * implements it differently. However, the prototype is the same for all, @@ -78,7 +78,7 @@ static inline void of_msi_configure(struct device *dev, struct device_node *np) */ extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); -#else /* !CONFIG_OF */ +#else /* !CONFIG_OF && !CONFIG_SPARC */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, int index) { -- cgit v1.2.3 From 50d3fb562561fcf5b745e71945834735d7386a1f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 20:48:21 +0100 Subject: iommu/vt-d: Use plain writeq() for dmar_writeq() where available Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 6240063bdcac..08802b99f057 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -59,14 +59,11 @@ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ #define OFFSET_STRIDE (9) -/* -#define dmar_readl(dmar, reg) readl(dmar + reg) -#define dmar_readq(dmar, reg) ({ \ - u32 lo, hi; \ - lo = readl(dmar + reg); \ - hi = readl(dmar + reg + 4); \ - (((u64) hi) << 32) + lo; }) -*/ + +#ifdef CONFIG_64BIT +#define dmar_readq(a) readq(a) +#define dmar_writeq(a,v) writeq(v,a) +#else static inline u64 dmar_readq(void __iomem *addr) { u32 lo, hi; @@ -80,6 +77,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) writel((u32)val, addr); writel((u32)(val >> 32), addr + 4); } +#endif #define DMAR_VER_MAJOR(v) (((v) & 0xf0) >> 4) #define DMAR_VER_MINOR(v) ((v) & 0x0f) -- cgit v1.2.3 From 45aaeff947190e4b57b2d0db4d74ab5eea450825 Mon Sep 17 00:00:00 2001 From: Boris BREZILLON Date: Tue, 13 Oct 2015 11:22:18 +0200 Subject: mtd: nand: pass page number to ecc->write_xxx() methods The ->read_xxx() methods are all passed the page number the NAND controller is supposed to read, but ->write_xxx() do not have such a parameter. This is a problem if we want to properly implement data scrambling/randomization in order to mitigate MLC sensibility to repeated pattern: to prevent bitflips in adjacent pages in the same block we need to avoid repeating the same pattern at the same offset in those pages, hence the randomizer/scrambler engine need to be passed the page value in order to adapt its seed accordingly. Moreover, adding the page parameter to the ->write_xxx() methods add some consistency to the current API. Signed-off-by: Boris Brezillon CC: Josh Wu CC: Ezequiel Garcia CC: Maxime Ripard CC: Greg Kroah-Hartman CC: Huang Shijie CC: Stefan Agner CC: devel@driverdev.osuosl.org CC: linux-arm-kernel@lists.infradead.org CC: linux-kernel@vger.kernel.org Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 50e1f94fa377..5a9d1d4c2487 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -504,16 +504,16 @@ struct nand_ecc_ctrl { int (*read_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); int (*write_page_raw)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required); + const uint8_t *buf, int oob_required, int page); int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offs, uint32_t len, uint8_t *buf, int page); int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, - const uint8_t *data_buf, int oob_required); + const uint8_t *data_buf, int oob_required, int page); int (*write_page)(struct mtd_info *mtd, struct nand_chip *chip, - const uint8_t *buf, int oob_required); + const uint8_t *buf, int oob_required, int page); int (*write_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, int page); int (*read_oob_raw)(struct mtd_info *mtd, struct nand_chip *chip, -- cgit v1.2.3 From ef25ba0476015908ef5960f9faac149ddf34ede0 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Oct 2015 00:49:34 +0200 Subject: PM / sleep: Add flags to indicate platform firmware involvement There are quite a few cases in which device drivers, bus types or even the PM core itself may benefit from knowing whether or not the platform firmware will be involved in the upcoming system power transition (during system suspend) or whether or not it was involved in it (during system resume). For this reason, introduce global system suspend flags that can be used by the platform code to expose that information for the benefit of the other parts of the kernel and make the ACPI core set them as appropriate. Users of the new flags will be added later. Signed-off-by: Rafael J. Wysocki --- include/linux/suspend.h | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) (limited to 'include/linux') diff --git a/include/linux/suspend.h b/include/linux/suspend.h index 33aaf9a9596c..8b6ec7ef0854 100644 --- a/include/linux/suspend.h +++ b/include/linux/suspend.h @@ -202,6 +202,36 @@ struct platform_freeze_ops { extern void suspend_set_ops(const struct platform_suspend_ops *ops); extern int suspend_valid_only_mem(suspend_state_t state); +extern unsigned int pm_suspend_global_flags; + +#define PM_SUSPEND_FLAG_FW_SUSPEND (1 << 0) +#define PM_SUSPEND_FLAG_FW_RESUME (1 << 1) + +static inline void pm_suspend_clear_flags(void) +{ + pm_suspend_global_flags = 0; +} + +static inline void pm_set_suspend_via_firmware(void) +{ + pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_SUSPEND; +} + +static inline void pm_set_resume_via_firmware(void) +{ + pm_suspend_global_flags |= PM_SUSPEND_FLAG_FW_RESUME; +} + +static inline bool pm_suspend_via_firmware(void) +{ + return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_SUSPEND); +} + +static inline bool pm_resume_via_firmware(void) +{ + return !!(pm_suspend_global_flags & PM_SUSPEND_FLAG_FW_RESUME); +} + /* Suspend-to-idle state machnine. */ enum freeze_state { FREEZE_STATE_NONE, /* Not suspended/suspending. */ @@ -241,6 +271,12 @@ extern int pm_suspend(suspend_state_t state); #else /* !CONFIG_SUSPEND */ #define suspend_valid_only_mem NULL +static inline void pm_suspend_clear_flags(void) {} +static inline void pm_set_suspend_via_firmware(void) {} +static inline void pm_set_resume_via_firmware(void) {} +static inline bool pm_suspend_via_firmware(void) { return false; } +static inline bool pm_resume_via_firmware(void) { return false; } + static inline void suspend_set_ops(const struct platform_suspend_ops *ops) {} static inline int pm_suspend(suspend_state_t state) { return -ENOSYS; } static inline bool idle_should_freeze(void) { return false; } -- cgit v1.2.3 From 58a1fbbb2ee873dd1fe327e80bc7b08e80866269 Mon Sep 17 00:00:00 2001 From: "Rafael J. Wysocki" Date: Wed, 7 Oct 2015 00:50:24 +0200 Subject: PM / PCI / ACPI: Kick devices that might have been reset by firmware There is a concern that if the platform firmware was involved in the system resume that's being completed, some devices might have been reset by it and if those devices had the power.direct_complete flag set during the preceding suspend transition, they may stay in a reset-power-on state indefinitely (until they are runtime-resumed and then suspended again). That may not be a big deal from the individual device's perspective, but if the system is an SoC, it may be prevented from entering deep SoC-wide low-power states on idle because of that. The devices that are most likely to be affected by this issue are PCI devices and ACPI-enumerated devices using the general ACPI PM domain, so to prevent it from happening for those devices, force a runtime resume for them if they have their power.direct_complete flags set and the platform firmware was involved in the resume transition currently in progress. Signed-off-by: Rafael J. Wysocki --- include/linux/pm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pm.h b/include/linux/pm.h index 35d599e7250d..528be6787796 100644 --- a/include/linux/pm.h +++ b/include/linux/pm.h @@ -732,6 +732,7 @@ extern int pm_generic_poweroff_noirq(struct device *dev); extern int pm_generic_poweroff_late(struct device *dev); extern int pm_generic_poweroff(struct device *dev); extern void pm_generic_complete(struct device *dev); +extern void pm_complete_with_resume_check(struct device *dev); #else /* !CONFIG_PM_SLEEP */ -- cgit v1.2.3 From 801cf21bb5806e39c593bad76b257bba20eaf66f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:06 -0700 Subject: mtd: spi-nor: make implicit dependency explicit We use BIT() in the header. No real problem for now, but it's better to be accurate. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 672595a381c5..bc4c321707e3 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -10,6 +10,8 @@ #ifndef __LINUX_MTD_SPI_NOR_H #define __LINUX_MTD_SPI_NOR_H +#include + /* * Note on opcode nomenclature: some opcodes have a format like * SPINOR_OP_FUNCTION{4,}_x_y_z. The numbers x, y, and z stand for the number -- cgit v1.2.3 From a8a16454edb364858a0a54e17acaeab329e7b47f Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:07 -0700 Subject: mtd: spi-nor: make bitfield constants more consistent These status bits use different ways of representing similar integer constants -- some are decimal, some are hex. Make them more consistent. At the same time, impose my own preference, since IMO it's clearer what these are when using the BIT() macro. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index bc4c321707e3..768b900a72cf 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -63,24 +63,24 @@ #define SPINOR_OP_WD_EVCR 0x61 /* Write EVCR register */ /* Status Register bits. */ -#define SR_WIP 1 /* Write in progress */ -#define SR_WEL 2 /* Write enable latch */ +#define SR_WIP BIT(0) /* Write in progress */ +#define SR_WEL BIT(1) /* Write enable latch */ /* meaning of other SR_* bits may differ between vendors */ -#define SR_BP0 4 /* Block protect 0 */ -#define SR_BP1 8 /* Block protect 1 */ -#define SR_BP2 0x10 /* Block protect 2 */ -#define SR_SRWD 0x80 /* SR write protect */ +#define SR_BP0 BIT(2) /* Block protect 0 */ +#define SR_BP1 BIT(3) /* Block protect 1 */ +#define SR_BP2 BIT(4) /* Block protect 2 */ +#define SR_SRWD BIT(7) /* SR write protect */ -#define SR_QUAD_EN_MX 0x40 /* Macronix Quad I/O */ +#define SR_QUAD_EN_MX BIT(6) /* Macronix Quad I/O */ /* Enhanced Volatile Configuration Register bits */ -#define EVCR_QUAD_EN_MICRON 0x80 /* Micron Quad I/O */ +#define EVCR_QUAD_EN_MICRON BIT(7) /* Micron Quad I/O */ /* Flag Status Register bits */ -#define FSR_READY 0x80 +#define FSR_READY BIT(7) /* Configuration Register bits. */ -#define CR_QUAD_EN_SPAN 0x2 /* Spansion Quad I/O */ +#define CR_QUAD_EN_SPAN BIT(1) /* Spansion Quad I/O */ enum read_mode { SPI_NOR_NORMAL = 0, -- cgit v1.2.3 From db4745edb282766f6532d4b0f643c2348e450e25 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:08 -0700 Subject: mtd: spi-nor: add SPI NOR manufacturer IDs These are often similar for CFI (parallel NOR) and for SPI NOR, but they aren't always the same, for various reasons (different namespaces, company acquisitions and renames, etc.). And some don't have CFI_MFR_* entries at all. So let's make a proper place to list the SPI NOR IDs, with all the SPI NOR specific assumptions and comments. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 768b900a72cf..88297ee22de4 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -11,6 +11,21 @@ #define __LINUX_MTD_SPI_NOR_H #include +#include + +/* + * Manufacturer IDs + * + * The first byte returned from the flash after sending opcode SPINOR_OP_RDID. + * Sometimes these are the same as CFI IDs, but sometimes they aren't. + */ +#define SNOR_MFR_ATMEL CFI_MFR_ATMEL +#define SNOR_MFR_INTEL CFI_MFR_INTEL +#define SNOR_MFR_MICRON CFI_MFR_ST /* ST Micro <--> Micron */ +#define SNOR_MFR_MACRONIX CFI_MFR_MACRONIX +#define SNOR_MFR_SPANSION CFI_MFR_AMD +#define SNOR_MFR_SST CFI_MFR_SST +#define SNOR_MFR_WINBOND 0xef /* * Note on opcode nomenclature: some opcodes have a format like -- cgit v1.2.3 From f8900258906c3533b91e779e80f75ec80de816c0 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:10 -0700 Subject: mtd: spi-nor: fixup kernel-doc for flash lock/unlock function pointers I got the names of these fields wrong. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 88297ee22de4..75eb1a079f75 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -145,8 +145,8 @@ struct mtd_info; * @write: [DRIVER-SPECIFIC] write data to the SPI NOR * @erase: [DRIVER-SPECIFIC] erase a sector of the SPI NOR * at the offset @offs - * @lock: [FLASH-SPECIFIC] lock a region of the SPI NOR - * @unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR + * @flash_lock: [FLASH-SPECIFIC] lock a region of the SPI NOR + * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR * @priv: the private data */ struct spi_nor { -- cgit v1.2.3 From 5bf0e69b67a5600ea7ef178acd57606d1fc2c134 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Tue, 1 Sep 2015 12:57:12 -0700 Subject: mtd: spi-nor: add mtd_is_locked() support This enables ioctl(MEMISLOCKED). Status can now be reported in the mtdinfo or flash_lock utilities found in mtd-utils. Signed-off-by: Brian Norris --- include/linux/mtd/spi-nor.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/spi-nor.h b/include/linux/mtd/spi-nor.h index 75eb1a079f75..c8723b62c4cd 100644 --- a/include/linux/mtd/spi-nor.h +++ b/include/linux/mtd/spi-nor.h @@ -147,6 +147,8 @@ struct mtd_info; * at the offset @offs * @flash_lock: [FLASH-SPECIFIC] lock a region of the SPI NOR * @flash_unlock: [FLASH-SPECIFIC] unlock a region of the SPI NOR + * @flash_is_locked: [FLASH-SPECIFIC] check if a region of the SPI NOR is + * completely locked * @priv: the private data */ struct spi_nor { @@ -178,6 +180,7 @@ struct spi_nor { int (*flash_lock)(struct spi_nor *nor, loff_t ofs, uint64_t len); int (*flash_unlock)(struct spi_nor *nor, loff_t ofs, uint64_t len); + int (*flash_is_locked)(struct spi_nor *nor, loff_t ofs, uint64_t len); void *priv; }; -- cgit v1.2.3 From 870823e629ea194e6cf8e82a9694ac62cad49512 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:37 +0200 Subject: configfs: add show and store methods to struct configfs_attribute Add methods to struct configfs_attribute to directly show and store attributes without adding boilerplate code to every user. In addition to the methods this also adds 3 helper macros to define read/write, read-only and write-only attributes with a single line of code. Signed-off-by: Christoph Hellwig Reviewed-by: Nicholas Bellinger Acked-by: Greg Kroah-Hartman Signed-off-by: Nicholas Bellinger --- include/linux/configfs.h | 27 +++++++++++++++++++++++++++ 1 file changed, 27 insertions(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 63a36e89d0eb..85e9956a86de 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -125,8 +125,35 @@ struct configfs_attribute { const char *ca_name; struct module *ca_owner; umode_t ca_mode; + ssize_t (*show)(struct config_item *, char *); + ssize_t (*store)(struct config_item *, const char *, size_t); }; +#define CONFIGFS_ATTR(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO | S_IWUSR, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ + .store = _pfx##_name##_store, \ +} + +#define CONFIGFS_ATTR_RO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IRUGO, \ + .ca_owner = THIS_MODULE, \ + .show = _pfx##_name##_show, \ +} + +#define CONFIGFS_ATTR_WO(_pfx, _name) \ +static struct configfs_attribute _pfx##attr_##_name = { \ + .ca_name = __stringify(_name), \ + .ca_mode = S_IWUSR, \ + .ca_owner = THIS_MODULE, \ + .store = _pfx##_name##_store, \ +} + /* * Users often need to create attribute structures for their configurable * attributes, containing a configfs_attribute member and function pointers -- cgit v1.2.3 From 45b6a73f62ebcf3ff067895fb8030e67f4c7b67f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:38 +0200 Subject: usb-gadget: use per-attribute show and store methods To simplify the configfs interface and remove boilerplate code that also causes binary bloat. Signed-off-by: Christoph Hellwig Reviewed-by: Andrzej Pietrasiewicz Acked-by: Felipe Balbi Signed-off-by: Nicholas Bellinger --- include/linux/usb/gadget_configfs.h | 19 +++++-------------- 1 file changed, 5 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb/gadget_configfs.h b/include/linux/usb/gadget_configfs.h index d74c0ae989d5..c36e95730de1 100644 --- a/include/linux/usb/gadget_configfs.h +++ b/include/linux/usb/gadget_configfs.h @@ -7,9 +7,10 @@ int check_user_usb_string(const char *name, struct usb_gadget_strings *stringtab_dev); #define GS_STRINGS_W(__struct, __name) \ - static ssize_t __struct##_##__name##_store(struct __struct *gs, \ +static ssize_t __struct##_##__name##_store(struct config_item *item, \ const char *page, size_t len) \ { \ + struct __struct *gs = to_##__struct(item); \ int ret; \ \ ret = usb_string_copy(page, &gs->__name); \ @@ -19,30 +20,20 @@ int check_user_usb_string(const char *name, } #define GS_STRINGS_R(__struct, __name) \ - static ssize_t __struct##_##__name##_show(struct __struct *gs, \ - char *page) \ +static ssize_t __struct##_##__name##_show(struct config_item *item, char *page) \ { \ + struct __struct *gs = to_##__struct(item); \ return sprintf(page, "%s\n", gs->__name ?: ""); \ } -#define GS_STRING_ITEM_ATTR(struct_name, name) \ - static struct struct_name##_attribute struct_name##_##name = \ - __CONFIGFS_ATTR(name, S_IRUGO | S_IWUSR, \ - struct_name##_##name##_show, \ - struct_name##_##name##_store) - #define GS_STRINGS_RW(struct_name, _name) \ GS_STRINGS_R(struct_name, _name) \ GS_STRINGS_W(struct_name, _name) \ - GS_STRING_ITEM_ATTR(struct_name, _name) + CONFIGFS_ATTR(struct_name##_, _name) #define USB_CONFIG_STRING_RW_OPS(struct_in) \ - CONFIGFS_ATTR_OPS(struct_in); \ - \ static struct configfs_item_operations struct_in##_langid_item_ops = { \ .release = struct_in##_attr_release, \ - .show_attribute = struct_in##_attr_show, \ - .store_attribute = struct_in##_attr_store, \ }; \ \ static struct config_item_type struct_in##_langid_type = { \ -- cgit v1.2.3 From 517982229f78b2aebf00a8a337e84e8eeea70b8e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 3 Oct 2015 15:32:59 +0200 Subject: configfs: remove old API Remove the old show_attribute and store_attribute methods and update the documentation. Also replace the two C samples with a single new one in the proper samples directory where people expect to find it. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- include/linux/configfs.h | 82 ------------------------------------------------ 1 file changed, 82 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 85e9956a86de..a8a335b7fce0 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -154,86 +154,6 @@ static struct configfs_attribute _pfx##attr_##_name = { \ .store = _pfx##_name##_store, \ } -/* - * Users often need to create attribute structures for their configurable - * attributes, containing a configfs_attribute member and function pointers - * for the show() and store() operations on that attribute. If they don't - * need anything else on the extended attribute structure, they can use - * this macro to define it The argument _item is the name of the - * config_item structure. - */ -#define CONFIGFS_ATTR_STRUCT(_item) \ -struct _item##_attribute { \ - struct configfs_attribute attr; \ - ssize_t (*show)(struct _item *, char *); \ - ssize_t (*store)(struct _item *, const char *, size_t); \ -} - -/* - * With the extended attribute structure, users can use this macro - * (similar to sysfs' __ATTR) to make defining attributes easier. - * An example: - * #define MYITEM_ATTR(_name, _mode, _show, _store) \ - * struct myitem_attribute childless_attr_##_name = \ - * __CONFIGFS_ATTR(_name, _mode, _show, _store) - */ -#define __CONFIGFS_ATTR(_name, _mode, _show, _store) \ -{ \ - .attr = { \ - .ca_name = __stringify(_name), \ - .ca_mode = _mode, \ - .ca_owner = THIS_MODULE, \ - }, \ - .show = _show, \ - .store = _store, \ -} -/* Here is a readonly version, only requiring a show() operation */ -#define __CONFIGFS_ATTR_RO(_name, _show) \ -{ \ - .attr = { \ - .ca_name = __stringify(_name), \ - .ca_mode = 0444, \ - .ca_owner = THIS_MODULE, \ - }, \ - .show = _show, \ -} - -/* - * With these extended attributes, the simple show_attribute() and - * store_attribute() operations need to call the show() and store() of the - * attributes. This is a common pattern, so we provide a macro to define - * them. The argument _item is the name of the config_item structure. - * This macro expects the attributes to be named "struct _attribute" - * and the function to_() to exist; - */ -#define CONFIGFS_ATTR_OPS(_item) \ -static ssize_t _item##_attr_show(struct config_item *item, \ - struct configfs_attribute *attr, \ - char *page) \ -{ \ - struct _item *_item = to_##_item(item); \ - struct _item##_attribute *_item##_attr = \ - container_of(attr, struct _item##_attribute, attr); \ - ssize_t ret = 0; \ - \ - if (_item##_attr->show) \ - ret = _item##_attr->show(_item, page); \ - return ret; \ -} \ -static ssize_t _item##_attr_store(struct config_item *item, \ - struct configfs_attribute *attr, \ - const char *page, size_t count) \ -{ \ - struct _item *_item = to_##_item(item); \ - struct _item##_attribute *_item##_attr = \ - container_of(attr, struct _item##_attribute, attr); \ - ssize_t ret = -EINVAL; \ - \ - if (_item##_attr->store) \ - ret = _item##_attr->store(_item, page, count); \ - return ret; \ -} - /* * If allow_link() exists, the item can symlink(2) out to other * items. If the item is a group, it may support mkdir(2). @@ -250,8 +170,6 @@ static ssize_t _item##_attr_store(struct config_item *item, \ */ struct configfs_item_operations { void (*release)(struct config_item *); - ssize_t (*show_attribute)(struct config_item *, struct configfs_attribute *,char *); - ssize_t (*store_attribute)(struct config_item *,struct configfs_attribute *,const char *, size_t); int (*allow_link)(struct config_item *src, struct config_item *target); int (*drop_link)(struct config_item *src, struct config_item *target); }; -- cgit v1.2.3 From 027c71bbae3a6eeff00c11d1b708593a5c790314 Mon Sep 17 00:00:00 2001 From: Petri Gynther Date: Tue, 13 Oct 2015 23:13:55 -0700 Subject: Input: improve autorepeat initialization Add new function input_enable_softrepeat() that allows drivers to initialize their own values for input_dev->rep[REP_DELAY] and input_dev->rep[REP_PERIOD], but also use the software autorepeat functionality from input.c. For example, a HID driver could do: static void xyz_input_configured(struct hid_device *hid, struct hid_input *hidinput) { input_enable_softrepeat(hidinput->input, 400, 100); } static struct hid_driver xyz_driver = { .input_configured = xyz_input_configured, } Signed-off-by: Petri Gynther Signed-off-by: Dmitry Torokhov --- include/linux/input.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/input.h b/include/linux/input.h index 82ce323b9986..1e967694e9a5 100644 --- a/include/linux/input.h +++ b/include/linux/input.h @@ -469,6 +469,8 @@ int input_get_keycode(struct input_dev *dev, struct input_keymap_entry *ke); int input_set_keycode(struct input_dev *dev, const struct input_keymap_entry *ke); +void input_enable_softrepeat(struct input_dev *dev, int delay, int period); + extern struct class input_class; /** -- cgit v1.2.3 From 47ec6e5a5f57f96d7d382e2d9f8dc5a5bdb45259 Mon Sep 17 00:00:00 2001 From: Sylvain Rochet Date: Tue, 13 Oct 2015 23:24:36 -0700 Subject: Input: rotary_encoder - add wake up support This patch adds wake up support to GPIO rotary encoders. Signed-off-by: Sylvain Rochet Reviewed-by: Johan Hovold Signed-off-by: Dmitry Torokhov --- include/linux/rotary_encoder.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h index 3f594dce5716..b33f2d2a708f 100644 --- a/include/linux/rotary_encoder.h +++ b/include/linux/rotary_encoder.h @@ -11,6 +11,7 @@ struct rotary_encoder_platform_data { bool relative_axis; bool rollover; bool half_period; + bool wakeup_source; }; #endif /* __ROTARY_ENCODER_H__ */ -- cgit v1.2.3 From 0c5a69f432ba1e586ac6ae5e4311c2f1cbd051fa Mon Sep 17 00:00:00 2001 From: Christian Borntraeger Date: Wed, 7 Oct 2015 10:54:36 +0200 Subject: s390/compiler.h Fix sparse vs. hotpatch sparse does not understand the s390 specific hotpatch attribute and floods the log with messages like include/uapi/linux/swab.h:92:8: error: attribute 'hotpatch': unknown attribute Let's just dont use it, if __CHECKER__ is defined. Signed-off-by: Christian Borntraeger Signed-off-by: Martin Schwidefsky --- include/linux/compiler.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c836eb2dc44d..449cb674c7fa 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -56,7 +56,7 @@ extern void __chk_io_ptr(const volatile void __iomem *); #include #endif -#ifdef CC_USING_HOTPATCH +#if defined(CC_USING_HOTPATCH) && !defined(__CHECKER__) #define notrace __attribute__((hotpatch(0,0))) #else #define notrace __attribute__((no_instrument_function)) -- cgit v1.2.3 From 2d4d1eea540b27c72488fd1914674c42473d53df Mon Sep 17 00:00:00 2001 From: Tadeusz Struk Date: Thu, 8 Oct 2015 09:26:50 -0700 Subject: lib/mpi: Add mpi sgl helpers Add mpi_read_raw_from_sgl and mpi_write_to_sgl helpers. Signed-off-by: Tadeusz Struk Signed-off-by: Herbert Xu --- include/linux/mpi.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 641b7d6fd096..8b8269fa8907 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -31,6 +31,7 @@ #define G10_MPI_H #include +#include /* DSI defines */ @@ -78,6 +79,7 @@ void mpi_swap(MPI a, MPI b); MPI do_encode_md(const void *sha_buffer, unsigned nbits); MPI mpi_read_raw_data(const void *xbuffer, size_t nbytes); MPI mpi_read_from_buffer(const void *buffer, unsigned *ret_nread); +MPI mpi_read_raw_from_sgl(struct scatterlist *sgl, unsigned int len); int mpi_fromstr(MPI val, const char *str); u32 mpi_get_keyid(MPI a, u32 *keyid); void *mpi_get_buffer(MPI a, unsigned *nbytes, int *sign); @@ -85,6 +87,8 @@ int mpi_read_buffer(MPI a, uint8_t *buf, unsigned buf_len, unsigned *nbytes, int *sign); void *mpi_get_secure_buffer(MPI a, unsigned *nbytes, int *sign); int mpi_set_buffer(MPI a, const void *buffer, unsigned nbytes, int sign); +int mpi_write_to_sgl(MPI a, struct scatterlist *sg, unsigned *nbytes, + int *sign); #define log_mpidump g10_log_mpidump -- cgit v1.2.3 From d28c2b36d6027702585ca93773b3edd6e5f1a5bd Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:44 +0300 Subject: ARM: common: edma: Remove unused functions We no longer have users for these functions so they can be removed. Remove also unused enums from the header file. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 33 --------------------------------- 1 file changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index bdb2710e2aab..c1862423b356 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -72,20 +72,6 @@ struct edmacc_param { #define EDMA_DMA_TC1_ERROR 3 #define EDMA_DMA_TC2_ERROR 4 -enum address_mode { - INCR = 0, - FIFO = 1 -}; - -enum fifo_width { - W8BIT = 0, - W16BIT = 1, - W32BIT = 2, - W64BIT = 3, - W128BIT = 4, - W256BIT = 5 -}; - enum dma_event_q { EVENTQ_0 = 0, EVENTQ_1 = 1, @@ -94,11 +80,6 @@ enum dma_event_q { EVENTQ_DEFAULT = -1 }; -enum sync_dimension { - ASYNC = 0, - ABSYNC = 1 -}; - #define EDMA_CTLR_CHAN(ctlr, chan) (((ctlr) << 16) | (chan)) #define EDMA_CTLR(i) ((i) >> 16) #define EDMA_CHAN_SLOT(i) ((i) & 0xffff) @@ -121,22 +102,9 @@ void edma_free_channel(unsigned channel); int edma_alloc_slot(unsigned ctlr, int slot); void edma_free_slot(unsigned slot); -/* alloc/free a set of contiguous parameter RAM slots */ -int edma_alloc_cont_slots(unsigned ctlr, unsigned int id, int slot, int count); -int edma_free_cont_slots(unsigned slot, int count); - /* calls that operate on part of a parameter RAM slot */ -void edma_set_src(unsigned slot, dma_addr_t src_port, - enum address_mode mode, enum fifo_width); -void edma_set_dest(unsigned slot, dma_addr_t dest_port, - enum address_mode mode, enum fifo_width); dma_addr_t edma_get_position(unsigned slot, bool dst); -void edma_set_src_index(unsigned slot, s16 src_bidx, s16 src_cidx); -void edma_set_dest_index(unsigned slot, s16 dest_bidx, s16 dest_cidx); -void edma_set_transfer_params(unsigned slot, u16 acnt, u16 bcnt, u16 ccnt, - u16 bcnt_rld, enum sync_dimension sync_mode); void edma_link(unsigned from, unsigned to); -void edma_unlink(unsigned from); /* calls that operate on an entire parameter RAM slot */ void edma_write_slot(unsigned slot, const struct edmacc_param *params); @@ -146,7 +114,6 @@ void edma_read_slot(unsigned slot, struct edmacc_param *params); int edma_start(unsigned channel); void edma_stop(unsigned channel); void edma_clean_channel(unsigned channel); -void edma_clear_event(unsigned channel); void edma_pause(unsigned channel); void edma_resume(unsigned channel); -- cgit v1.2.3 From ca304fa9bb762f091e851d48de43f623c975d47a Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:49 +0300 Subject: ARM/dmaengine: edma: Public API to use private struct pointer Instead of relying on indexes pointing to edma private date in the global pointer array, pass the private data pointer via the public API. Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 38 ++++++++++++++++++++++---------------- 1 file changed, 22 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index c1862423b356..466021c03169 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -92,32 +92,40 @@ enum dma_event_q { #define EDMA_MAX_CC 2 +struct edma; + +struct edma *edma_get_data(struct device *edma_dev); + /* alloc/free DMA channels and their dedicated parameter RAM slots */ -int edma_alloc_channel(int channel, +int edma_alloc_channel(struct edma *cc, int channel, void (*callback)(unsigned channel, u16 ch_status, void *data), void *data, enum dma_event_q); -void edma_free_channel(unsigned channel); +void edma_free_channel(struct edma *cc, unsigned channel); /* alloc/free parameter RAM slots */ -int edma_alloc_slot(unsigned ctlr, int slot); -void edma_free_slot(unsigned slot); +int edma_alloc_slot(struct edma *cc, int slot); +void edma_free_slot(struct edma *cc, unsigned slot); /* calls that operate on part of a parameter RAM slot */ -dma_addr_t edma_get_position(unsigned slot, bool dst); -void edma_link(unsigned from, unsigned to); +dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst); +void edma_link(struct edma *cc, unsigned from, unsigned to); /* calls that operate on an entire parameter RAM slot */ -void edma_write_slot(unsigned slot, const struct edmacc_param *params); -void edma_read_slot(unsigned slot, struct edmacc_param *params); +void edma_write_slot(struct edma *cc, unsigned slot, + const struct edmacc_param *params); +void edma_read_slot(struct edma *cc, unsigned slot, + struct edmacc_param *params); /* channel control operations */ -int edma_start(unsigned channel); -void edma_stop(unsigned channel); -void edma_clean_channel(unsigned channel); -void edma_pause(unsigned channel); -void edma_resume(unsigned channel); +int edma_start(struct edma *cc, unsigned channel); +void edma_stop(struct edma *cc, unsigned channel); +void edma_clean_channel(struct edma *cc, unsigned channel); +void edma_pause(struct edma *cc, unsigned channel); +void edma_resume(struct edma *cc, unsigned channel); +int edma_trigger_channel(struct edma *cc, unsigned channel); -void edma_assign_channel_eventq(unsigned channel, enum dma_event_q eventq_no); +void edma_assign_channel_eventq(struct edma *cc, unsigned channel, + enum dma_event_q eventq_no); struct edma_rsv_info { @@ -141,6 +149,4 @@ struct edma_soc_info { const s16 (*xbar_chans)[2]; }; -int edma_trigger_channel(unsigned); - #endif -- cgit v1.2.3 From 2b6b3b7420190888793c49e97276e1e73bd7eaed Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Wed, 14 Oct 2015 14:42:53 +0300 Subject: ARM/dmaengine: edma: Merge the two drivers under drivers/dma/ Move the code out from arch/arm/common and merge it inside of the dmaengine driver. This change is done with as minimal (if eny) functional change to the code as possible to avoid introducing regression. Signed-off-by: Peter Ujfalusi Acked-by: Tony Lindgren Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 74 -------------------------------------- 1 file changed, 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index 466021c03169..6b9d500956e4 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -41,37 +41,6 @@ #ifndef EDMA_H_ #define EDMA_H_ -/* PaRAM slots are laid out like this */ -struct edmacc_param { - u32 opt; - u32 src; - u32 a_b_cnt; - u32 dst; - u32 src_dst_bidx; - u32 link_bcntrld; - u32 src_dst_cidx; - u32 ccnt; -} __packed; - -/* fields in edmacc_param.opt */ -#define SAM BIT(0) -#define DAM BIT(1) -#define SYNCDIM BIT(2) -#define STATIC BIT(3) -#define EDMA_FWID (0x07 << 8) -#define TCCMODE BIT(11) -#define EDMA_TCC(t) ((t) << 12) -#define TCINTEN BIT(20) -#define ITCINTEN BIT(21) -#define TCCHEN BIT(22) -#define ITCCHEN BIT(23) - -/*ch_status paramater of callback function possible values*/ -#define EDMA_DMA_COMPLETE 1 -#define EDMA_DMA_CC_ERROR 2 -#define EDMA_DMA_TC1_ERROR 3 -#define EDMA_DMA_TC2_ERROR 4 - enum dma_event_q { EVENTQ_0 = 0, EVENTQ_1 = 1, @@ -84,49 +53,6 @@ enum dma_event_q { #define EDMA_CTLR(i) ((i) >> 16) #define EDMA_CHAN_SLOT(i) ((i) & 0xffff) -#define EDMA_CHANNEL_ANY -1 /* for edma_alloc_channel() */ -#define EDMA_SLOT_ANY -1 /* for edma_alloc_slot() */ -#define EDMA_CONT_PARAMS_ANY 1001 -#define EDMA_CONT_PARAMS_FIXED_EXACT 1002 -#define EDMA_CONT_PARAMS_FIXED_NOT_EXACT 1003 - -#define EDMA_MAX_CC 2 - -struct edma; - -struct edma *edma_get_data(struct device *edma_dev); - -/* alloc/free DMA channels and their dedicated parameter RAM slots */ -int edma_alloc_channel(struct edma *cc, int channel, - void (*callback)(unsigned channel, u16 ch_status, void *data), - void *data, enum dma_event_q); -void edma_free_channel(struct edma *cc, unsigned channel); - -/* alloc/free parameter RAM slots */ -int edma_alloc_slot(struct edma *cc, int slot); -void edma_free_slot(struct edma *cc, unsigned slot); - -/* calls that operate on part of a parameter RAM slot */ -dma_addr_t edma_get_position(struct edma *cc, unsigned slot, bool dst); -void edma_link(struct edma *cc, unsigned from, unsigned to); - -/* calls that operate on an entire parameter RAM slot */ -void edma_write_slot(struct edma *cc, unsigned slot, - const struct edmacc_param *params); -void edma_read_slot(struct edma *cc, unsigned slot, - struct edmacc_param *params); - -/* channel control operations */ -int edma_start(struct edma *cc, unsigned channel); -void edma_stop(struct edma *cc, unsigned channel); -void edma_clean_channel(struct edma *cc, unsigned channel); -void edma_pause(struct edma *cc, unsigned channel); -void edma_resume(struct edma *cc, unsigned channel); -int edma_trigger_channel(struct edma *cc, unsigned channel); - -void edma_assign_channel_eventq(struct edma *cc, unsigned channel, - enum dma_event_q eventq_no); - struct edma_rsv_info { const s16 (*rsv_chans)[2]; -- cgit v1.2.3 From 1a7caca20ed56a80cea045327deaeb4e4379cbd1 Mon Sep 17 00:00:00 2001 From: Bjorn Andersson Date: Fri, 28 Aug 2015 10:39:20 -0700 Subject: soc: qcom: smd: Implement id_table driver matching Implement a id_table based driver maching mechanism for drivers that binds to fixed channels and doesn't need any additional configuration, e.g. IPCRTR and DIAG. Signed-off-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smd.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smd.h b/include/linux/soc/qcom/smd.h index d7e50aa6a4ac..d0cb6d189a0a 100644 --- a/include/linux/soc/qcom/smd.h +++ b/include/linux/soc/qcom/smd.h @@ -8,6 +8,14 @@ struct qcom_smd; struct qcom_smd_channel; struct qcom_smd_lookup; +/** + * struct qcom_smd_id - struct used for matching a smd device + * @name: name of the channel + */ +struct qcom_smd_id { + char name[20]; +}; + /** * struct qcom_smd_device - smd device struct * @dev: the device struct @@ -21,6 +29,7 @@ struct qcom_smd_device { /** * struct qcom_smd_driver - smd driver struct * @driver: underlying device driver + * @smd_match_table: static channel match table * @probe: invoked when the smd channel is found * @remove: invoked when the smd channel is closed * @callback: invoked when an inbound message is received on the channel, @@ -29,6 +38,8 @@ struct qcom_smd_device { */ struct qcom_smd_driver { struct device_driver driver; + const struct qcom_smd_id *smd_match_table; + int (*probe)(struct qcom_smd_device *dev); void (*remove)(struct qcom_smd_device *dev); int (*callback)(struct qcom_smd_device *, const void *, size_t); -- cgit v1.2.3 From 1a03964dec3cecb6382d172b9dfe318735c2cad7 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 2 Sep 2015 15:46:44 -0700 Subject: soc: qcom: Make qcom_smem_get() return a pointer Passing a void ** almost always requires a cast at the call site. Instead of littering the code with casts every time this function is called, have qcom_smem_get() return a void pointer to the location of the smem item. This frees the caller from having to cast the pointer. Cc: Bjorn Andersson Signed-off-by: Stephen Boyd Reviewed-by: Bjorn Andersson Signed-off-by: Andy Gross --- include/linux/soc/qcom/smem.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/soc/qcom/smem.h b/include/linux/soc/qcom/smem.h index bc9630d3aced..785e196ee2ca 100644 --- a/include/linux/soc/qcom/smem.h +++ b/include/linux/soc/qcom/smem.h @@ -4,7 +4,7 @@ #define QCOM_SMEM_HOST_ANY -1 int qcom_smem_alloc(unsigned host, unsigned item, size_t size); -int qcom_smem_get(unsigned host, unsigned item, void **ptr, size_t *size); +void *qcom_smem_get(unsigned host, unsigned item, size_t *size); int qcom_smem_get_free_space(unsigned host); -- cgit v1.2.3 From 2d3c277ca5b1a9c12cde1f760ff925b87608bc76 Mon Sep 17 00:00:00 2001 From: Rob Clark Date: Tue, 29 Sep 2015 15:48:55 -0400 Subject: qcom-scm: add missing prototype for qcom_scm_is_available() Signed-off-by: Rob Clark Signed-off-by: Andy Gross --- include/linux/qcom_scm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qcom_scm.h b/include/linux/qcom_scm.h index 6e7d5ec65838..9e12000914b3 100644 --- a/include/linux/qcom_scm.h +++ b/include/linux/qcom_scm.h @@ -23,6 +23,8 @@ struct qcom_scm_hdcp_req { u32 val; }; +extern bool qcom_scm_is_available(void); + extern bool qcom_scm_hdcp_available(void); extern int qcom_scm_hdcp_req(struct qcom_scm_hdcp_req *req, u32 req_cnt, u32 *resp); -- cgit v1.2.3 From 2f8e2c877784a0b23f02b41550170a24e14f5c95 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Aug 2015 09:27:14 +0200 Subject: move io-64-nonatomic*.h out of asm-generic These are not implementations of default architecture code but helpers for drivers. Move them to the place they belong to. Signed-off-by: Christoph Hellwig Acked-by: Darren Hart Acked-by: Hitoshi Mitake Signed-off-by: Arnd Bergmann --- include/linux/io-64-nonatomic-hi-lo.h | 32 ++++++++++++++++++++++++++++++++ include/linux/io-64-nonatomic-lo-hi.h | 32 ++++++++++++++++++++++++++++++++ 2 files changed, 64 insertions(+) create mode 100644 include/linux/io-64-nonatomic-hi-lo.h create mode 100644 include/linux/io-64-nonatomic-lo-hi.h (limited to 'include/linux') diff --git a/include/linux/io-64-nonatomic-hi-lo.h b/include/linux/io-64-nonatomic-hi-lo.h new file mode 100644 index 000000000000..11d7e840d913 --- /dev/null +++ b/include/linux/io-64-nonatomic-hi-lo.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_IO_64_NONATOMIC_HI_LO_H_ +#define _LINUX_IO_64_NONATOMIC_HI_LO_H_ + +#include +#include + +static inline __u64 hi_lo_readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + high = readl(p + 1); + low = readl(p); + + return low + ((u64)high << 32); +} + +static inline void hi_lo_writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val >> 32, addr + 4); + writel(val, addr); +} + +#ifndef readq +#define readq hi_lo_readq +#endif + +#ifndef writeq +#define writeq hi_lo_writeq +#endif + +#endif /* _LINUX_IO_64_NONATOMIC_HI_LO_H_ */ diff --git a/include/linux/io-64-nonatomic-lo-hi.h b/include/linux/io-64-nonatomic-lo-hi.h new file mode 100644 index 000000000000..1a4315f97360 --- /dev/null +++ b/include/linux/io-64-nonatomic-lo-hi.h @@ -0,0 +1,32 @@ +#ifndef _LINUX_IO_64_NONATOMIC_LO_HI_H_ +#define _LINUX_IO_64_NONATOMIC_LO_HI_H_ + +#include +#include + +static inline __u64 lo_hi_readq(const volatile void __iomem *addr) +{ + const volatile u32 __iomem *p = addr; + u32 low, high; + + low = readl(p); + high = readl(p + 1); + + return low + ((u64)high << 32); +} + +static inline void lo_hi_writeq(__u64 val, volatile void __iomem *addr) +{ + writel(val, addr); + writel(val >> 32, addr + 4); +} + +#ifndef readq +#define readq lo_hi_readq +#endif + +#ifndef writeq +#define writeq lo_hi_writeq +#endif + +#endif /* _LINUX_IO_64_NONATOMIC_LO_HI_H_ */ -- cgit v1.2.3 From a1164a3ac75feeab86f6c02fabdfbf24b81e3c1a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 28 Aug 2015 09:27:15 +0200 Subject: move count_zeroes.h out of asm-generic This header contains a few helpers currenly only used by the mpi implementation, and not default implementation of architecture code. Signed-off-by: Christoph Hellwig Signed-off-by: Arnd Bergmann --- include/linux/count_zeros.h | 57 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 57 insertions(+) create mode 100644 include/linux/count_zeros.h (limited to 'include/linux') diff --git a/include/linux/count_zeros.h b/include/linux/count_zeros.h new file mode 100644 index 000000000000..363da78c4f64 --- /dev/null +++ b/include/linux/count_zeros.h @@ -0,0 +1,57 @@ +/* Count leading and trailing zeros functions + * + * Copyright (C) 2012 Red Hat, Inc. All Rights Reserved. + * Written by David Howells (dhowells@redhat.com) + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public Licence + * as published by the Free Software Foundation; either version + * 2 of the Licence, or (at your option) any later version. + */ + +#ifndef _LINUX_BITOPS_COUNT_ZEROS_H_ +#define _LINUX_BITOPS_COUNT_ZEROS_H_ + +#include + +/** + * count_leading_zeros - Count the number of zeros from the MSB back + * @x: The value + * + * Count the number of leading zeros from the MSB going towards the LSB in @x. + * + * If the MSB of @x is set, the result is 0. + * If only the LSB of @x is set, then the result is BITS_PER_LONG-1. + * If @x is 0 then the result is COUNT_LEADING_ZEROS_0. + */ +static inline int count_leading_zeros(unsigned long x) +{ + if (sizeof(x) == 4) + return BITS_PER_LONG - fls(x); + else + return BITS_PER_LONG - fls64(x); +} + +#define COUNT_LEADING_ZEROS_0 BITS_PER_LONG + +/** + * count_trailing_zeros - Count the number of zeros from the LSB forwards + * @x: The value + * + * Count the number of trailing zeros from the LSB going towards the MSB in @x. + * + * If the LSB of @x is set, the result is 0. + * If only the MSB of @x is set, then the result is BITS_PER_LONG-1. + * If @x is 0 then the result is COUNT_TRAILING_ZEROS_0. + */ +static inline int count_trailing_zeros(unsigned long x) +{ +#define COUNT_TRAILING_ZEROS_0 (-1) + + if (sizeof(x) == 4) + return ffs(x); + else + return (x != 0) ? __ffs(x) : COUNT_TRAILING_ZEROS_0; +} + +#endif /* _LINUX_BITOPS_COUNT_ZEROS_H_ */ -- cgit v1.2.3 From 9b3fedde27d3d63055c43c05e8254e252e58ba48 Mon Sep 17 00:00:00 2001 From: Lukasz Anaczkowski Date: Wed, 9 Sep 2015 15:47:28 +0200 Subject: ACPI / tables: Add acpi_subtable_proc to ACPI table parsers ACPI subtable parsing needs to be extended to allow two or more handlers to be run in the same ACPI table walk, thus adding acpi_subtable_proc structure which stores () ACPI table id () handler that processes table () counter how many items has been processed and passing it to acpi_parse_entries_array() and acpi_table_parse_entries_array(). This is needed to fix CPU enumeration when APIC/X2APIC entries are interleaved. Signed-off-by: Lukasz Anaczkowski Reviewed-by: Marc Zyngier Tested-by: Marc Zyngier Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 7235c4851460..b0299f8db660 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -131,6 +131,12 @@ static inline void acpi_initrd_override(void *data, size_t size) (!entry) || (unsigned long)entry + sizeof(*entry) > end || \ ((struct acpi_subtable_header *)entry)->length < sizeof(*entry)) +struct acpi_subtable_proc { + int id; + acpi_tbl_entry_handler handler; + int count; +}; + char * __acpi_map_table (unsigned long phys_addr, unsigned long size); void __acpi_unmap_table(char *map, unsigned long size); int early_acpi_boot_init(void); @@ -146,9 +152,16 @@ int __init acpi_parse_entries(char *id, unsigned long table_size, struct acpi_table_header *table_header, int entry_id, unsigned int max_entries); int __init acpi_table_parse_entries(char *id, unsigned long table_size, - int entry_id, - acpi_tbl_entry_handler handler, - unsigned int max_entries); + int entry_id, + acpi_tbl_entry_handler handler, + unsigned int max_entries); +int __init acpi_table_parse_entries(char *id, unsigned long table_size, + int entry_id, + acpi_tbl_entry_handler handler, + unsigned int max_entries); +int __init acpi_table_parse_entries_array(char *id, unsigned long table_size, + struct acpi_subtable_proc *proc, int proc_num, + unsigned int max_entries); int acpi_table_parse_madt(enum acpi_madt_type id, acpi_tbl_entry_handler handler, unsigned int max_entries); -- cgit v1.2.3 From fd76ee4da55abb21babfc69310d321b9cb9a32e0 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 14 Oct 2015 17:43:45 +0300 Subject: net/mlx5_core: Fix internal error detection conditions The detection of a fatal condition has been updated to take into account the state reported by the device or by detecting an all ones read of the firmware version which indicates that the device is not accessible. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 41a32873f608..62b7d439813d 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -393,6 +393,7 @@ struct mlx5_core_health { struct timer_list timer; u32 prev; int miss_counter; + bool sick; struct workqueue_struct *wq; struct work_struct work; }; -- cgit v1.2.3 From 89d44f0a6c732db23b219be708e2fe1e03ee4842 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Wed, 14 Oct 2015 17:43:46 +0300 Subject: net/mlx5_core: Add pci error handlers to mlx5_core driver This patch implement the pci_error_handlers for mlx5_core which allow the driver to recover from PCI error. Once an error is detected in the PCI, the mlx5_pci_err_detected is called and it: 1) Marks the device to be in 'Internal Error' state. 2) Dispatches an event to the mlx5_ib to flush all the outstanding cqes with error. 3) Returns all the on going commands with error. 4) Unloads the driver. Afterwards, the FW is reset and mlx5_pci_slot_reset is called and it enables the device and restore it's pci state. If the later succeeds, mlx5_pci_resume is called, and it loads the SW stack. Signed-off-by: Majd Dibbiny Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 62b7d439813d..9aba8d5139fa 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -487,8 +487,26 @@ struct mlx5_priv { spinlock_t ctx_lock; }; +enum mlx5_device_state { + MLX5_DEVICE_STATE_UP, + MLX5_DEVICE_STATE_INTERNAL_ERROR, +}; + +enum mlx5_interface_state { + MLX5_INTERFACE_STATE_DOWN, + MLX5_INTERFACE_STATE_UP, +}; + +enum mlx5_pci_status { + MLX5_PCI_STATUS_DISABLED, + MLX5_PCI_STATUS_ENABLED, +}; + struct mlx5_core_dev { struct pci_dev *pdev; + /* sync pci state */ + struct mutex pci_status_mutex; + enum mlx5_pci_status pci_status; u8 rev_id; char board_id[MLX5_BOARD_ID_LEN]; struct mlx5_cmd cmd; @@ -497,6 +515,10 @@ struct mlx5_core_dev { u32 hca_caps_max[MLX5_CAP_NUM][MLX5_UN_SZ_DW(hca_cap_union)]; phys_addr_t iseg_base; struct mlx5_init_seg __iomem *iseg; + enum mlx5_device_state state; + /* sync interface state */ + struct mutex intf_state_mutex; + enum mlx5_interface_state interface_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -- cgit v1.2.3 From e3297246c2c8cf8548ba722da3e3a8104cdcd035 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Wed, 14 Oct 2015 17:43:47 +0300 Subject: net/mlx5_core: Wait for FW readiness on startup On device initialization, wait till firmware indicates that that it is done with initialization before proceeding to initialize the device. Also update initialization segment layout to match driver/firmware interface definitions. Signed-off-by: Eli Cohen Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/device.h | 3 ++- include/linux/mlx5/driver.h | 5 +++++ 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 2a0b95662548..0b473cbfa7ef 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -439,7 +439,8 @@ struct mlx5_init_seg { __be32 cmdq_addr_h; __be32 cmdq_addr_l_sz; __be32 cmd_dbell; - __be32 rsvd1[121]; + __be32 rsvd1[120]; + __be32 initializing; struct health_buffer health; __be32 rsvd2[884]; __be32 health_counter; diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 9aba8d5139fa..5c857f2a20d7 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -826,6 +826,11 @@ void mlx5_core_put_rsc(struct mlx5_core_rsc_common *common); int mlx5_query_odp_caps(struct mlx5_core_dev *dev, struct mlx5_odp_caps *odp_caps); +static inline int fw_initializing(struct mlx5_core_dev *dev) +{ + return ioread32be(&dev->iseg->initializing) >> 31; +} + static inline u32 mlx5_mkey_to_idx(u32 mkey) { return mkey >> 8; -- cgit v1.2.3 From 2b3ddf27f48c8061f0676c5a8796008099945280 Mon Sep 17 00:00:00 2001 From: Jack Morgenstein Date: Wed, 14 Oct 2015 17:43:48 +0300 Subject: net/mlx4_core: Replace VF zero mac with random mac in mlx4_core By design, when no default MAC addresses are set in the Hypervisor for VFs, the VFs are passed zero-macs. When such a MAC is received by the VF, it generates a random MAC address and registers that MAC address with the Hypervisor. This random mac generation is currently done in the mlx4_en module. There is a problem, though, if the mlx4_ib module is loaded by a VF before the mlx4_en module. In this case, for RoCE, mlx4_ib will see the un-replaced zero-mac and register that zero-mac as part of QP1 initialization. Having a zero-mac in the port's MAC table creates problems for a Baseboard Management Console. The BMC occasionally sends packets with a zero-mac destination MAC. If there is a zero-mac present in the port's MAC table, the FW will send such BMC packets to the host driver rather than to the wire, and BMC will stop working. To address this problem, we move the replacement of zero-mac addresses with random-mac addresses to procedure mlx4_slave_cap(), which is part of the driver startup for VFs, and is before activation of mlx4_ib and mlx4_en. As a result, zero-mac addresses will never be registered in the port MAC table by the driver. In addition, when mlx4_en does initialize the net device, it needs to set the NET_ADDR_RANDOM flag in the netdev structure if the address was randomly generated. This is done so that udev on the VM does not create a new device name after each VF probe (VM boot and such). To accomplish this, we add a per-port flag in mlx4_dev which gets set whenever mlx4_core replaces a zero-mac with a randomly-generated mac. This flag is examined when mlx4_en initializes the net-device. Fix was suggested by Matan Barak Signed-off-by: Jack Morgenstein Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index baad4cb8e9b0..5a8677bafe04 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -833,6 +833,7 @@ struct mlx4_dev { struct mlx4_quotas quotas; struct radix_tree_root qp_table_tree; u8 rev_id; + u8 port_random_macs; char board_id[MLX4_BOARD_ID_LEN]; int numa_node; int oper_log_mgm_entry_size; -- cgit v1.2.3 From d5c373eb5610686162ff50429f63f4c00c554799 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 14 Oct 2015 12:07:55 -0700 Subject: posix_cpu_timer: Convert cputimer->running to bool In the next patch in this series, a new field 'checking_timer' will be added to 'struct thread_group_cputimer'. Both this and the existing 'running' integer field are just used as boolean values. To save space in the structure, we can make both of these fields booleans. This is a preparatory patch to convert the existing running integer field to a boolean. Suggested-by: George Spelvin Signed-off-by: Jason Low Reviewed: George Spelvin Cc: Oleg Nesterov Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Davidlohr Bueso Cc: Steven Rostedt Cc: hideaki.kimura@hpe.com Cc: terry.rudd@hpe.com Cc: scott.norton@hpe.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444849677-29330-4-git-send-email-jason.low2@hp.com Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 2 +- include/linux/sched.h | 6 +++--- 2 files changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index e38681f4912d..c43b80f3f875 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -59,7 +59,7 @@ extern struct fs_struct init_fs; .rlim = INIT_RLIMITS, \ .cputimer = { \ .cputime_atomic = INIT_CPUTIME_ATOMIC, \ - .running = 0, \ + .running = false, \ }, \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..6c8504ade2ba 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -617,15 +617,15 @@ struct task_cputime_atomic { /** * struct thread_group_cputimer - thread group interval timer counts * @cputime_atomic: atomic thread group interval timers. - * @running: non-zero when there are timers running and - * @cputime receives updates. + * @running: true when there are timers running and + * @cputime_atomic receives updates. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. */ struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; - int running; + bool running; }; #include -- cgit v1.2.3 From c8d75aa47dd585c9538a8205e9bb9847e12cfb84 Mon Sep 17 00:00:00 2001 From: Jason Low Date: Wed, 14 Oct 2015 12:07:56 -0700 Subject: posix_cpu_timer: Reduce unnecessary sighand lock contention It was found while running a database workload on large systems that significant time was spent trying to acquire the sighand lock. The issue was that whenever an itimer expired, many threads ended up simultaneously trying to send the signal. Most of the time, nothing happened after acquiring the sighand lock because another thread had just already sent the signal and updated the "next expire" time. The fastpath_timer_check() didn't help much since the "next expire" time was updated after the threads exit fastpath_timer_check(). This patch addresses this by having the thread_group_cputimer structure maintain a boolean to signify when a thread in the group is already checking for process wide timers, and adds extra logic in the fastpath to check the boolean. Signed-off-by: Jason Low Reviewed-by: Oleg Nesterov Reviewed-by: George Spelvin Cc: Paul E. McKenney Cc: Frederic Weisbecker Cc: Davidlohr Bueso Cc: Steven Rostedt Cc: hideaki.kimura@hpe.com Cc: terry.rudd@hpe.com Cc: scott.norton@hpe.com Cc: Peter Zijlstra Link: http://lkml.kernel.org/r/1444849677-29330-5-git-send-email-jason.low2@hp.com Signed-off-by: Thomas Gleixner --- include/linux/init_task.h | 1 + include/linux/sched.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/init_task.h b/include/linux/init_task.h index c43b80f3f875..810a34f60424 100644 --- a/include/linux/init_task.h +++ b/include/linux/init_task.h @@ -60,6 +60,7 @@ extern struct fs_struct init_fs; .cputimer = { \ .cputime_atomic = INIT_CPUTIME_ATOMIC, \ .running = false, \ + .checking_timer = false, \ }, \ INIT_PREV_CPUTIME(sig) \ .cred_guard_mutex = \ diff --git a/include/linux/sched.h b/include/linux/sched.h index 6c8504ade2ba..f87559df5b75 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -619,6 +619,8 @@ struct task_cputime_atomic { * @cputime_atomic: atomic thread group interval timers. * @running: true when there are timers running and * @cputime_atomic receives updates. + * @checking_timer: true when a thread in the group is in the + * process of checking for thread group timers. * * This structure contains the version of task_cputime, above, that is * used for thread group CPU timer calculations. @@ -626,6 +628,7 @@ struct task_cputime_atomic { struct thread_group_cputimer { struct task_cputime_atomic cputime_atomic; bool running; + bool checking_timer; }; #include -- cgit v1.2.3 From ae853ddb9ad5e7c01cad3fbf016040acd961f407 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:58:59 +0100 Subject: iommu/vt-d: Introduce intel_iommu=pasid28, and pasid_enabled() macro As long as we use an identity mapping to work around the worst of the hardware bugs which caused us to defeature it and change the definition of the capability bit, we *can* use PASID support on the devices which advertised it in bit 28 of the Extended Capability Register. Allow people to do so with 'intel_iommu=pasid28' on the command line. Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 08802b99f057..1d69c1d3aa9a 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -121,7 +121,7 @@ static inline void dmar_writeq(void __iomem *addr, u64 val) #define ecap_srs(e) ((e >> 31) & 0x1) #define ecap_ers(e) ((e >> 30) & 0x1) #define ecap_prs(e) ((e >> 29) & 0x1) -/* PASID support used to be on bit 28 */ +#define ecap_broken_pasid(e) ((e >> 28) & 0x1) #define ecap_dis(e) ((e >> 27) & 0x1) #define ecap_nest(e) ((e >> 26) & 0x1) #define ecap_mts(e) ((e >> 25) & 0x1) -- cgit v1.2.3 From 8a94ade4ce6df22006b96c5c9a8d6d12fce67585 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 24 Mar 2015 14:54:56 +0000 Subject: iommu/vt-d: Add initial support for PASID tables Add CONFIG_INTEL_IOMMU_SVM, and allocate PASID tables on supported hardware. Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 1d69c1d3aa9a..0f38e60d40ad 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -325,6 +325,9 @@ enum { #define VTD_FLAG_TRANS_PRE_ENABLED (1 << 0) #define VTD_FLAG_IRQ_REMAP_PRE_ENABLED (1 << 1) +struct pasid_entry; +struct pasid_state_entry; + struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ u64 reg_phys; /* physical address of hw register set */ @@ -347,6 +350,15 @@ struct intel_iommu { struct root_entry *root_entry; /* virtual address */ struct iommu_flush flush; +#endif +#ifdef CONFIG_INTEL_IOMMU_SVM + /* These are large and need to be contiguous, so we allocate just + * one for now. We'll maybe want to rethink that if we truly give + * devices away to userspace processes (e.g. for DPDK) and don't + * want to trust that userspace will use *only* the PASID it was + * told to. But while it's all driver-arbitrated, we're fine. */ + struct pasid_entry *pasid_table; + struct pasid_state_entry *pasid_state_table; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -387,6 +399,9 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); +extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); + extern const struct attribute_group *intel_iommu_groups[]; #endif -- cgit v1.2.3 From 2f26e0a9c9860db290d63e9d85c2c8c09813677f Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 9 Sep 2015 11:40:47 +0100 Subject: iommu/vt-d: Add basic SVM PASID support This provides basic PASID support for endpoint devices, tested with a version of the i915 driver. Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 7 ++++ include/linux/intel-iommu.h | 68 ++++++++++++++++++++++++++++++++--- include/linux/intel-svm.h | 82 +++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 152 insertions(+), 5 deletions(-) create mode 100644 include/linux/intel-svm.h (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 7ac17f57250e..0e114bfabeed 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -20,6 +20,13 @@ #define CONTEXT_TT_MULTI_LEVEL 0 #define CONTEXT_TT_DEV_IOTLB 1 #define CONTEXT_TT_PASS_THROUGH 2 +/* Extended context entry types */ +#define CONTEXT_TT_PT_PASID 4 +#define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 +#define CONTEXT_TT_MASK (7ULL << 2) + +#define CONTEXT_PRS (1ULL << 9) +#define CONTEXT_PASIDE (1ULL << 11) struct intel_iommu; struct dmar_domain; diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 0f38e60d40ad..46add607567b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -1,5 +1,9 @@ /* - * Copyright (c) 2006, Intel Corporation. + * Copyright © 2006-2015, Intel Corporation. + * + * Authors: Ashok Raj + * Anil S Keshavamurthy + * David Woodhouse * * This program is free software; you can redistribute it and/or modify it * under the terms and conditions of the GNU General Public License, @@ -13,10 +17,6 @@ * You should have received a copy of the GNU General Public License along with * this program; if not, write to the Free Software Foundation, Inc., 59 Temple * Place - Suite 330, Boston, MA 02111-1307 USA. - * - * Copyright (C) 2006-2008 Intel Corporation - * Author: Ashok Raj - * Author: Anil S Keshavamurthy */ #ifndef _INTEL_IOMMU_H_ @@ -25,7 +25,10 @@ #include #include #include +#include #include +#include +#include #include #include @@ -251,6 +254,9 @@ enum { #define QI_DIOTLB_TYPE 0x3 #define QI_IEC_TYPE 0x4 #define QI_IWD_TYPE 0x5 +#define QI_EIOTLB_TYPE 0x6 +#define QI_PC_TYPE 0x7 +#define QI_DEIOTLB_TYPE 0x8 #define QI_IEC_SELECTIVE (((u64)1) << 4) #define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) @@ -278,6 +284,34 @@ enum { #define QI_DEV_IOTLB_SIZE 1 #define QI_DEV_IOTLB_MAX_INVS 32 +#define QI_PC_PASID(pasid) (((u64)pasid) << 32) +#define QI_PC_DID(did) (((u64)did) << 16) +#define QI_PC_GRAN(gran) (((u64)gran) << 4) + +#define QI_PC_ALL_PASIDS (QI_PC_TYPE | QI_PC_GRAN(0)) +#define QI_PC_PASID_SEL (QI_PC_TYPE | QI_PC_GRAN(1)) + +#define QI_EIOTLB_ADDR(addr) ((u64)(addr) & VTD_PAGE_MASK) +#define QI_EIOTLB_GL(gl) (((u64)gl) << 7) +#define QI_EIOTLB_IH(ih) (((u64)ih) << 6) +#define QI_EIOTLB_AM(am) (((u64)am)) +#define QI_EIOTLB_PASID(pasid) (((u64)pasid) << 32) +#define QI_EIOTLB_DID(did) (((u64)did) << 16) +#define QI_EIOTLB_GRAN(gran) (((u64)gran) << 4) + +#define QI_DEV_EIOTLB_ADDR(a) ((u64)(a) & VTD_PAGE_MASK) +#define QI_DEV_EIOTLB_SIZE (((u64)1) << 11) +#define QI_DEV_EIOTLB_GLOB(g) ((u64)g) +#define QI_DEV_EIOTLB_PASID(p) (((u64)p) << 32) +#define QI_DEV_EIOTLB_SID(sid) ((u64)((sid) & 0xffff) << 32) +#define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) +#define QI_DEV_EIOTLB_MAX_INVS 32 + +#define QI_GRAN_ALL_ALL 0 +#define QI_GRAN_NONG_ALL 1 +#define QI_GRAN_NONG_PASID 2 +#define QI_GRAN_PSI_PASID 3 + struct qi_desc { u64 low, high; }; @@ -359,6 +393,7 @@ struct intel_iommu { * told to. But while it's all driver-arbitrated, we're fine. */ struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; + struct idr pasid_idr; #endif struct q_inval *qi; /* Queued invalidation info */ u32 *iommu_state; /* Store iommu states between suspend and resume.*/ @@ -399,9 +434,32 @@ extern int qi_submit_sync(struct qi_desc *desc, struct intel_iommu *iommu); extern int dmar_ir_support(void); +#ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +struct intel_svm_dev { + struct list_head list; + struct rcu_head rcu; + struct device *dev; + int users; + u16 did; + u16 dev_iotlb:1; + u16 sid, qdep; +}; + +struct intel_svm { + struct mmu_notifier notifier; + struct mm_struct *mm; + struct intel_iommu *iommu; + int pasid; + struct list_head devs; +}; + +extern int intel_iommu_enable_pasid(struct intel_iommu *iommu, struct intel_svm_dev *sdev); +extern struct intel_iommu *intel_svm_device_to_iommu(struct device *dev); +#endif + extern const struct attribute_group *intel_iommu_groups[]; #endif diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h new file mode 100644 index 000000000000..42f80ad7a7a0 --- /dev/null +++ b/include/linux/intel-svm.h @@ -0,0 +1,82 @@ +/* + * Copyright © 2015 Intel Corporation. + * + * Authors: David Woodhouse + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#ifndef __INTEL_SVM_H__ +#define __INTEL_SVM_H__ + +#ifdef CONFIG_INTEL_IOMMU_SVM + +struct device; + +/** + * intel_svm_bind_mm() - Bind the current process to a PASID + * @dev: Device to be granted acccess + * @pasid: Address for allocated PASID + * + * This function attempts to enable PASID support for the given device. + * If the @pasid argument is non-%NULL, a PASID is allocated for access + * to the MM of the current process. + * + * By using a %NULL value for the @pasid argument, this function can + * be used to simply validate that PASID support is available for the + * given device — i.e. that it is behind an IOMMU which has the + * requisite support, and is enabled. + * + * Page faults are handled transparently by the IOMMU code, and there + * should be no need for the device driver to be involved. If a page + * fault cannot be handled (i.e. is an invalid address rather than + * just needs paging in), then the page request will be completed by + * the core IOMMU code with appropriate status, and the device itself + * can then report the resulting fault to its driver via whatever + * mechanism is appropriate. + * + * Multiple calls from the same process may result in the same PASID + * being re-used. A reference count is kept. + */ +extern int intel_svm_bind_mm(struct device *dev, int *pasid); + +/** + * intel_svm_unbind_mm() - Unbind a specified PASID + * @dev: Device for which PASID was allocated + * @pasid: PASID value to be unbound + * + * This function allows a PASID to be retired when the device no + * longer requires access to the address space of a given process. + * + * If the use count for the PASID in question reaches zero, the + * PASID is revoked and may no longer be used by hardware. + * + * Device drivers are required to ensure that no access (including + * page requests) is currently outstanding for the PASID in question, + * before calling this function. + */ +extern int intel_svm_unbind_mm(struct device *dev, int pasid); + +#else /* CONFIG_INTEL_IOMMU_SVM */ + +static inline int intel_svm_bind_mm(struct device *dev, int *pasid) +{ + return -ENOSYS; +} + +static inline int intel_svm_unbind_mm(struct device *dev, int pasid) +{ + BUG(); +} +#endif /* CONFIG_INTEL_IOMMU_SVM */ + +#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL)) + +#endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 907fea3491d52063bb37b1f1ce0cf8a4ae70944c Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 14:11:13 +0100 Subject: iommu/vt-d: Implement deferred invalidate for SVM Signed-off-by: David Woodhouse --- include/linux/dma_remapping.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/dma_remapping.h b/include/linux/dma_remapping.h index 0e114bfabeed..187c10299722 100644 --- a/include/linux/dma_remapping.h +++ b/include/linux/dma_remapping.h @@ -25,6 +25,7 @@ #define CONTEXT_TT_PT_PASID_DEV_IOTLB 5 #define CONTEXT_TT_MASK (7ULL << 2) +#define CONTEXT_DINVE (1ULL << 8) #define CONTEXT_PRS (1ULL << 9) #define CONTEXT_PASIDE (1ULL << 11) -- cgit v1.2.3 From 1208225cf48fa3b170b6dfe7369f15c295260755 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 15:37:03 +0100 Subject: iommu/vt-d: Generalise DMAR MSI setup to allow for page request events Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 46add607567b..f16a2b9124d1 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -60,6 +60,14 @@ #define DMAR_IQA_REG 0x90 /* Invalidation queue addr register */ #define DMAR_ICS_REG 0x9c /* Invalidation complete status register */ #define DMAR_IRTA_REG 0xb8 /* Interrupt remapping table addr register */ +#define DMAR_PQH_REG 0xc0 /* Page request queue head register */ +#define DMAR_PQT_REG 0xc8 /* Page request queue tail register */ +#define DMAR_PQA_REG 0xd0 /* Page request queue address register */ +#define DMAR_PRS_REG 0xdc /* Page request status register */ +#define DMAR_PECTL_REG 0xe0 /* Page request event control register */ +#define DMAR_PEDATA_REG 0xe4 /* Page request event interrupt data register */ +#define DMAR_PEADDR_REG 0xe8 /* Page request event interrupt addr register */ +#define DMAR_PEUADDR_REG 0xec /* Page request event Upper address register */ #define OFFSET_STRIDE (9) @@ -373,7 +381,7 @@ struct intel_iommu { int seq_id; /* sequence id of the iommu */ int agaw; /* agaw of this iommu */ int msagaw; /* max sagaw of this iommu */ - unsigned int irq; + unsigned int irq, pr_irq; u16 segment; /* PCI segment# */ unsigned char name[13]; /* Device Name */ -- cgit v1.2.3 From 8595798ca34d186d39abcb277591e541776c0ef5 Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Thu, 15 Oct 2015 10:30:36 -0400 Subject: jbd2: gate checksum calculations on crc driver presence, not sb flags Change the journal's checksum functions to gate on whether or not the crc32c driver is loaded, and gate the loading on the superblock bits. This prevents a journal crash if someone loads a journal in no-csum mode and then randomizes the superblock, thus flipping on the feature bits. Tested-By: Nikolay Borisov Reported-by: Nikolay Borisov Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index df07e78487d5..6da6f89722e1 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1338,13 +1338,18 @@ static inline int tid_geq(tid_t x, tid_t y) extern int jbd2_journal_blocks_per_page(struct inode *inode); extern size_t journal_tag_bytes(journal_t *journal); +static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) +{ + return JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2) || + JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3); +} + static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) { - if (JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V2) || - JBD2_HAS_INCOMPAT_FEATURE(journal, JBD2_FEATURE_INCOMPAT_CSUM_V3)) - return 1; + WARN_ON_ONCE(jbd2_journal_has_csum_v2or3_feature(journal) && + journal->j_chksum_driver == NULL); - return 0; + return journal->j_chksum_driver != NULL; } /* -- cgit v1.2.3 From a222a7f0bb6c94c31cc9c755110593656f19de89 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 7 Oct 2015 23:35:18 +0100 Subject: iommu/vt-d: Implement page request handling Largely based on the driver-mode implementation by Jesse Barnes. Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index f16a2b9124d1..e5b80d31eb1b 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -265,6 +265,8 @@ enum { #define QI_EIOTLB_TYPE 0x6 #define QI_PC_TYPE 0x7 #define QI_DEIOTLB_TYPE 0x8 +#define QI_PGRP_RESP_TYPE 0x9 +#define QI_PSTRM_RESP_TYPE 0xa #define QI_IEC_SELECTIVE (((u64)1) << 4) #define QI_IEC_IIDEX(idx) (((u64)(idx & 0xffff) << 32)) @@ -315,6 +317,25 @@ enum { #define QI_DEV_EIOTLB_QDEP(qd) (((qd) & 0x1f) << 16) #define QI_DEV_EIOTLB_MAX_INVS 32 +#define QI_PGRP_IDX(idx) (((u64)(idx)) << 55) +#define QI_PGRP_PRIV(priv) (((u64)(priv)) << 32) +#define QI_PGRP_RESP_CODE(res) ((u64)(res)) +#define QI_PGRP_PASID(pasid) (((u64)(pasid)) << 32) +#define QI_PGRP_DID(did) (((u64)(did)) << 16) +#define QI_PGRP_PASID_P(p) (((u64)(p)) << 4) + +#define QI_PSTRM_ADDR(addr) (((u64)(addr)) & VTD_PAGE_MASK) +#define QI_PSTRM_DEVFN(devfn) (((u64)(devfn)) << 4) +#define QI_PSTRM_RESP_CODE(res) ((u64)(res)) +#define QI_PSTRM_IDX(idx) (((u64)(idx)) << 55) +#define QI_PSTRM_PRIV(priv) (((u64)(priv)) << 32) +#define QI_PSTRM_BUS(bus) (((u64)(bus)) << 24) +#define QI_PSTRM_PASID(pasid) (((u64)(pasid)) << 4) + +#define QI_RESP_SUCCESS 0x0 +#define QI_RESP_INVALID 0x1 +#define QI_RESP_FAILURE 0xf + #define QI_GRAN_ALL_ALL 0 #define QI_GRAN_NONG_ALL 1 #define QI_GRAN_NONG_PASID 2 @@ -369,6 +390,7 @@ enum { struct pasid_entry; struct pasid_state_entry; +struct page_req_dsc; struct intel_iommu { void __iomem *reg; /* Pointer to hardware regs, virtual addr */ @@ -401,6 +423,8 @@ struct intel_iommu { * told to. But while it's all driver-arbitrated, we're fine. */ struct pasid_entry *pasid_table; struct pasid_state_entry *pasid_state_table; + struct page_req_dsc *prq; + unsigned char prq_name[16]; /* Name for PRQ interrupt */ struct idr pasid_idr; #endif struct q_inval *qi; /* Queued invalidation info */ @@ -445,6 +469,8 @@ extern int dmar_ir_support(void); #ifdef CONFIG_INTEL_IOMMU_SVM extern int intel_svm_alloc_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); +extern int intel_svm_enable_prq(struct intel_iommu *iommu); +extern int intel_svm_finish_prq(struct intel_iommu *iommu); struct intel_svm_dev { struct list_head list; -- cgit v1.2.3 From 0204a49609824163092c32a8aeb073f7e9acc76d Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 13 Oct 2015 17:18:10 +0100 Subject: iommu/vt-d: Add callback to device driver on page faults Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 3 +++ include/linux/intel-svm.h | 21 ++++++++++++++++++--- 2 files changed, 21 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index e5b80d31eb1b..57be14fce640 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -472,10 +472,13 @@ extern int intel_svm_free_pasid_tables(struct intel_iommu *iommu); extern int intel_svm_enable_prq(struct intel_iommu *iommu); extern int intel_svm_finish_prq(struct intel_iommu *iommu); +struct svm_dev_ops; + struct intel_svm_dev { struct list_head list; struct rcu_head rcu; struct device *dev; + struct svm_dev_ops *ops; int users; u16 did; u16 dev_iotlb:1; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 42f80ad7a7a0..239f8a13a332 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -20,10 +20,23 @@ struct device; +struct svm_dev_ops { + void (*fault_cb)(struct device *dev, int pasid, u64 address, + u32 private, int rwxp, int response); +}; + +/* Values for rxwp in fault_cb callback */ +#define SVM_REQ_READ (1<<3) +#define SVM_REQ_WRITE (1<<2) +#define SVM_REQ_EXEC (1<<1) +#define SVM_REQ_PRIV (1<<0) + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted acccess * @pasid: Address for allocated PASID + * @flags: Flags. Later for requesting supervisor mode, etc. + * @ops: Callbacks to device driver * * This function attempts to enable PASID support for the given device. * If the @pasid argument is non-%NULL, a PASID is allocated for access @@ -45,7 +58,8 @@ struct device; * Multiple calls from the same process may result in the same PASID * being re-used. A reference count is kept. */ -extern int intel_svm_bind_mm(struct device *dev, int *pasid); +extern int intel_svm_bind_mm(struct device *dev, int *pasid, int flags, + struct svm_dev_ops *ops); /** * intel_svm_unbind_mm() - Unbind a specified PASID @@ -66,7 +80,8 @@ extern int intel_svm_unbind_mm(struct device *dev, int pasid); #else /* CONFIG_INTEL_IOMMU_SVM */ -static inline int intel_svm_bind_mm(struct device *dev, int *pasid) +static inline int intel_svm_bind_mm(struct device *dev, int *pasid, + int flags, struct svm_dev_ops *ops) { return -ENOSYS; } @@ -77,6 +92,6 @@ static inline int intel_svm_unbind_mm(struct device *dev, int pasid) } #endif /* CONFIG_INTEL_IOMMU_SVM */ -#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL)) +#define intel_svm_available(dev) (!intel_svm_bind_mm((dev), NULL, 0, NULL)) #endif /* __INTEL_SVM_H__ */ -- cgit v1.2.3 From 569e4f7782fb92d0e1b395b5fb01de642dd74dcf Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 13:59:14 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_PRIVATE_PASID to allocate unique PASIDs Signed-off-by: David Woodhouse --- include/linux/intel-iommu.h | 1 + include/linux/intel-svm.h | 11 +++++++++++ 2 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/intel-iommu.h b/include/linux/intel-iommu.h index 57be14fce640..821273ca4873 100644 --- a/include/linux/intel-iommu.h +++ b/include/linux/intel-iommu.h @@ -489,6 +489,7 @@ struct intel_svm { struct mmu_notifier notifier; struct mm_struct *mm; struct intel_iommu *iommu; + int flags; int pasid; struct list_head devs; }; diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 239f8a13a332..dd94ab45a4db 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -31,6 +31,17 @@ struct svm_dev_ops { #define SVM_REQ_EXEC (1<<1) #define SVM_REQ_PRIV (1<<0) + +/* + * The SVM_FLAG_PRIVATE_PASID flag requests a PASID which is *not* the "main" + * PASID for the current process. Even if a PASID already exists, a new one + * will be allocated. And the PASID allocated with SVM_FLAG_PRIVATE_PASID + * will not be given to subsequent callers. This facility allows a driver to + * disambiguate between multiple device contexts which access the same MM, + * if there is no other way to do so. It should be used sparingly, if at all. + */ +#define SVM_FLAG_PRIVATE_PASID (1<<0) + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted acccess -- cgit v1.2.3 From 0db2e5d18f76a66ca945447d9f610bed0a94ca5a Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Thu, 1 Oct 2015 20:13:58 +0100 Subject: iommu: Implement common IOMMU ops for DMA mapping Taking inspiration from the existing arch/arm code, break out some generic functions to interface the DMA-API to the IOMMU-API. This will do the bulk of the heavy lifting for IOMMU-backed dma-mapping. Since associating an IOVA allocator with an IOMMU domain is a fairly common need, rather than introduce yet another private structure just to do this for ourselves, extend the top-level struct iommu_domain with the notion. A simple opaque cookie allows reuse by other IOMMU API users with their various different incompatible allocator types. Signed-off-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/dma-iommu.h | 85 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/iommu.h | 1 + 2 files changed, 86 insertions(+) create mode 100644 include/linux/dma-iommu.h (limited to 'include/linux') diff --git a/include/linux/dma-iommu.h b/include/linux/dma-iommu.h new file mode 100644 index 000000000000..fc481037478a --- /dev/null +++ b/include/linux/dma-iommu.h @@ -0,0 +1,85 @@ +/* + * Copyright (C) 2014-2015 ARM Ltd. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ +#ifndef __DMA_IOMMU_H +#define __DMA_IOMMU_H + +#ifdef __KERNEL__ +#include + +#ifdef CONFIG_IOMMU_DMA +#include + +int iommu_dma_init(void); + +/* Domain management interface for IOMMU drivers */ +int iommu_get_dma_cookie(struct iommu_domain *domain); +void iommu_put_dma_cookie(struct iommu_domain *domain); + +/* Setup call for arch DMA mapping code */ +int iommu_dma_init_domain(struct iommu_domain *domain, dma_addr_t base, u64 size); + +/* General helpers for DMA-API <-> IOMMU-API interaction */ +int dma_direction_to_prot(enum dma_data_direction dir, bool coherent); + +/* + * These implement the bulk of the relevant DMA mapping callbacks, but require + * the arch code to take care of attributes and cache maintenance + */ +struct page **iommu_dma_alloc(struct device *dev, size_t size, + gfp_t gfp, int prot, dma_addr_t *handle, + void (*flush_page)(struct device *, const void *, phys_addr_t)); +void iommu_dma_free(struct device *dev, struct page **pages, size_t size, + dma_addr_t *handle); + +int iommu_dma_mmap(struct page **pages, size_t size, struct vm_area_struct *vma); + +dma_addr_t iommu_dma_map_page(struct device *dev, struct page *page, + unsigned long offset, size_t size, int prot); +int iommu_dma_map_sg(struct device *dev, struct scatterlist *sg, + int nents, int prot); + +/* + * Arch code with no special attribute handling may use these + * directly as DMA mapping callbacks for simplicity + */ +void iommu_dma_unmap_page(struct device *dev, dma_addr_t handle, size_t size, + enum dma_data_direction dir, struct dma_attrs *attrs); +void iommu_dma_unmap_sg(struct device *dev, struct scatterlist *sg, int nents, + enum dma_data_direction dir, struct dma_attrs *attrs); +int iommu_dma_supported(struct device *dev, u64 mask); +int iommu_dma_mapping_error(struct device *dev, dma_addr_t dma_addr); + +#else + +struct iommu_domain; + +static inline int iommu_dma_init(void) +{ + return 0; +} + +static inline int iommu_get_dma_cookie(struct iommu_domain *domain) +{ + return -ENODEV; +} + +static inline void iommu_put_dma_cookie(struct iommu_domain *domain) +{ +} + +#endif /* CONFIG_IOMMU_DMA */ +#endif /* __KERNEL__ */ +#endif /* __DMA_IOMMU_H */ diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f9c1b6d0f2e4..f174506a5343 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -81,6 +81,7 @@ struct iommu_domain { iommu_fault_handler_t handler; void *handler_token; struct iommu_domain_geometry geometry; + void *iova_cookie; }; enum iommu_cap { -- cgit v1.2.3 From 5cec753709adf1a20c8b15edf8e5245cf4fd4e82 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Thu, 15 Oct 2015 15:52:15 +0100 Subject: iommu/vt-d: Implement SVM_FLAG_SUPERVISOR_MODE for kernel access MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is only usable for the static 1:1 mapping of physical memory. Any access to vmalloc or module regions will require some way of doing an IOTLB flush. It's theoretically possible to hook into the tlb_flush_kernel_range() function, but that seems like overkill — most of the addresses accessed through a kernel PASID *will* be in the 1:1 mapping. If we really need to allow access to more interesting kernel regions, then the answer will probably be an explicit IOTLB flush call after use, akin to the DMA API's unmap function. In fact, it might be worth introducing that sooner rather than later, and making it just BUG() if the address isn't in the static 1:1 mapping. Signed-off-by: David Woodhouse --- include/linux/intel-svm.h | 15 ++++++++++++++- 1 file changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index dd94ab45a4db..0a48ccff24ae 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -40,7 +40,20 @@ struct svm_dev_ops { * disambiguate between multiple device contexts which access the same MM, * if there is no other way to do so. It should be used sparingly, if at all. */ -#define SVM_FLAG_PRIVATE_PASID (1<<0) +#define SVM_FLAG_PRIVATE_PASID (1<<0) + +/* + * The SVM_FLAG_SUPERVISOR_MODE flag requests a PASID which can be used only + * for access to kernel addresses. No IOTLB flushes are automatically done + * for kernel mappings; it is valid only for access to the kernel's static + * 1:1 mapping of physical memory — not to vmalloc or even module mappings. + * A future API addition may permit the use of such ranges, by means of an + * explicit IOTLB flush call (akin to the DMA API's unmap method). + * + * It is unlikely that we will ever hook into flush_tlb_kernel_range() to + * do such IOTLB flushes automatically. + */ +#define SVM_FLAG_SUPERVISOR_MODE (1<<1) /** * intel_svm_bind_mm() - Bind the current process to a PASID -- cgit v1.2.3 From b02176f30cd30acccd3b633ab7d9aed8b5da52ff Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Tue, 8 Sep 2015 12:20:22 -0400 Subject: block: don't release bdi while request_queue has live references bdi's are initialized in two steps, bdi_init() and bdi_register(), but destroyed in a single step by bdi_destroy() which, for a bdi embedded in a request_queue, is called during blk_cleanup_queue() which makes the queue invisible and starts the draining of remaining usages. A request_queue's user can access the congestion state of the embedded bdi as long as it holds a reference to the queue. As such, it may access the congested state of a queue which finished blk_cleanup_queue() but hasn't reached blk_release_queue() yet. Because the congested state was embedded in backing_dev_info which in turn is embedded in request_queue, accessing the congested state after bdi_destroy() was called was fine. The bdi was destroyed but the memory region for the congested state remained accessible till the queue got released. a13f35e87140 ("writeback: don't embed root bdi_writeback_congested in bdi_writeback") changed the situation. Now, the root congested state which is expected to be pinned while request_queue remains accessible is separately reference counted and the base ref is put during bdi_destroy(). This means that the root congested state may go away prematurely while the queue is between bdi_dstroy() and blk_cleanup_queue(), which was detected by Andrey's KASAN tests. The root cause of this problem is that bdi doesn't distinguish the two steps of destruction, unregistration and release, and now the root congested state actually requires a separate release step. To fix the issue, this patch separates out bdi_unregister() and bdi_exit() from bdi_destroy(). bdi_unregister() is called from blk_cleanup_queue() and bdi_exit() from blk_release_queue(). bdi_destroy() is now just a simple wrapper calling the two steps back-to-back. While at it, the prototype of bdi_destroy() is moved right below bdi_setup_and_register() so that the counterpart operations are located together. Signed-off-by: Tejun Heo Fixes: a13f35e87140 ("writeback: don't embed root bdi_writeback_congested in bdi_writeback") Cc: stable@vger.kernel.org # v4.2+ Reported-and-tested-by: Andrey Konovalov Link: http://lkml.kernel.org/g/CAAeHK+zUJ74Zn17=rOyxacHU18SgCfC6bsYW=6kCY5GXJBwGfQ@mail.gmail.com Reviewed-by: Jan Kara Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 78677e5a65bf..c85f74946a8b 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -19,13 +19,17 @@ #include int __must_check bdi_init(struct backing_dev_info *bdi); -void bdi_destroy(struct backing_dev_info *bdi); +void bdi_exit(struct backing_dev_info *bdi); __printf(3, 4) int bdi_register(struct backing_dev_info *bdi, struct device *parent, const char *fmt, ...); int bdi_register_dev(struct backing_dev_info *bdi, dev_t dev); +void bdi_unregister(struct backing_dev_info *bdi); + int __must_check bdi_setup_and_register(struct backing_dev_info *, char *); +void bdi_destroy(struct backing_dev_info *bdi); + void wb_start_writeback(struct bdi_writeback *wb, long nr_pages, bool range_cyclic, enum wb_reason reason); void wb_start_background_writeback(struct bdi_writeback *wb); -- cgit v1.2.3 From 36022770de6cf9a403c40a68712ed2d2ea2746be Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:34 +0800 Subject: nfs42: add CLONE xdr functions xdr definitions per draft-ietf-nfsv4-minorversion2-38.txt Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 2 ++ include/linux/nfs_xdr.h | 19 +++++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index 00121f298269..c0c695b634d0 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -130,6 +130,7 @@ enum nfs_opnum4 { OP_READ_PLUS = 68, OP_SEEK = 69, OP_WRITE_SAME = 70, + OP_CLONE = 71, OP_ILLEGAL = 10044, }; @@ -501,6 +502,7 @@ enum { NFSPROC4_CLNT_ALLOCATE, NFSPROC4_CLNT_DEALLOCATE, NFSPROC4_CLNT_LAYOUTSTATS, + NFSPROC4_CLNT_CLONE, }; /* nfs41 types */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 52faf7e96c65..ac678b7a65ed 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -359,6 +359,25 @@ struct nfs42_layoutstat_data { struct nfs42_layoutstat_res res; }; +struct nfs42_clone_args { + struct nfs4_sequence_args seq_args; + struct nfs_fh *src_fh; + struct nfs_fh *dst_fh; + nfs4_stateid src_stateid; + nfs4_stateid dst_stateid; + __u64 src_offset; + __u64 dst_offset; + __u64 count; + const u32 *dst_bitmask; +}; + +struct nfs42_clone_res { + struct nfs4_sequence_res seq_res; + unsigned int rpc_status; + struct nfs_fattr *dst_fattr; + const struct nfs_server *server; +}; + struct stateowner_id { __u64 create_time; __u32 uniquifier; -- cgit v1.2.3 From e5341f3a5762d17be9cdd06257c02c0098bdcab8 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:35 +0800 Subject: nfs42: add CLONE proc functions Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs_fs_sb.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index 570a7df2775b..a50de1002b20 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -243,5 +243,6 @@ struct nfs_server { #define NFS_CAP_ALLOCATE (1U << 20) #define NFS_CAP_DEALLOCATE (1U << 21) #define NFS_CAP_LAYOUTSTATS (1U << 22) +#define NFS_CAP_CLONE (1U << 23) #endif -- cgit v1.2.3 From 2a92ee92d4545448066fb664674c0ae5a9d5ea99 Mon Sep 17 00:00:00 2001 From: Peng Tao Date: Sat, 26 Sep 2015 02:24:37 +0800 Subject: nfs: get clone_blksize when probing fsinfo NFSv42 CLONE operation is supposed to respect it. Signed-off-by: Peng Tao Signed-off-by: Trond Myklebust --- include/linux/nfs4.h | 1 + include/linux/nfs_fs_sb.h | 1 + include/linux/nfs_xdr.h | 1 + 3 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs4.h b/include/linux/nfs4.h index c0c695b634d0..e7e78537aea2 100644 --- a/include/linux/nfs4.h +++ b/include/linux/nfs4.h @@ -422,6 +422,7 @@ enum lock_type4 { #define FATTR4_WORD2_LAYOUT_TYPES (1UL << 0) #define FATTR4_WORD2_LAYOUT_BLKSIZE (1UL << 1) #define FATTR4_WORD2_MDSTHRESHOLD (1UL << 4) +#define FATTR4_WORD2_CLONE_BLKSIZE (1UL << 13) #define FATTR4_WORD2_SECURITY_LABEL (1UL << 16) /* MDS threshold bitmap bits */ diff --git a/include/linux/nfs_fs_sb.h b/include/linux/nfs_fs_sb.h index a50de1002b20..2469ab0bb3a1 100644 --- a/include/linux/nfs_fs_sb.h +++ b/include/linux/nfs_fs_sb.h @@ -147,6 +147,7 @@ struct nfs_server { unsigned int acdirmax; unsigned int namelen; unsigned int options; /* extra options enabled by mount */ + unsigned int clone_blksize; /* granularity of a CLONE operation */ #define NFS_OPTION_FSCACHE 0x00000001 /* - local caching enabled */ #define NFS_OPTION_MIGRATION 0x00000002 /* - NFSv4 migration enabled */ diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index ac678b7a65ed..92ff445e60a0 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -141,6 +141,7 @@ struct nfs_fsinfo { __u32 lease_time; /* in seconds */ __u32 layouttype; /* supported pnfs layout driver */ __u32 blksize; /* preferred pnfs io block size */ + __u32 clone_blksize; /* granularity of a CLONE operation */ }; struct nfs_fsstat { -- cgit v1.2.3 From 0de0942db2b36dd91c088a7950398d2e87f23b23 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:49 -0400 Subject: cgroup: make cgroup->nr_populated count the number of populated css_sets Currently, cgroup->nr_populated counts whether the cgroup has any css_sets linked to it and the number of children which has non-zero ->nr_populated. This works because a css_set's refcnt converges with the number of tasks linked to it and thus there's no css_set linked to a cgroup if it doesn't have any live tasks. To help tracking resource usage of zombie tasks, putting the ref of css_set will be separated from disassociating the task from the css_set which means that a cgroup may have css_sets linked to it even when it doesn't have any live tasks. This patch updates cgroup->nr_populated so that for the cgroup itself it counts the number of css_sets which have tasks associated with them so that empty css_sets don't skew the populated test. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index df589a097539..17444505c870 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -232,10 +232,10 @@ struct cgroup { int id; /* - * If this cgroup contains any tasks, it contributes one to - * populated_cnt. All children with non-zero popuplated_cnt of - * their own contribute one. The count is zero iff there's no task - * in this cgroup or its subtree. + * Each non-empty css_set associated with this cgroup contributes + * one to populated_cnt. All children with non-zero popuplated_cnt + * of their own contribute one. The count is zero iff there's no + * task in this cgroup or its subtree. */ int populated_cnt; -- cgit v1.2.3 From 27bd4dbb8d51c476298e62bd088225317b7853de Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:50 -0400 Subject: cgroup: replace cgroup_has_tasks() with cgroup_is_populated() Currently, cgroup_has_tasks() tests whether the target cgroup has any css_set linked to it. This works because a css_set's refcnt converges with the number of tasks linked to it and thus there's no css_set linked to a cgroup if it doesn't have any live tasks. To help tracking resource usage of zombie tasks, putting the ref of css_set will be separated from disassociating the task from the css_set which means that a cgroup may have css_sets linked to it even when it doesn't have any live tasks. This patch replaces cgroup_has_tasks() with cgroup_is_populated() which tests cgroup->nr_populated instead which locally counts the number of populated css_sets. Unlike cgroup_has_tasks(), cgroup_is_populated() is recursive - if any of the descendants is populated, the cgroup is populated too. While this changes the meaning of the test, all the existing users are okay with the change. While at it, replace the open-coded ->populated_cnt test in cgroup_events_show() with cgroup_is_populated(). Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko --- include/linux/cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index e9c3eac074e2..bdfdb3a1a83c 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -456,9 +456,9 @@ static inline struct cgroup *task_cgroup(struct task_struct *task, } /* no synchronization, the result can only be used as a hint */ -static inline bool cgroup_has_tasks(struct cgroup *cgrp) +static inline bool cgroup_is_populated(struct cgroup *cgrp) { - return !list_empty(&cgrp->cset_links); + return cgrp->populated_cnt; } /* returns ino associated with a cgroup */ -- cgit v1.2.3 From ed27b9f7a17ddfbc007e16d4d11f33dff4fc2de7 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:52 -0400 Subject: cgroup: don't hold css_set_rwsem across css task iteration css_sets are synchronized through css_set_rwsem but the locking scheme is kinda bizarre. The hot paths - fork and exit - have to write lock the rwsem making the rw part pointless; furthermore, many readers already hold cgroup_mutex. One of the readers is css task iteration. It read locks the rwsem over the entire duration of iteration. This leads to silly locking behavior. When cpuset tries to migrate processes of a cgroup to a different NUMA node, css_set_rwsem is held across the entire migration attempt which can take a long time locking out forking, exiting and other cgroup operations. This patch updates css task iteration so that it locks css_set_rwsem only while the iterator is being advanced. css task iteration involves two levels - css_set and task iteration. As css_sets in use are practically immutable, simply pinning the current one is enough for resuming iteration afterwards. Task iteration is tricky as tasks may leave their css_set while iteration is in progress. This is solved by keeping track of active iterators and advancing them if their next task leaves its css_set. v2: put_task_struct() in css_task_iter_next() moved outside css_set_rwsem. A later patch will add cgroup operations to task_struct free path which may grab the same lock and this avoids deadlock possibilities. css_set_move_task() updated to use list_for_each_entry_safe() when walking task_iters and advancing them. This is necessary as advancing an iter may remove it from the list. Signed-off-by: Tejun Heo --- include/linux/cgroup-defs.h | 3 +++ include/linux/cgroup.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 17444505c870..62413c3e2f4b 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -211,6 +211,9 @@ struct css_set { */ struct list_head e_cset_node[CGROUP_SUBSYS_COUNT]; + /* all css_task_iters currently walking this cset */ + struct list_head task_iters; + /* For RCU-protected deletion */ struct rcu_head rcu_head; }; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index bdfdb3a1a83c..a9dcf0e76865 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -42,6 +42,10 @@ struct css_task_iter { struct list_head *task_pos; struct list_head *tasks_head; struct list_head *mg_tasks_head; + + struct css_set *cur_cset; + struct task_struct *cur_task; + struct list_head iters_node; /* css_set->task_iters */ }; extern struct cgroup_root cgrp_dfl_root; -- cgit v1.2.3 From f0d9a5f175753a371bc7fdff0d584a8d9cd72bb0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:53 -0400 Subject: cgroup: make css_set_rwsem a spinlock and rename it to css_set_lock css_set_rwsem is the inner lock protecting css_sets and is accessed from hot paths such as fork and exit. Internally, it has no reason to be a rwsem or even mutex. There are no internal blocking operations while holding it. This was rwsem because css task iteration used to expose it to external iterator users. As the previous patch updated css task iteration such that the locking is not leaked to its users, there's no reason to keep it a rwsem. This patch converts css_set_rwsem to a spinlock and rename it to css_set_lock. It uses bh-safe operations as a planned usage needs to access it from RCU callback context. Signed-off-by: Tejun Heo --- include/linux/cgroup.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index a9dcf0e76865..46020735bcbb 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -13,7 +13,6 @@ #include #include #include -#include #include #include #include @@ -367,11 +366,11 @@ static inline void css_put_many(struct cgroup_subsys_state *css, unsigned int n) */ #ifdef CONFIG_PROVE_RCU extern struct mutex cgroup_mutex; -extern struct rw_semaphore css_set_rwsem; +extern spinlock_t css_set_lock; #define task_css_set_check(task, __c) \ rcu_dereference_check((task)->cgroups, \ lockdep_is_held(&cgroup_mutex) || \ - lockdep_is_held(&css_set_rwsem) || \ + lockdep_is_held(&css_set_lock) || \ ((task)->flags & PF_EXITING) || (__c)) #else #define task_css_set_check(task, __c) \ -- cgit v1.2.3 From 2e91fa7f6d451e3ea9fec999065d2fd199691f9d Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:53 -0400 Subject: cgroup: keep zombies associated with their original cgroups cgroup_exit() is called when a task exits and disassociates the exiting task from its cgroups and half-attach it to the root cgroup. This is unnecessary and undesirable. No controller actually needs an exiting task to be disassociated with non-root cgroups. Both cpu and perf_event controllers update the association to the root cgroup from their exit callbacks just to keep consistent with the cgroup core behavior. Also, this disassociation makes it difficult to track resources held by zombies or determine where the zombies came from. Currently, pids controller is completely broken as it uncharges on exit and zombies always escape the resource restriction. With cgroup association being reset on exit, fixing it is pretty painful. There's no reason to reset cgroup membership on exit. The zombie can be removed from its css_set so that it doesn't show up on "cgroup.procs" and thus can't be migrated or interfere with cgroup removal. It can still pin and point to the css_set so that its cgroup membership is maintained. This patch makes cgroup core keep zombies associated with their cgroups at the time of exit. * Previous patches decoupled populated_cnt tracking from css_set lifetime, so a dying task can be simply unlinked from its css_set while pinning and pointing to the css_set. This keeps css_set association from task side alive while hiding it from "cgroup.procs" and populated_cnt tracking. The css_set reference is dropped when the task_struct is freed. * ->exit() callback no longer needs the css arguments as the associated css never changes once PF_EXITING is set. Removed. * cpu and perf_events controllers no longer need ->exit() callbacks. There's no reason to explicitly switch away on exit. The final schedule out is enough. The callbacks are removed. * On traditional hierarchies, nothing changes. "/proc/PID/cgroup" still reports "/" for all zombies. On the default hierarchy, "/proc/PID/cgroup" keeps reporting the cgroup that the task belonged to at the time of exit. If the cgroup gets removed before the task is reaped, " (deleted)" is appended. v2: Build brekage due to missing dummy cgroup_free() when !CONFIG_CGROUP fixed. Signed-off-by: Tejun Heo Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo --- include/linux/cgroup-defs.h | 4 +--- include/linux/cgroup.h | 2 ++ 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 62413c3e2f4b..6a1ab64ee5f9 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -435,9 +435,7 @@ struct cgroup_subsys { int (*can_fork)(struct task_struct *task, void **priv_p); void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); - void (*exit)(struct cgroup_subsys_state *css, - struct cgroup_subsys_state *old_css, - struct task_struct *task); + void (*exit)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); int early_init; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 46020735bcbb..22e3754f89c5 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -102,6 +102,7 @@ extern void cgroup_cancel_fork(struct task_struct *p, extern void cgroup_post_fork(struct task_struct *p, void *old_ss_priv[CGROUP_CANFORK_COUNT]); void cgroup_exit(struct task_struct *p); +void cgroup_free(struct task_struct *p); int cgroup_init_early(void); int cgroup_init(void); @@ -547,6 +548,7 @@ static inline void cgroup_cancel_fork(struct task_struct *p, static inline void cgroup_post_fork(struct task_struct *p, void *ss_priv[CGROUP_CANFORK_COUNT]) {} static inline void cgroup_exit(struct task_struct *p) {} +static inline void cgroup_free(struct task_struct *p) {} static inline int cgroup_init_early(void) { return 0; } static inline int cgroup_init(void) { return 0; } -- cgit v1.2.3 From afcf6c8b75444382e0f9996157207ebae34a8848 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 15 Oct 2015 16:41:53 -0400 Subject: cgroup: add cgroup_subsys->free() method and use it to fix pids controller pids controller is completely broken in that it uncharges when a task exits allowing zombies to escape resource control. With the recent updates, cgroup core now maintains cgroup association till task free and pids controller can be fixed by uncharging on free instead of exit. This patch adds cgroup_subsys->free() method and update pids controller to use it instead of ->exit() for uncharging. Signed-off-by: Tejun Heo Cc: Aleksa Sarai --- include/linux/cgroup-defs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 6a1ab64ee5f9..60d44b26276d 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -436,6 +436,7 @@ struct cgroup_subsys { void (*cancel_fork)(struct task_struct *task, void *priv); void (*fork)(struct task_struct *task, void *priv); void (*exit)(struct task_struct *task); + void (*free)(struct task_struct *task); void (*bind)(struct cgroup_subsys_state *root_css); int early_init; -- cgit v1.2.3 From 11eecf910bd81d36425020743b2df73651c5b466 Mon Sep 17 00:00:00 2001 From: Chanwoo Choi Date: Sat, 3 Oct 2015 14:15:13 +0900 Subject: extcon: Modify the id and name of external connector This patch modifies the id and name of external connector with the additional prefix to clarify both attribute and meaning of external connector as following: - EXTCON_CHG_* mean the charger connector. - EXTCON_JACK_* mean the jack connector. - EXTCON_DISP_* mean the display port connector. Following table show the new name of external connector with old name: -------------------------------------------------- Old extcon name | New extcon name | -------------------------------------------------- EXTCON_TA | EXTCON_CHG_USB_DCP | EXTCON_CHARGE_DOWNSTREAM| EXTCON_CHG_USB_CDP | EXTCON_FAST_CHARGER | EXTCON_CHG_USB_FAST | EXTCON_SLOW_CHARGER | EXTCON_CHG_USB_SLOW | -------------------------------------------------- EXTCON_MICROPHONE | EXTCON_JACK_MICROPHONE | EXTCON_HEADPHONE | EXTCON_JACK_HEADPHONE | EXTCON_LINE_IN | EXTCON_JACK_LINE_IN | EXTCON_LINE_OUT | EXTCON_JACK_LINE_OUT | EXTCON_VIDEO_IN | EXTCON_JACK_VIDEO_IN | EXTCON_VIDEO_OUT | EXTCON_JACK_VIDEO_OUT | EXTCON_SPDIF_IN | EXTCON_JACK_SPDIF_IN | EXTCON_SPDIF_OUT | EXTCON_JACK_SPDIF_OUT | -------------------------------------------------- EXTCON_HMDI | EXTCON_DISP_HDMI | EXTCON_MHL | EXTCON_DISP_MHL | EXTCON_DVI | EXTCON_DISP_DVI | EXTCON_VGA | EXTCON_DISP_VGA | -------------------------------------------------- And, when altering the name of USB charger connector, EXTCON refers to the "Battery Charging v1.2 Spec and Adopters Agreement"[1] to use the standard name of USB charging port as following. Following name of USB charging port are already used in power_supply subsystem. We chan check it on patch[2]. - EXTCON_CHG_USB_SDP /* Standard Downstream Port */ - EXTCON_CHG_USB_DCP /* Dedicated Charging Port */ - EXTCON_CHG_USB_CDP /* Charging Downstream Port */ - EXTCON_CHG_USB_ACA /* Accessory Charger Adapter */ [1] www.usb.org/developers/docs/devclass_docs/BCv1.2_070312.zip [2] commit 85efc8a18ced ("power_supply: Add types for USB chargers") Signed-off-by: Chanwoo Choi [ckeepax: For the Arizona changes] Acked-by: Charles Keepax Reviewed-by: Roger Quadros --- include/linux/extcon.h | 62 +++++++++++++++++++++++++++++--------------------- 1 file changed, 36 insertions(+), 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/extcon.h b/include/linux/extcon.h index c0f8c4fc5d45..7abf674c388c 100644 --- a/include/linux/extcon.h +++ b/include/linux/extcon.h @@ -31,32 +31,42 @@ /* * Define the unique id of supported external connectors */ -#define EXTCON_NONE 0 - -#define EXTCON_USB 1 /* USB connector */ -#define EXTCON_USB_HOST 2 - -#define EXTCON_TA 3 /* Charger connector */ -#define EXTCON_FAST_CHARGER 4 -#define EXTCON_SLOW_CHARGER 5 -#define EXTCON_CHARGE_DOWNSTREAM 6 - -#define EXTCON_LINE_IN 7 /* Audio/Video connector */ -#define EXTCON_LINE_OUT 8 -#define EXTCON_MICROPHONE 9 -#define EXTCON_HEADPHONE 10 -#define EXTCON_HDMI 11 -#define EXTCON_MHL 12 -#define EXTCON_DVI 13 -#define EXTCON_VGA 14 -#define EXTCON_SPDIF_IN 15 -#define EXTCON_SPDIF_OUT 16 -#define EXTCON_VIDEO_IN 17 -#define EXTCON_VIDEO_OUT 18 - -#define EXTCON_DOCK 19 /* Misc connector */ -#define EXTCON_JIG 20 -#define EXTCON_MECHANICAL 21 +#define EXTCON_NONE 0 + +/* USB external connector */ +#define EXTCON_USB 1 +#define EXTCON_USB_HOST 2 + +/* Charging external connector */ +#define EXTCON_CHG_USB_SDP 5 /* Standard Downstream Port */ +#define EXTCON_CHG_USB_DCP 6 /* Dedicated Charging Port */ +#define EXTCON_CHG_USB_CDP 7 /* Charging Downstream Port */ +#define EXTCON_CHG_USB_ACA 8 /* Accessory Charger Adapter */ +#define EXTCON_CHG_USB_FAST 9 +#define EXTCON_CHG_USB_SLOW 10 + +/* Jack external connector */ +#define EXTCON_JACK_MICROPHONE 20 +#define EXTCON_JACK_HEADPHONE 21 +#define EXTCON_JACK_LINE_IN 22 +#define EXTCON_JACK_LINE_OUT 23 +#define EXTCON_JACK_VIDEO_IN 24 +#define EXTCON_JACK_VIDEO_OUT 25 +#define EXTCON_JACK_SPDIF_IN 26 /* Sony Philips Digital InterFace */ +#define EXTCON_JACK_SPDIF_OUT 27 + +/* Display external connector */ +#define EXTCON_DISP_HDMI 40 /* High-Definition Multimedia Interface */ +#define EXTCON_DISP_MHL 41 /* Mobile High-Definition Link */ +#define EXTCON_DISP_DVI 42 /* Digital Visual Interface */ +#define EXTCON_DISP_VGA 43 /* Video Graphics Array */ + +/* Miscellaneous external connector */ +#define EXTCON_DOCK 60 +#define EXTCON_JIG 61 +#define EXTCON_MECHANICAL 62 + +#define EXTCON_NUM 63 struct extcon_cable; -- cgit v1.2.3 From ba1aefcd6db5536d3eb3ca3ce7bd6786960140ea Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:46 +0300 Subject: kvm/eventfd: factor out kvm_notify_acked_gsi() Factor out kvm_notify_acked_gsi() helper to iterate over EOI listeners and notify those matching the given gsi. It will be reused in the upcoming Hyper-V SynIC implementation. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 9596a2f0977b..b66861c297c4 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -829,6 +829,7 @@ int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level) int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); +void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_register_irq_ack_notifier(struct kvm *kvm, struct kvm_irq_ack_notifier *kian); -- cgit v1.2.3 From c9a5eccac1abf50649949f15754a7635f263a1ff Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:47 +0300 Subject: kvm/eventfd: add arch-specific set_irq Allow for arch-specific interrupt types to be set. For that, add kvm_arch_set_irq() which takes interrupt type-specific action if it recognizes the interrupt type given, and -EWOULDBLOCK otherwise. The default implementation always returns -EWOULDBLOCK. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index b66861c297c4..eba9caebc9c1 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -828,6 +828,10 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); + +int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, + int irq_source_id, int level, bool line_status); + bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); -- cgit v1.2.3 From 8db02d8b4089fa8098a170738e8ae7939aa8ae7a Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 8 Oct 2015 15:10:48 -0700 Subject: of/irq: Add new function of_msi_map_rid() The device tree property "msi-map" specifies how to create the PCI requester id used in some MSI controllers. Add a new function of_msi_map_rid() that finds the msi-map property and applies its translation to a given requester id. Reviewed-by: Marc Zyngier Acked-by: Rob Herring Signed-off-by: David Daney Signed-off-by: Marc Zyngier --- include/linux/of_irq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 4bcbd586a672..8cd9334e5731 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -75,6 +75,7 @@ static inline int of_irq_to_resource_table(struct device_node *dev, extern unsigned int irq_of_parse_and_map(struct device_node *node, int index); extern struct device_node *of_irq_find_parent(struct device_node *child); extern void of_msi_configure(struct device *dev, struct device_node *np); +u32 of_msi_map_rid(struct device *dev, struct device_node *msi_np, u32 rid_in); #else /* !CONFIG_OF */ static inline unsigned int irq_of_parse_and_map(struct device_node *dev, @@ -87,6 +88,12 @@ static inline void *of_irq_find_parent(struct device_node *child) { return NULL; } + +static inline u32 of_msi_map_rid(struct device *dev, + struct device_node *msi_np, u32 rid_in) +{ + return rid_in; +} #endif /* !CONFIG_OF */ #endif /* __OF_IRQ_H */ -- cgit v1.2.3 From b6eec9b717d4dcb39ef024b8a3b619a32468b01e Mon Sep 17 00:00:00 2001 From: David Daney Date: Thu, 8 Oct 2015 15:10:49 -0700 Subject: PCI/MSI: Add helper function pci_msi_domain_get_msi_rid(). Add pci_msi_domain_get_msi_rid() to return the MSI requester id (RID). Initially needed by gic-v3 based systems. It will be used by follow on patch to drivers/irqchip/irq-gic-v3-its-pci-msi.c Initially supports mapping the RID via OF device tree. In the future, this could be extended to use ACPI _IORT tables as well. Reviewed-by: Marc Zyngier Acked-by: Bjorn Helgaas Signed-off-by: David Daney Signed-off-by: Marc Zyngier --- include/linux/msi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 32a24b9a9556..87723751054d 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -294,6 +294,7 @@ irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, struct msi_desc *desc); int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); +u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ #endif /* LINUX_MSI_H */ -- cgit v1.2.3 From 48ae34fb39b0c0cfc76275e844fba5b0b04fa49e Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 18 Sep 2015 14:07:40 +0100 Subject: of/irq: Add support code for multi-parent version of "msi-parent" Since 126b16e2ad98 ("Docs: dt: add generic MSI bindings"), the definition of "msi-parent" has evolved, while maintaining some degree of compatibility. It can now express multiple MSI controllers as parents, as well as some sideband data being communicated to the controller. This patch adds the parsing of the property, iterating over the multiple parents until a suitable irqdomain is found. It can also fallback to the original parsing if the old binding is detected. This support code gets used in the subsequent patches. Suggested-by: Robin Murphy Acked-by: Rob Herring Signed-off-by: Marc Zyngier --- include/linux/of_irq.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 8cd9334e5731..62ae6edecfd5 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -46,6 +46,9 @@ extern int of_irq_get(struct device_node *dev, int index); extern int of_irq_get_byname(struct device_node *dev, const char *name); extern int of_irq_to_resource_table(struct device_node *dev, struct resource *res, int nr_irqs); +extern struct irq_domain *of_msi_get_domain(struct device *dev, + struct device_node *np, + enum irq_domain_bus_token token); #else static inline int of_irq_count(struct device_node *dev) { @@ -64,6 +67,12 @@ static inline int of_irq_to_resource_table(struct device_node *dev, { return 0; } +static inline struct irq_domain *of_msi_get_domain(struct device *dev, + struct device_node *np, + enum irq_domain_bus_token token) +{ + return NULL; +} #endif #if defined(CONFIG_OF) -- cgit v1.2.3 From 82b9b4243c6d99d9e38087fa89183aa7479185e9 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Oct 2015 14:38:55 +0100 Subject: of/irq: Use the msi-map property to provide device-specific MSI domain While msi-parent is used to point to the MSI controller that works for all the devices behind a root complex, it doesn't allow configurations where each individual device can be routed to a separate MSI controller. The msi-map property provides this flexibility (and much more), so let's add a utility function that parses it, and return the corresponding MSI domain. Acked-by: Rob Herring Signed-off-by: Marc Zyngier --- include/linux/of_irq.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_irq.h b/include/linux/of_irq.h index 62ae6edecfd5..65d969246a4d 100644 --- a/include/linux/of_irq.h +++ b/include/linux/of_irq.h @@ -49,6 +49,8 @@ extern int of_irq_to_resource_table(struct device_node *dev, extern struct irq_domain *of_msi_get_domain(struct device *dev, struct device_node *np, enum irq_domain_bus_token token); +extern struct irq_domain *of_msi_map_get_device_domain(struct device *dev, + u32 rid); #else static inline int of_irq_count(struct device_node *dev) { @@ -73,6 +75,11 @@ static inline struct irq_domain *of_msi_get_domain(struct device *dev, { return NULL; } +static inline struct irq_domain *of_msi_map_get_device_domain(struct device *dev, + u32 rid) +{ + return NULL; +} #endif #if defined(CONFIG_OF) -- cgit v1.2.3 From 54fa97eeb9e22b47d68b67ee00987afa7fbc2178 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Fri, 2 Oct 2015 14:43:06 +0100 Subject: PCI/MSI: Allow the MSI domain to be device-specific So far, we've always considered that for a given PCI device, its MSI controller was either set by the architecture-specific pcibios hook, or simply inherited from the host bridge. This doesn't cover things like firmware-defined topologies like msi-map (DT) or IORT (ACPI), which can provide information about which MSI controller to use on a per-device basis. This patch adds the necessary hook into the MSI code to allow this feature, and provides the msi-map functionnality as a first implementation. Acked-by: Rob Herring Acked-by: Bjorn Helgaas Signed-off-by: Marc Zyngier --- include/linux/msi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/msi.h b/include/linux/msi.h index 87723751054d..0b4460374020 100644 --- a/include/linux/msi.h +++ b/include/linux/msi.h @@ -295,6 +295,12 @@ irq_hw_number_t pci_msi_domain_calc_hwirq(struct pci_dev *dev, int pci_msi_domain_check_cap(struct irq_domain *domain, struct msi_domain_info *info, struct device *dev); u32 pci_msi_domain_get_msi_rid(struct irq_domain *domain, struct pci_dev *pdev); +struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev); +#else +static inline struct irq_domain *pci_msi_get_device_domain(struct pci_dev *pdev) +{ + return NULL; +} #endif /* CONFIG_PCI_MSI_IRQ_DOMAIN */ #endif /* LINUX_MSI_H */ -- cgit v1.2.3 From 203d027de4d7068c607b60d4310a1599dec8839f Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 11:56:26 +0200 Subject: vga_switcheroo: Use enum vga_switcheroo_state instead of int Signed-off-by: Lukas Wunner Reviewed-by: Jani Nikula Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 376499197717..e63661757505 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -138,7 +138,7 @@ void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); -int vga_switcheroo_get_client_state(struct pci_dev *dev); +enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev); void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic); @@ -157,7 +157,7 @@ static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, int id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } -static inline int vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } +static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } static inline void vga_switcheroo_set_dynamic_switch(struct pci_dev *pdev, enum vga_switcheroo_state dynamic) {} -- cgit v1.2.3 From 21c5ba8c1ee02f204e556c26703cebaf9c4019e0 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 13:30:32 +0200 Subject: vga_switcheroo: Use VGA_SWITCHEROO_UNKNOWN_ID instead of -1 Signed-off-by: Lukas Wunner Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index e63661757505..88909a865b72 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -59,6 +59,9 @@ enum vga_switcheroo_state { /** * enum vga_switcheroo_client_id - client identifier + * @VGA_SWITCHEROO_UNKNOWN_ID: initial identifier assigned to vga clients. + * Determining the id requires the handler, so GPUs are given their + * true id in a delayed fashion in vga_switcheroo_enable() * @VGA_SWITCHEROO_IGD: integrated graphics device * @VGA_SWITCHEROO_DIS: discrete graphics device * @VGA_SWITCHEROO_MAX_CLIENTS: currently no more than two GPUs are supported @@ -66,6 +69,7 @@ enum vga_switcheroo_state { * Client identifier. Audio clients use the same identifier & 0x100. */ enum vga_switcheroo_client_id { + VGA_SWITCHEROO_UNKNOWN_ID = -1, VGA_SWITCHEROO_IGD, VGA_SWITCHEROO_DIS, VGA_SWITCHEROO_MAX_CLIENTS, -- cgit v1.2.3 From fa3e967fffaf267ccab7959429722da34e45ad77 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Fri, 28 Aug 2015 12:54:07 +0200 Subject: vga_switcheroo: Use enum vga_switcheroo_client_id instead of int Signed-off-by: Lukas Wunner Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 88909a865b72..c55751155631 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -100,7 +100,7 @@ struct vga_switcheroo_handler { int (*switchto)(enum vga_switcheroo_client_id id); int (*power_state)(enum vga_switcheroo_client_id id, enum vga_switcheroo_state state); - int (*get_client_id)(struct pci_dev *pdev); + enum vga_switcheroo_client_id (*get_client_id)(struct pci_dev *pdev); }; /** @@ -132,7 +132,7 @@ int vga_switcheroo_register_client(struct pci_dev *dev, bool driver_power_control); int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id); + enum vga_switcheroo_client_id id); void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); @@ -158,7 +158,7 @@ static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_i static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, - int id) { return 0; } + enum vga_switcheroo_client_id id) { return 0; } static inline void vga_switcheroo_unregister_handler(void) {} static inline int vga_switcheroo_process_delayed_switch(void) { return 0; } static inline enum vga_switcheroo_state vga_switcheroo_get_client_state(struct pci_dev *dev) { return VGA_SWITCHEROO_ON; } -- cgit v1.2.3 From 573c7ba006edbecff0714db651dd3602b9d0a6a0 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Fri, 16 Oct 2015 14:01:22 +0200 Subject: net: introduce pre-change upper device notifier This newly introduced netdevice notifier is called before actual change upper happens. That provides a possibility for notifier handlers to know upper change will happen and react to it, including possibility to forbid the change. That is valuable for drivers which can check if the upper device linkage is supported and forbid that in case it is not. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index b3374402c1ea..69fdd427c8cb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2106,6 +2106,7 @@ struct pcpu_sw_netstats { #define NETDEV_PRECHANGEMTU 0x0017 /* notify before mtu change happened */ #define NETDEV_CHANGEINFODATA 0x0018 #define NETDEV_BONDING_INFO 0x0019 +#define NETDEV_PRECHANGEUPPER 0x001A int register_netdevice_notifier(struct notifier_block *nb); int unregister_netdevice_notifier(struct notifier_block *nb); -- cgit v1.2.3 From 2ffbceb2b08f8ca0496c54a9ebcd11d25275954e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 13 Oct 2015 14:33:26 +0200 Subject: netfilter: remove hook owner refcounting since commit 8405a8fff3f8 ("netfilter: nf_qeueue: Drop queue entries on nf_unregister_hook") all pending queued entries are discarded. So we can simply remove all of the owner handling -- when module is removed it also needs to unregister all its hooks. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index edb3dc32f1da..ef11e1d77699 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -90,7 +90,6 @@ struct nf_hook_ops { /* User fills in from here down. */ nf_hookfn *hook; struct net_device *dev; - struct module *owner; void *priv; u_int8_t pf; unsigned int hooknum; -- cgit v1.2.3 From 008027c31d57a22bd80dda5acc95b037634eee0f Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Fri, 9 Oct 2015 20:45:42 +0200 Subject: netfilter: turn NF_HOOK into an inline function A recent change to the dst_output handling caused a new warning when the call to NF_HOOK() is the only used of a local variable passed as 'dev', and CONFIG_NETFILTER is disabled: net/ipv6/ip6_output.c: In function 'ip6_output': net/ipv6/ip6_output.c:135:21: warning: unused variable 'dev' [-Wunused-variable] The reason for this is that the NF_HOOK macro in this case does not reference the variable at all, and the call to dev_net(dev) got removed from the ip6_output function. To avoid that warning now and in the future, this changes the macro into an equivalent inline function, which tells the compiler that the variable is passed correctly but still unused. The dn_forward function apparently had the same problem in the past and added a local workaround that no longer works with the inline function. In order to avoid a regression, we have to also remove the #ifdef from decnet in the same patch. Fixes: ede2059dbaf9 ("dst: Pass net into dst->output") Signed-off-by: Arnd Bergmann Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter.h | 19 +++++++++++++++++-- 1 file changed, 17 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter.h b/include/linux/netfilter.h index ef11e1d77699..0ad556726181 100644 --- a/include/linux/netfilter.h +++ b/include/linux/netfilter.h @@ -346,8 +346,23 @@ nf_nat_decode_session(struct sk_buff *skb, struct flowi *fl, u_int8_t family) } #else /* !CONFIG_NETFILTER */ -#define NF_HOOK(pf, hook, net, sk, skb, indev, outdev, okfn) (okfn)(net, sk, skb) -#define NF_HOOK_COND(pf, hook, net, sk, skb, indev, outdev, okfn, cond) (okfn)(net, sk, skb) +static inline int +NF_HOOK_COND(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct net *, struct sock *, struct sk_buff *), + bool cond) +{ + return okfn(net, sk, skb); +} + +static inline int +NF_HOOK(uint8_t pf, unsigned int hook, struct net *net, struct sock *sk, + struct sk_buff *skb, struct net_device *in, struct net_device *out, + int (*okfn)(struct net *, struct sock *, struct sk_buff *)) +{ + return okfn(net, sk, skb); +} + static inline int nf_hook(u_int8_t pf, unsigned int hook, struct net *net, struct sock *sk, struct sk_buff *skb, struct net_device *indev, struct net_device *outdev, -- cgit v1.2.3 From 5abe4f223ed6b820443d3657bd48600692f61c12 Mon Sep 17 00:00:00 2001 From: Sascha Hauer Date: Tue, 13 Oct 2015 12:45:26 +0200 Subject: regulator: introduce min_dropout_uV Many voltage Regulators need a input voltage that is higher than the output voltage. Allow to specify a minimum dropout voltage which will be used later to find the best input voltage for regulators. [Changed uv to uV for consistency and legibility -- broonie] Signed-off-by: Sascha Hauer Signed-off-by: Mark Brown --- include/linux/regulator/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/regulator/driver.h b/include/linux/regulator/driver.h index 45932228cbf5..9c2903e58adb 100644 --- a/include/linux/regulator/driver.h +++ b/include/linux/regulator/driver.h @@ -245,6 +245,7 @@ enum regulator_type { * @linear_min_sel: Minimal selector for starting linear mapping * @fixed_uV: Fixed voltage of rails. * @ramp_delay: Time to settle down after voltage change (unit: uV/us) + * @min_dropout_uV: The minimum dropout voltage this regulator can handle * @linear_ranges: A constant table of possible voltage ranges. * @n_linear_ranges: Number of entries in the @linear_ranges table. * @volt_table: Voltage mapping table (if table based mapping) @@ -292,6 +293,7 @@ struct regulator_desc { unsigned int linear_min_sel; int fixed_uV; unsigned int ramp_delay; + int min_dropout_uV; const struct regulator_linear_range *linear_ranges; int n_linear_ranges; -- cgit v1.2.3 From b76281cb97761002277730432812b1687de96062 Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Fri, 16 Oct 2015 14:35:21 +0200 Subject: clk: Make clk input parameter of __clk_get_name() const When calling __clk_get_name() on a const clock: warning: passing argument 1 of '__clk_get_name' discards 'const' qualifier from pointer target type include/linux/clk-provider.h:613:13: note: expected 'struct clk *' but argument is of type 'const struct clk *' __clk_get_name() does not modify the passed clock, hence make it const. Signed-off-by: Geert Uytterhoeven Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 8ff43eb4b311..bbb8fed11e44 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -607,7 +607,7 @@ void clk_unregister(struct clk *clk); void devm_clk_unregister(struct device *dev, struct clk *clk); /* helper functions */ -const char *__clk_get_name(struct clk *clk); +const char *__clk_get_name(const struct clk *clk); const char *clk_hw_get_name(const struct clk_hw *hw); struct clk_hw *__clk_get_hw(struct clk *clk); unsigned int clk_hw_get_num_parents(const struct clk_hw *hw); -- cgit v1.2.3 From c771c2f484857f3b1fc81d180485e96b7cb67c17 Mon Sep 17 00:00:00 2001 From: Jonas Gorski Date: Sun, 11 Oct 2015 17:34:15 +0200 Subject: gpiolib: provide generic request/free implementations Provide generic request/free implementations that pinctrl aware gpio drivers can use instead of open coding if they use a 1:1 pin to gpio signal mapping. Signed-off-by: Jonas Gorski Signed-off-by: Linus Walleij --- include/linux/gpio/driver.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/gpio/driver.h b/include/linux/gpio/driver.h index 1aed31c5ffba..d1baebf350d8 100644 --- a/include/linux/gpio/driver.h +++ b/include/linux/gpio/driver.h @@ -206,6 +206,9 @@ int _gpiochip_irqchip_add(struct gpio_chip *gpiochip, #endif /* CONFIG_GPIOLIB_IRQCHIP */ +int gpiochip_generic_request(struct gpio_chip *chip, unsigned offset); +void gpiochip_generic_free(struct gpio_chip *chip, unsigned offset); + #ifdef CONFIG_PINCTRL /** -- cgit v1.2.3 From 91236ecc74a25431138f71b6d52e130cd0f774b3 Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 14 Oct 2015 14:29:36 +0800 Subject: ACPI/PCI: Enhance ACPI core to support sparse IO space Enhance ACPI resource parsing interfaces to support sparse IO space, which will be used to share common code between x86 and IA64 later. Tested-by: Tony Luck Signed-off-by: Jiang Liu Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/ioport.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ioport.h b/include/linux/ioport.h index 388e3ae94f7a..24bea087e7af 100644 --- a/include/linux/ioport.h +++ b/include/linux/ioport.h @@ -94,6 +94,7 @@ struct resource { /* PnP I/O specific bits (IORESOURCE_BITS) */ #define IORESOURCE_IO_16BIT_ADDR (1<<0) #define IORESOURCE_IO_FIXED (1<<1) +#define IORESOURCE_IO_SPARSE (1<<2) /* PCI ROM control bits (IORESOURCE_BITS) */ #define IORESOURCE_ROM_ENABLE (1<<0) /* ROM is enabled, same as PCI_ROM_ADDRESS_ENABLE */ -- cgit v1.2.3 From 2c204383a2922cd6b79b9d78680a049a2144fbcc Mon Sep 17 00:00:00 2001 From: Jiang Liu Date: Wed, 14 Oct 2015 14:29:39 +0800 Subject: PCI/ACPI: Add interface acpi_pci_root_create() Introduce common interface acpi_pci_root_create() and related data structures to create PCI root bus for ACPI PCI host bridges. It will be used to kill duplicated arch specific code for IA64 and x86. It may also help ARM64 in future. Reviewed-by: Lorenzo Pieralisi Tested-by: Tony Luck Signed-off-by: Jiang Liu Acked-by: Bjorn Helgaas Signed-off-by: Rafael J. Wysocki --- include/linux/pci-acpi.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pci-acpi.h b/include/linux/pci-acpi.h index a965efa52152..89ab0572dbc6 100644 --- a/include/linux/pci-acpi.h +++ b/include/linux/pci-acpi.h @@ -52,6 +52,30 @@ static inline acpi_handle acpi_pci_get_bridge_handle(struct pci_bus *pbus) return ACPI_HANDLE(dev); } +struct acpi_pci_root; +struct acpi_pci_root_ops; + +struct acpi_pci_root_info { + struct acpi_pci_root *root; + struct acpi_device *bridge; + struct acpi_pci_root_ops *ops; + struct list_head resources; + char name[16]; +}; + +struct acpi_pci_root_ops { + struct pci_ops *pci_ops; + int (*init_info)(struct acpi_pci_root_info *info); + void (*release_info)(struct acpi_pci_root_info *info); + int (*prepare_resources)(struct acpi_pci_root_info *info); +}; + +extern int acpi_pci_probe_root_resources(struct acpi_pci_root_info *info); +extern struct pci_bus *acpi_pci_root_create(struct acpi_pci_root *root, + struct acpi_pci_root_ops *ops, + struct acpi_pci_root_info *info, + void *sd); + void acpi_pci_add_bus(struct pci_bus *bus); void acpi_pci_remove_bus(struct pci_bus *bus); -- cgit v1.2.3 From 90b665f627b18822a7bbebeff44ce730ccf74275 Mon Sep 17 00:00:00 2001 From: Laurent Pinchart Date: Tue, 13 Oct 2015 00:20:21 +0300 Subject: gpiolib: Add and use OF_GPIO_SINGLE_ENDED flag The flag matches the DT GPIO_SINGLE_ENDED flag and allows drivers to parse and use the DT flag to handle single-ended (open-drain or open-source) GPIOs. Signed-off-by: Laurent Pinchart Signed-off-by: Linus Walleij --- include/linux/of_gpio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/of_gpio.h b/include/linux/of_gpio.h index f3191828f037..87d6d1632dd4 100644 --- a/include/linux/of_gpio.h +++ b/include/linux/of_gpio.h @@ -29,6 +29,7 @@ struct device_node; */ enum of_gpio_flags { OF_GPIO_ACTIVE_LOW = 0x1, + OF_GPIO_SINGLE_ENDED = 0x2, }; #ifdef CONFIG_OF_GPIO -- cgit v1.2.3 From 3a341a4c30d427fd05617087db1564a595f65093 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Tue, 13 Oct 2015 23:39:50 -0700 Subject: Input: rotary-encoder - add support for quarter-period mode MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Some encoders have both outputs low in stable states, others also have a stable state with both outputs high (half-period mode) and some have a stable state in all steps (quarter-period mode). The driver used to support the former states and with this change it can also support the later. This commit also deprecates the 'half-period' property and introduces a new property 'steps-per-period'. This property specifies the number of steps (stable states) produced by the rotary encoder for each GPIO period. Signed-off-by: Guido Martínez Signed-off-by: Ezequiel Garcia Acked-by: Rob Herring Signed-off-by: Dmitry Torokhov --- include/linux/rotary_encoder.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rotary_encoder.h b/include/linux/rotary_encoder.h index b33f2d2a708f..fe3dc64e5aeb 100644 --- a/include/linux/rotary_encoder.h +++ b/include/linux/rotary_encoder.h @@ -8,9 +8,9 @@ struct rotary_encoder_platform_data { unsigned int gpio_b; unsigned int inverted_a; unsigned int inverted_b; + unsigned int steps_per_period; bool relative_axis; bool rollover; - bool half_period; bool wakeup_source; }; -- cgit v1.2.3 From 6a797d2737838906f2ea0a31686e87c3151e21ca Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:16:04 -0400 Subject: ext4: call out CRC and corruption errors with specific error codes Instead of overloading EIO for CRC errors and corrupt structures, return the same error codes that XFS returns for the same issues. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index 6da6f89722e1..f2a4b07a6ce6 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1449,4 +1449,7 @@ static inline tid_t jbd2_get_latest_transaction(journal_t *journal) #endif /* __KERNEL__ */ +#define EFSBADCRC EBADMSG /* Bad CRC detected */ +#define EFSCORRUPTED EUCLEAN /* Filesystem is corrupted */ + #endif /* _LINUX_JBD2_H */ -- cgit v1.2.3 From 56316a0d28f251dae6a3bc2b6d50e7c25389871f Mon Sep 17 00:00:00 2001 From: "Darrick J. Wong" Date: Sat, 17 Oct 2015 16:18:45 -0400 Subject: jbd2: clean up feature test macros with predicate functions Create separate predicate functions to test/set/clear feature flags, thereby replacing the wordy old macros. Furthermore, clean out the places where we open-coded feature tests. Signed-off-by: Darrick J. Wong Signed-off-by: Theodore Ts'o --- include/linux/jbd2.h | 71 +++++++++++++++++++++++++++++++++++++++++++++++++--- 1 file changed, 68 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index f2a4b07a6ce6..b5ca2314cace 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -278,6 +278,7 @@ typedef struct journal_superblock_s /* 0x0400 */ } journal_superblock_t; +/* Use the jbd2_{has,set,clear}_feature_* helpers; these will be removed */ #define JBD2_HAS_COMPAT_FEATURE(j,mask) \ ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_compat & cpu_to_be32((mask)))) @@ -288,7 +289,7 @@ typedef struct journal_superblock_s ((j)->j_format_version >= 2 && \ ((j)->j_superblock->s_feature_incompat & cpu_to_be32((mask)))) -#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 +#define JBD2_FEATURE_COMPAT_CHECKSUM 0x00000001 #define JBD2_FEATURE_INCOMPAT_REVOKE 0x00000001 #define JBD2_FEATURE_INCOMPAT_64BIT 0x00000002 @@ -296,6 +297,8 @@ typedef struct journal_superblock_s #define JBD2_FEATURE_INCOMPAT_CSUM_V2 0x00000008 #define JBD2_FEATURE_INCOMPAT_CSUM_V3 0x00000010 +/* See "journal feature predicate functions" below */ + /* Features known to this kernel version: */ #define JBD2_KNOWN_COMPAT_FEATURES JBD2_FEATURE_COMPAT_CHECKSUM #define JBD2_KNOWN_ROCOMPAT_FEATURES 0 @@ -1034,6 +1037,69 @@ struct journal_s __u32 j_csum_seed; }; +/* journal feature predicate functions */ +#define JBD2_FEATURE_COMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_compat & \ + cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_compat |= \ + cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_compat &= \ + ~cpu_to_be32(JBD2_FEATURE_COMPAT_##flagname); \ +} + +#define JBD2_FEATURE_RO_COMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_ro_compat & \ + cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_ro_compat |= \ + cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_ro_compat &= \ + ~cpu_to_be32(JBD2_FEATURE_RO_COMPAT_##flagname); \ +} + +#define JBD2_FEATURE_INCOMPAT_FUNCS(name, flagname) \ +static inline bool jbd2_has_feature_##name(journal_t *j) \ +{ \ + return ((j)->j_format_version >= 2 && \ + ((j)->j_superblock->s_feature_incompat & \ + cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname)) != 0); \ +} \ +static inline void jbd2_set_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_incompat |= \ + cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ +} \ +static inline void jbd2_clear_feature_##name(journal_t *j) \ +{ \ + (j)->j_superblock->s_feature_incompat &= \ + ~cpu_to_be32(JBD2_FEATURE_INCOMPAT_##flagname); \ +} + +JBD2_FEATURE_COMPAT_FUNCS(checksum, CHECKSUM) + +JBD2_FEATURE_INCOMPAT_FUNCS(revoke, REVOKE) +JBD2_FEATURE_INCOMPAT_FUNCS(64bit, 64BIT) +JBD2_FEATURE_INCOMPAT_FUNCS(async_commit, ASYNC_COMMIT) +JBD2_FEATURE_INCOMPAT_FUNCS(csum2, CSUM_V2) +JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) + /* * Journal flag definitions */ @@ -1340,8 +1406,7 @@ extern size_t journal_tag_bytes(journal_t *journal); static inline bool jbd2_journal_has_csum_v2or3_feature(journal_t *j) { - return JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V2) || - JBD2_HAS_INCOMPAT_FEATURE(j, JBD2_FEATURE_INCOMPAT_CSUM_V3); + return jbd2_has_feature_csum2(j) || jbd2_has_feature_csum3(j); } static inline int jbd2_journal_has_csum_v2or3(journal_t *journal) -- cgit v1.2.3 From 79c1faa4511e78380cd643dac88a775062a08bc0 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:51 -0400 Subject: tty: Remove tty_wait_until_sent_from_close() tty_wait_until_sent_from_close() drops the tty lock while waiting for the tty driver to finish sending previously accepted data (ie., data remaining in its write buffer and transmit fifo). tty_wait_until_sent_from_close() was added by commit a57a7bf3fc7e ("TTY: define tty_wait_until_sent_from_close") to prevent the entire tty subsystem from being unable to open new ttys while waiting for one tty to close while output drained. However, since commit 0911261d4cb6 ("tty: Don't take tty_mutex for tty count changes"), holding a tty lock while closing does not prevent other ttys from being opened/closed/hung up, but only prevents lifetime event changes for the tty under lock. Holding the tty lock while waiting for output to drain does prevent parallel non-blocking opens (O_NONBLOCK) from advancing or returning while the tty lock is held. However, all parallel opens _already_ block even if the tty lock is dropped while closing and the parallel open advances. Blocking in open has been in mainline since at least 2.6.29 (see tty_port_block_til_ready(); note the test for O_NONBLOCK is _after_ the wait while ASYNC_CLOSING). IOW, before this patch a non-blocking open will sleep anyway for the _entire_ duration of a parallel hardware shutdown, and when it wakes, the error return will cause a release of its tty, and it will restart with a fresh attempt to open. Similarly with a blocking open that is already waiting; when it's woken, the hardware shutdown has already completed to ASYNC_INITIALIZED is not set, which forces a release and restart as well. So, holding the tty lock across the _entire_ close (which is what this patch does), even while waiting for output to drain, is equivalent to the current outcome wrt parallel opens. Cc: Alan Cox Cc: David Laight CC: Arnd Bergmann CC: Karsten Keil CC: linuxppc-dev@lists.ozlabs.org Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 18 ------------------ 1 file changed, 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index d072ded41678..614c8224c32f 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -656,24 +656,6 @@ extern void __lockfunc tty_unlock(struct tty_struct *tty); extern void __lockfunc tty_lock_slave(struct tty_struct *tty); extern void __lockfunc tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); -/* - * this shall be called only from where BTM is held (like close) - * - * We need this to ensure nobody waits for us to finish while we are waiting. - * Without this we were encountering system stalls. - * - * This should be indeed removed with BTM removal later. - * - * Locking: BTM required. Nobody is allowed to hold port->mutex. - */ -static inline void tty_wait_until_sent_from_close(struct tty_struct *tty, - long timeout) -{ - tty_unlock(tty); /* tty->ops->close holds the BTM, drop it while waiting */ - tty_wait_until_sent(tty, timeout); - tty_lock(tty); -} - /* * wait_event_interruptible_tty -- wait for a condition with the tty lock held * -- cgit v1.2.3 From cc2aaabfd6d6335e2156781ca67715b4de17f993 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:54 -0400 Subject: tty: Remove tty_port::close_wait With the removal of tty_wait_until_sent_from_close(), tty drivers no longer wait during open for parallel closes to complete (instead, the tty core waits before calling the driver open() method). Thus, the close_wait waitqueue is no longer used for waiting. Remove struct tty_port::close_wait. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 614c8224c32f..090ce2a52262 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -227,7 +227,6 @@ struct tty_port { int blocked_open; /* Waiting to open */ int count; /* Usage count */ wait_queue_head_t open_wait; /* Open waiters */ - wait_queue_head_t close_wait; /* Close waiters */ wait_queue_head_t delta_msr_wait; /* Modem status change */ unsigned long flags; /* TTY flags ASY_*/ unsigned char console:1, /* port is a console */ -- cgit v1.2.3 From 9b9ab1b3f0860138681862cf6e4c48be59377ef1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:55 -0400 Subject: tty: r3964: Use tty->read_wait waitqueue The tty core provides read_wait waitqueue specifically for line disciplines to wait readers; otherwise, the line discipline may miss wakeups generated by the tty core. NB: The tty core already provides serialization for the line discipline's close() method, and guarantees no readers or writers will be using the closing instance of the line discipline. Completely remove that wakeup. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/n_r3964.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/n_r3964.h b/include/linux/n_r3964.h index 5d0b2a1dee69..e9adb4226224 100644 --- a/include/linux/n_r3964.h +++ b/include/linux/n_r3964.h @@ -152,9 +152,6 @@ struct r3964_info { unsigned char *rx_buf; /* ring buffer */ unsigned char *tx_buf; - wait_queue_head_t read_wait; - //struct wait_queue *read_wait; - struct r3964_block_header *rx_first; struct r3964_block_header *rx_last; struct r3964_block_header *tx_first; -- cgit v1.2.3 From aba24888d9af19e0bd1c7e7344fd27841611d7a8 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:56 -0400 Subject: tty: r3964: Replace/remove bogus tty lock use The tty lock is strictly for serializing tty lifetime events (open/close/hangup), and not for line discipline serialization. The tty core already provides serialization of concurrent writes to the same tty, and line discipline lifetime management (by ldisc references), so pinning the tty via tty_lock() is unnecessary and counter-productive; remove tty lock use. However, the line discipline is responsible for serializing reads (if required by the line discipline); add read_lock mutex to serialize calls of r3964_read(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/n_r3964.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/n_r3964.h b/include/linux/n_r3964.h index e9adb4226224..90a803aa42e8 100644 --- a/include/linux/n_r3964.h +++ b/include/linux/n_r3964.h @@ -161,8 +161,9 @@ struct r3964_info { unsigned char last_rx; unsigned char bcc; unsigned int blocks_in_rx_queue; - - + + struct mutex read_lock; /* serialize r3964_read */ + struct r3964_client_info *firstClient; unsigned int state; unsigned int flags; -- cgit v1.2.3 From f148d6d7b79adb42a8e7fa95bf6be5b607015f26 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 16:00:57 -0400 Subject: tty: Remove wait_event_interruptible_tty() In-tree users of wait_event_interruptible_tty() have been removed; remove. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 26 -------------------------- 1 file changed, 26 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 090ce2a52262..c2889f4331e1 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -655,32 +655,6 @@ extern void __lockfunc tty_unlock(struct tty_struct *tty); extern void __lockfunc tty_lock_slave(struct tty_struct *tty); extern void __lockfunc tty_unlock_slave(struct tty_struct *tty); extern void tty_set_lock_subclass(struct tty_struct *tty); -/* - * wait_event_interruptible_tty -- wait for a condition with the tty lock held - * - * The condition we are waiting for might take a long time to - * become true, or might depend on another thread taking the - * BTM. In either case, we need to drop the BTM to guarantee - * forward progress. This is a leftover from the conversion - * from the BKL and should eventually get removed as the BTM - * falls out of use. - * - * Do not use in new code. - */ -#define wait_event_interruptible_tty(tty, wq, condition) \ -({ \ - int __ret = 0; \ - if (!(condition)) \ - __ret = __wait_event_interruptible_tty(tty, wq, \ - condition); \ - __ret; \ -}) - -#define __wait_event_interruptible_tty(tty, wq, condition) \ - ___wait_event(wq, condition, TASK_INTERRUPTIBLE, 0, 0, \ - tty_unlock(tty); \ - schedule(); \ - tty_lock(tty)) #ifdef CONFIG_PROC_FS extern void proc_tty_register_driver(struct tty_driver *); -- cgit v1.2.3 From 47f82f1adf701b31d1816bf45118f8e83c02588e Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 13 Oct 2015 13:29:04 +0300 Subject: dmaengine: hsu: introduce stubs for the exported functions This allows UART drivers to register HSU DMA Engine without being forced to use ifdefs. Signed-off-by: Heikki Krogerus Acked-by: Vinod Koul Acked-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/dma/hsu.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index 234393a6997b..765b414eef98 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -38,11 +38,21 @@ struct hsu_dma_chip { struct hsu_dma_platform_data *pdata; }; +#if IS_ENABLED(CONFIG_HSU_DMA) /* Export to the internal users */ irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, unsigned short nr); /* Export to the platform drivers */ int hsu_dma_probe(struct hsu_dma_chip *chip); int hsu_dma_remove(struct hsu_dma_chip *chip); +#else +static inline irqreturn_t hsu_dma_irq(struct hsu_dma_chip *chip, + unsigned short nr) +{ + return IRQ_NONE; +} +static inline int hsu_dma_probe(struct hsu_dma_chip *chip) { return -ENODEV; } +static inline int hsu_dma_remove(struct hsu_dma_chip *chip) { return 0; } +#endif /* CONFIG_HSU_DMA */ #endif /* _DMA_HSU_H */ -- cgit v1.2.3 From 4c97ad993d763904fc1c9e0bdc3a6dba062802a2 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Tue, 13 Oct 2015 13:29:05 +0300 Subject: dmaengine: hsu: remove platform data There are no platforms where it's not possible to calculate the number of channels based on IO space length, and since that is the only purpose for struct hsu_dma_platform_data, removing it. Suggested-by: Andy Shevchenko Signed-off-by: Heikki Krogerus Acked-by: Vinod Koul Acked-by: Andy Shevchenko Signed-off-by: Greg Kroah-Hartman --- include/linux/dma/hsu.h | 1 - include/linux/platform_data/dma-hsu.h | 4 ---- 2 files changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma/hsu.h b/include/linux/dma/hsu.h index 765b414eef98..79df69dc629c 100644 --- a/include/linux/dma/hsu.h +++ b/include/linux/dma/hsu.h @@ -35,7 +35,6 @@ struct hsu_dma_chip { unsigned int length; unsigned int offset; struct hsu_dma *hsu; - struct hsu_dma_platform_data *pdata; }; #if IS_ENABLED(CONFIG_HSU_DMA) diff --git a/include/linux/platform_data/dma-hsu.h b/include/linux/platform_data/dma-hsu.h index 8a1f6a4920b2..3453fa655502 100644 --- a/include/linux/platform_data/dma-hsu.h +++ b/include/linux/platform_data/dma-hsu.h @@ -18,8 +18,4 @@ struct hsu_dma_slave { int chan_id; }; -struct hsu_dma_platform_data { - unsigned short nr_channels; -}; - #endif /* _PLATFORM_DATA_DMA_HSU_H */ -- cgit v1.2.3 From 2812d9e9fd94c54b0482215f579e6aa04452a322 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 10 Oct 2015 20:28:42 -0400 Subject: tty: Combine SIGTTOU/SIGTTIN handling The job_control() check in n_tty_read() has nearly identical purpose and results as tty_check_change(). Both functions' purpose is to determine if the current task's pgrp is the foreground pgrp for the tty, and if not, to signal the current pgrp. Introduce __tty_check_change() which takes the signal to send and performs the shared operations for job control() and tty_check_change(). Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index c2889f4331e1..533d7f6e2481 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -423,6 +423,7 @@ extern int tty_paranoia_check(struct tty_struct *tty, struct inode *inode, const char *routine); extern const char *tty_name(const struct tty_struct *tty); extern void tty_wait_until_sent(struct tty_struct *tty, long timeout); +extern int __tty_check_change(struct tty_struct *tty, int sig); extern int tty_check_change(struct tty_struct *tty); extern void __stop_tty(struct tty_struct *tty); extern void stop_tty(struct tty_struct *tty); -- cgit v1.2.3 From e176058f0de53c2346734e5254835e0045364001 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sat, 17 Oct 2015 16:36:23 -0400 Subject: tty: Abstract tty buffer work Introduce API functions to restart and cancel tty buffer work, rather than manipulate buffer work directly. Signed-off-by: Peter Hurley Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 533d7f6e2481..5b04b0a5375b 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -467,6 +467,8 @@ extern void tty_buffer_free_all(struct tty_port *port); extern void tty_buffer_flush(struct tty_struct *tty, struct tty_ldisc *ld); extern void tty_buffer_init(struct tty_port *port); extern void tty_buffer_set_lock_subclass(struct tty_port *port); +extern bool tty_buffer_restart_work(struct tty_port *port); +extern bool tty_buffer_cancel_work(struct tty_port *port); extern speed_t tty_termios_baud_rate(struct ktermios *termios); extern speed_t tty_termios_input_baud_rate(struct ktermios *termios); extern void tty_termios_encode_baud_rate(struct ktermios *termios, -- cgit v1.2.3 From 52d63671c15bd3dfcd72f4ea324b338d1309db97 Mon Sep 17 00:00:00 2001 From: Gabriel Laskar Date: Tue, 6 Oct 2015 16:27:36 +0200 Subject: msm: remove unused header Signed-off-by: Gabriel Laskar Acked-by: Arnd Bergmann Signed-off-by: Greg Kroah-Hartman --- include/linux/msm_mdp.h | 79 ------------------------------------------------- 1 file changed, 79 deletions(-) delete mode 100644 include/linux/msm_mdp.h (limited to 'include/linux') diff --git a/include/linux/msm_mdp.h b/include/linux/msm_mdp.h deleted file mode 100644 index fe722c1fb61d..000000000000 --- a/include/linux/msm_mdp.h +++ /dev/null @@ -1,79 +0,0 @@ -/* include/linux/msm_mdp.h - * - * Copyright (C) 2007 Google Incorporated - * - * This software is licensed under the terms of the GNU General Public - * License version 2, as published by the Free Software Foundation, and - * may be copied, distributed, and modified under those terms. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -#ifndef _MSM_MDP_H_ -#define _MSM_MDP_H_ - -#include - -#define MSMFB_IOCTL_MAGIC 'm' -#define MSMFB_GRP_DISP _IOW(MSMFB_IOCTL_MAGIC, 1, unsigned int) -#define MSMFB_BLIT _IOW(MSMFB_IOCTL_MAGIC, 2, unsigned int) - -enum { - MDP_RGB_565, /* RGB 565 planar */ - MDP_XRGB_8888, /* RGB 888 padded */ - MDP_Y_CBCR_H2V2, /* Y and CbCr, pseudo planar w/ Cb is in MSB */ - MDP_ARGB_8888, /* ARGB 888 */ - MDP_RGB_888, /* RGB 888 planar */ - MDP_Y_CRCB_H2V2, /* Y and CrCb, pseudo planar w/ Cr is in MSB */ - MDP_YCRYCB_H2V1, /* YCrYCb interleave */ - MDP_Y_CRCB_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_Y_CBCR_H2V1, /* Y and CrCb, pseduo planar w/ Cr is in MSB */ - MDP_RGBA_8888, /* ARGB 888 */ - MDP_BGRA_8888, /* ABGR 888 */ - MDP_RGBX_8888, /* RGBX 888 */ - MDP_IMGTYPE_LIMIT /* Non valid image type after this enum */ -}; - -enum { - PMEM_IMG, - FB_IMG, -}; - -/* flag values */ -#define MDP_ROT_NOP 0 -#define MDP_FLIP_LR 0x1 -#define MDP_FLIP_UD 0x2 -#define MDP_ROT_90 0x4 -#define MDP_ROT_180 (MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_ROT_270 (MDP_ROT_90|MDP_FLIP_UD|MDP_FLIP_LR) -#define MDP_DITHER 0x8 -#define MDP_BLUR 0x10 - -#define MDP_TRANSP_NOP 0xffffffff -#define MDP_ALPHA_NOP 0xff - -struct mdp_rect { - u32 x, y, w, h; -}; - -struct mdp_img { - u32 width, height, format, offset; - int memory_id; /* the file descriptor */ -}; - -struct mdp_blit_req { - struct mdp_img src; - struct mdp_img dst; - struct mdp_rect src_rect; - struct mdp_rect dst_rect; - u32 alpha, transp_mask, flags; -}; - -struct mdp_blit_req_list { - u32 count; - struct mdp_blit_req req[]; -}; - -#endif /* _MSM_MDP_H_ */ -- cgit v1.2.3 From ad1bfe410e91189522514ea784668dc75a4e64c4 Mon Sep 17 00:00:00 2001 From: Dmitry Kalinkin Date: Sun, 11 Oct 2015 01:00:58 +0300 Subject: vme: 8-bit status/id takes 256 values, not 255 Fixes an off by one array size. Signed-off-by: Dmitry Kalinkin Signed-off-by: Greg Kroah-Hartman --- include/linux/vme.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vme.h b/include/linux/vme.h index c0131358f351..71e4a6dec5ac 100644 --- a/include/linux/vme.h +++ b/include/linux/vme.h @@ -81,6 +81,9 @@ struct vme_resource { extern struct bus_type vme_bus_type; +/* Number of VME interrupt vectors */ +#define VME_NUM_STATUSID 256 + /* VME_MAX_BRIDGES comes from the type of vme_bus_numbers */ #define VME_MAX_BRIDGES (sizeof(unsigned int)*8) #define VME_MAX_SLOTS 32 -- cgit v1.2.3 From c23fe83138ed7b11ad763cbe8bf98e5378c04bd6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Sun, 18 Oct 2015 22:43:19 +0530 Subject: debugfs: Add debugfs_create_ulong() Add debugfs_create_ulong() for the users of type 'unsigned long'. These will be 32 bits long on a 32 bit machine and 64 bits long on a 64 bit machine. Signed-off-by: Viresh Kumar Signed-off-by: Greg Kroah-Hartman --- include/linux/debugfs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/debugfs.h b/include/linux/debugfs.h index 8321fe3058d6..19c066dce1da 100644 --- a/include/linux/debugfs.h +++ b/include/linux/debugfs.h @@ -79,6 +79,8 @@ struct dentry *debugfs_create_u32(const char *name, umode_t mode, struct dentry *parent, u32 *value); struct dentry *debugfs_create_u64(const char *name, umode_t mode, struct dentry *parent, u64 *value); +struct dentry *debugfs_create_ulong(const char *name, umode_t mode, + struct dentry *parent, unsigned long *value); struct dentry *debugfs_create_x8(const char *name, umode_t mode, struct dentry *parent, u8 *value); struct dentry *debugfs_create_x16(const char *name, umode_t mode, -- cgit v1.2.3 From 4327ba52afd03fc4b5afa0ee1d774c9c5b0e85c5 Mon Sep 17 00:00:00 2001 From: Daeho Jeong Date: Sun, 18 Oct 2015 17:02:56 -0400 Subject: ext4, jbd2: ensure entering into panic after recording an error in superblock If a EXT4 filesystem utilizes JBD2 journaling and an error occurs, the journaling will be aborted first and the error number will be recorded into JBD2 superblock and, finally, the system will enter into the panic state in "errors=panic" option. But, in the rare case, this sequence is little twisted like the below figure and it will happen that the system enters into panic state, which means the system reset in mobile environment, before completion of recording an error in the journal superblock. In this case, e2fsck cannot recognize that the filesystem failure occurred in the previous run and the corruption wouldn't be fixed. Task A Task B ext4_handle_error() -> jbd2_journal_abort() -> __journal_abort_soft() -> __jbd2_journal_abort_hard() | -> journal->j_flags |= JBD2_ABORT; | | __ext4_abort() | -> jbd2_journal_abort() | | -> __journal_abort_soft() | | -> if (journal->j_flags & JBD2_ABORT) | | return; | -> panic() | -> jbd2_journal_update_sb_errno() Tested-by: Hobin Woo Signed-off-by: Daeho Jeong Signed-off-by: Theodore Ts'o Cc: stable@vger.kernel.org --- include/linux/jbd2.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/jbd2.h b/include/linux/jbd2.h index b5ca2314cace..65407f6c9120 100644 --- a/include/linux/jbd2.h +++ b/include/linux/jbd2.h @@ -1112,6 +1112,7 @@ JBD2_FEATURE_INCOMPAT_FUNCS(csum3, CSUM_V3) #define JBD2_ABORT_ON_SYNCDATA_ERR 0x040 /* Abort the journal on file * data write error in ordered * mode */ +#define JBD2_REC_ERR 0x080 /* The errno in the sb has been recorded */ /* * Function declarations for the journaling transaction and buffer -- cgit v1.2.3 From 37c1c04cca920de8a68285751b2c7b3d937ad50c Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Wed, 22 Apr 2015 19:36:06 +0300 Subject: sysfs: added __compat_only_sysfs_link_entry_to_kobj() Added a new function __compat_only_sysfs_link_group_to_kobj() that adds a symlink from attribute or group to a kobject. This needed for maintaining backwards compatibility with PPI attributes in the TPM driver. Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- include/linux/sysfs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 9f65758311a4..ea090eaf468c 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -268,6 +268,9 @@ int sysfs_add_link_to_group(struct kobject *kobj, const char *group_name, struct kobject *target, const char *link_name); void sysfs_remove_link_from_group(struct kobject *kobj, const char *group_name, const char *link_name); +int __compat_only_sysfs_link_entry_to_kobj(struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name); void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr); @@ -451,6 +454,14 @@ static inline void sysfs_remove_link_from_group(struct kobject *kobj, { } +static inline int __compat_only_sysfs_link_entry_to_kobj( + struct kobject *kobj, + struct kobject *target_kobj, + const char *target_name) +{ + return 0; +} + static inline void sysfs_notify(struct kobject *kobj, const char *dir, const char *attr) { -- cgit v1.2.3 From 954650efb79f99d5c817c121bb0a7c6c53362048 Mon Sep 17 00:00:00 2001 From: Jarkko Sakkinen Date: Sat, 30 May 2015 08:09:04 +0300 Subject: tpm: seal/unseal for TPM 2.0 Added tpm_trusted_seal() and tpm_trusted_unseal() API for sealing trusted keys. This patch implements basic sealing and unsealing functionality for TPM 2.0: * Seal with a parent key using a 20 byte auth value. * Unseal with a parent key using a 20 byte auth value. Signed-off-by: Jarkko Sakkinen Signed-off-by: Peter Huewe --- include/linux/tpm.h | 26 ++++++++++++++++++++++++++ 1 file changed, 26 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tpm.h b/include/linux/tpm.h index 8350c538b486..706e63eea080 100644 --- a/include/linux/tpm.h +++ b/include/linux/tpm.h @@ -30,6 +30,8 @@ #define TPM_ANY_NUM 0xFFFF struct tpm_chip; +struct trusted_key_payload; +struct trusted_key_options; struct tpm_class_ops { const u8 req_complete_mask; @@ -46,11 +48,22 @@ struct tpm_class_ops { #if defined(CONFIG_TCG_TPM) || defined(CONFIG_TCG_TPM_MODULE) +extern int tpm_is_tpm2(u32 chip_num); extern int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf); extern int tpm_pcr_extend(u32 chip_num, int pcr_idx, const u8 *hash); extern int tpm_send(u32 chip_num, void *cmd, size_t buflen); extern int tpm_get_random(u32 chip_num, u8 *data, size_t max); +extern int tpm_seal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options); +extern int tpm_unseal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options); #else +static inline int tpm_is_tpm2(u32 chip_num) +{ + return -ENODEV; +} static inline int tpm_pcr_read(u32 chip_num, int pcr_idx, u8 *res_buf) { return -ENODEV; } @@ -63,5 +76,18 @@ static inline int tpm_send(u32 chip_num, void *cmd, size_t buflen) { static inline int tpm_get_random(u32 chip_num, u8 *data, size_t max) { return -ENODEV; } + +static inline int tpm_seal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + return -ENODEV; +} +static inline int tpm_unseal_trusted(u32 chip_num, + struct trusted_key_payload *payload, + struct trusted_key_options *options) +{ + return -ENODEV; +} #endif #endif -- cgit v1.2.3 From c8d46ece44458a2088896d6fcae123a72bdfd429 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 19 Oct 2015 09:17:40 +0100 Subject: ARM: 8446/1: amba: Remove unused callbacks for legacy system PM Signed-off-by: Ulf Hansson Signed-off-by: Russell King --- include/linux/amba/bus.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/amba/bus.h b/include/linux/amba/bus.h index 50fc66868402..9006c4e75cf7 100644 --- a/include/linux/amba/bus.h +++ b/include/linux/amba/bus.h @@ -41,8 +41,6 @@ struct amba_driver { int (*probe)(struct amba_device *, const struct amba_id *); int (*remove)(struct amba_device *); void (*shutdown)(struct amba_device *); - int (*suspend)(struct amba_device *, pm_message_t); - int (*resume)(struct amba_device *); const struct amba_id *id_table; }; -- cgit v1.2.3 From da23ac1e40ce844d1a9553906bdacce160af76f6 Mon Sep 17 00:00:00 2001 From: "Subhransu S. Prusty" Date: Tue, 29 Sep 2015 13:56:10 +0530 Subject: ALSA: hda - Add hduadio support to DEVTABLE For generating modalias entries automatically, move the definition of struct hda_device_id to linux/mod_devicetable.h and add the handling of this record in file2alias helper. The new modalias is represented with combination of vendor id, device id, and api version as "hdaudio:vNrNaN". This patch itself doesn't convert the existing modaliases. Since they were added manually, this patch won't give any regression by itself at this point. [Modified the modalias format to adapt the api_version field, and drop invalid ANY_ID definition by tiwai] Signed-off-by: Subhransu S. Prusty Reviewed-by: Vinod Koul Tested-by: Subhransu S Prusty Signed-off-by: Takashi Iwai --- include/linux/mod_devicetable.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 688997a24aad..00825672d256 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -219,6 +219,14 @@ struct serio_device_id { __u8 proto; }; +struct hda_device_id { + __u32 vendor_id; + __u32 rev_id; + __u8 api_version; + const char *name; + unsigned long driver_data; +}; + /* * Struct used for matching a device */ -- cgit v1.2.3 From 233e7f267e580fefdeb36628b7efe8bfe056d27c Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Thu, 8 Oct 2015 16:51:31 +0200 Subject: stop_machine: Ensure that a queued callback will be called before cpu_stop_park() cpu_stop_queue_work() checks stopper->enabled before it queues the work, but ->enabled == T can only guarantee cpu_stop_signal_done() if we race with cpu_down(). This is not enough for stop_two_cpus() or stop_machine(), they will deadlock if multi_cpu_stop() won't be called by one of the target CPU's. stop_machine/stop_cpus are fine, they rely on stop_cpus_mutex. But stop_two_cpus() has to check cpu_active() to avoid the same race with hotplug, and this check is very unobvious and probably not even correct if we race with cpu_up(). Change cpu_down() pass to clear ->enabled before cpu_stopper_thread() flushes the pending ->works and returns with KTHREAD_SHOULD_PARK set. Note also that smpboot_thread_call() calls cpu_stop_unpark() which sets enabled == T at CPU_ONLINE stage, so this CPU can't go away until cpu_stopper_thread() is called at least once. This all means that if cpu_stop_queue_work() succeeds, we know that work->fn() will be called. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151008145131.GA18139@redhat.com Signed-off-by: Ingo Molnar --- include/linux/stop_machine.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 414d924318ce..7b76362b381c 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -33,6 +33,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, struct cpu_stop_work *work_buf); int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); +void stop_machine_park(int cpu); #else /* CONFIG_SMP */ -- cgit v1.2.3 From c00166d87e730088d919814020e96ffed129d0d1 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 9 Oct 2015 18:00:49 +0200 Subject: stop_machine: Kill smp_hotplug_thread->pre_unpark, introduce stop_machine_unpark() 1. Change smpboot_unpark_thread() to check ->selfparking, just like smpboot_park_thread() does. 2. Introduce stop_machine_unpark() which sets ->enabled and calls kthread_unpark(). 3. Change smpboot_thread_call() and cpu_stop_init() to call stop_machine_unpark() by hand. This way: - IMO the ->selfparking logic becomes more consistent. - We can kill the smp_hotplug_thread->pre_unpark() method. - We can easily unpark the stopper thread earlier. Say, we can move stop_machine_unpark() from smpboot_thread_call() to sched_cpu_active() as Peter suggests. Signed-off-by: Oleg Nesterov Signed-off-by: Peter Zijlstra (Intel) Cc: Andrew Morton Cc: Linus Torvalds Cc: Mike Galbraith Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Rik van Riel Cc: Tejun Heo Cc: Thomas Gleixner Cc: heiko.carstens@de.ibm.com Link: http://lkml.kernel.org/r/20151009160049.GA10166@redhat.com Signed-off-by: Ingo Molnar --- include/linux/smpboot.h | 4 ---- include/linux/stop_machine.h | 1 + 2 files changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/smpboot.h b/include/linux/smpboot.h index e6109a6cd8f6..12910cf19869 100644 --- a/include/linux/smpboot.h +++ b/include/linux/smpboot.h @@ -24,9 +24,6 @@ struct smpboot_thread_data; * parked (cpu offline) * @unpark: Optional unpark function, called when the thread is * unparked (cpu online) - * @pre_unpark: Optional unpark function, called before the thread is - * unparked (cpu online). This is not guaranteed to be - * called on the target cpu of the thread. Careful! * @cpumask: Internal state. To update which threads are unparked, * call smpboot_update_cpumask_percpu_thread(). * @selfparking: Thread is not parked by the park function. @@ -42,7 +39,6 @@ struct smp_hotplug_thread { void (*cleanup)(unsigned int cpu, bool online); void (*park)(unsigned int cpu); void (*unpark)(unsigned int cpu); - void (*pre_unpark)(unsigned int cpu); cpumask_var_t cpumask; bool selfparking; const char *thread_comm; diff --git a/include/linux/stop_machine.h b/include/linux/stop_machine.h index 7b76362b381c..0adedca24c5b 100644 --- a/include/linux/stop_machine.h +++ b/include/linux/stop_machine.h @@ -34,6 +34,7 @@ void stop_one_cpu_nowait(unsigned int cpu, cpu_stop_fn_t fn, void *arg, int stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); int try_stop_cpus(const struct cpumask *cpumask, cpu_stop_fn_t fn, void *arg); void stop_machine_park(int cpu); +void stop_machine_unpark(int cpu); #else /* CONFIG_SMP */ -- cgit v1.2.3 From d976441f44bc5d48635d081d277aa76556ffbf8b Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Mon, 19 Oct 2015 11:37:17 +0300 Subject: compiler, atomics, kasan: Provide READ_ONCE_NOCHECK() Some code may perform racy by design memory reads. This could be harmless, yet such code may produce KASAN warnings. To hide such accesses from KASAN this patch introduces READ_ONCE_NOCHECK() macro. KASAN will not check the memory accessed by READ_ONCE_NOCHECK(). The KernelThreadSanitizer (KTSAN) is going to ignore it as well. This patch creates __read_once_size_nocheck() a clone of __read_once_size(). The only difference between them is 'no_sanitized_address' attribute appended to '*_nocheck' function. This attribute tells the compiler that instrumentation of memory accesses should not be applied to that function. We declare it as static '__maybe_unsed' because GCC is not capable to inline such function: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 With KASAN=n READ_ONCE_NOCHECK() is just a clone of READ_ONCE(). Signed-off-by: Andrey Ryabinin Cc: Alexander Potapenko Cc: Andrew Morton Cc: Andrey Konovalov Cc: Andy Lutomirski Cc: Borislav Petkov Cc: Denys Vlasenko Cc: Dmitry Vyukov Cc: Kostya Serebryany Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sasha Levin Cc: Thomas Gleixner Cc: Wolfram Gloger Cc: kasan-dev Link: http://lkml.kernel.org/r/1445243838-17763-2-git-send-email-aryabinin@virtuozzo.com Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc.h | 13 +++++++++ include/linux/compiler.h | 66 +++++++++++++++++++++++++++++++++++--------- 2 files changed, 66 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index dfaa7b3e9ae9..8efb40e61d6e 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,12 +237,25 @@ #define KASAN_ABI_VERSION 3 #endif +#if GCC_VERSION >= 40902 +/* + * Tell the compiler that address safety instrumentation (KASAN) + * should not be applied to that function. + * Conflicts with inlining: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 + */ +#define __no_sanitize_address __attribute__((no_sanitize_address)) +#endif + #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) #define __noclone /* not needed */ #endif +#if !defined(__no_sanitize_address) +#define __no_sanitize_address +#endif + /* * A trick to suppress uninitialized variable warning without generating any * code diff --git a/include/linux/compiler.h b/include/linux/compiler.h index c836eb2dc44d..3d7810341b57 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -198,19 +198,45 @@ void ftrace_likely_update(struct ftrace_branch_data *f, int val, int expect); #include -static __always_inline void __read_once_size(const volatile void *p, void *res, int size) +#define __READ_ONCE_SIZE \ +({ \ + switch (size) { \ + case 1: *(__u8 *)res = *(volatile __u8 *)p; break; \ + case 2: *(__u16 *)res = *(volatile __u16 *)p; break; \ + case 4: *(__u32 *)res = *(volatile __u32 *)p; break; \ + case 8: *(__u64 *)res = *(volatile __u64 *)p; break; \ + default: \ + barrier(); \ + __builtin_memcpy((void *)res, (const void *)p, size); \ + barrier(); \ + } \ +}) + +static __always_inline +void __read_once_size(const volatile void *p, void *res, int size) { - switch (size) { - case 1: *(__u8 *)res = *(volatile __u8 *)p; break; - case 2: *(__u16 *)res = *(volatile __u16 *)p; break; - case 4: *(__u32 *)res = *(volatile __u32 *)p; break; - case 8: *(__u64 *)res = *(volatile __u64 *)p; break; - default: - barrier(); - __builtin_memcpy((void *)res, (const void *)p, size); - barrier(); - } + __READ_ONCE_SIZE; +} + +#ifdef CONFIG_KASAN +/* + * This function is not 'inline' because __no_sanitize_address confilcts + * with inlining. Attempt to inline it may cause a build failure. + * https://gcc.gnu.org/bugzilla/show_bug.cgi?id=67368 + * '__maybe_unused' allows us to avoid defined-but-not-used warnings. + */ +static __no_sanitize_address __maybe_unused +void __read_once_size_nocheck(const volatile void *p, void *res, int size) +{ + __READ_ONCE_SIZE; +} +#else +static __always_inline +void __read_once_size_nocheck(const volatile void *p, void *res, int size) +{ + __READ_ONCE_SIZE; } +#endif static __always_inline void __write_once_size(volatile void *p, void *res, int size) { @@ -248,8 +274,22 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s * required ordering. */ -#define READ_ONCE(x) \ - ({ union { typeof(x) __val; char __c[1]; } __u; __read_once_size(&(x), __u.__c, sizeof(x)); __u.__val; }) +#define __READ_ONCE(x, check) \ +({ \ + union { typeof(x) __val; char __c[1]; } __u; \ + if (check) \ + __read_once_size(&(x), __u.__c, sizeof(x)); \ + else \ + __read_once_size_nocheck(&(x), __u.__c, sizeof(x)); \ + __u.__val; \ +}) +#define READ_ONCE(x) __READ_ONCE(x, 1) + +/* + * Use READ_ONCE_NOCHECK() instead of READ_ONCE() if you need + * to hide memory access from KASAN. + */ +#define READ_ONCE_NOCHECK(x) __READ_ONCE(x, 0) #define WRITE_ONCE(x, val) \ ({ \ -- cgit v1.2.3 From 839a42af9441982311f33241529f711f23c857c8 Mon Sep 17 00:00:00 2001 From: LABBE Corentin Date: Thu, 15 Oct 2015 20:34:53 +0200 Subject: lib/mpi: clean unused SHA1_DIGEST_LENGTH The define SHA1_DIGEST_LENGTH is not used anywhere, so remove it. Signed-off-by: LABBE Corentin Signed-off-by: Herbert Xu --- include/linux/mpi.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mpi.h b/include/linux/mpi.h index 8b8269fa8907..3a5abe95affd 100644 --- a/include/linux/mpi.h +++ b/include/linux/mpi.h @@ -33,12 +33,6 @@ #include #include -/* DSI defines */ - -#define SHA1_DIGEST_LENGTH 20 - -/*end of DSI defines */ - #define BYTES_PER_MPI_LIMB (BITS_PER_LONG / 8) #define BITS_PER_MPI_LIMB BITS_PER_LONG -- cgit v1.2.3 From b299167652fe58f1ebadb3e3ac84a5a0b74e534e Mon Sep 17 00:00:00 2001 From: Max Filippov Date: Wed, 7 Oct 2015 02:45:11 +0300 Subject: i2c: ocores: support big-endian register layout This allows using OpenCores I2C controller attached to its host in native-endian mode with bi-endian CPUs. Example of such system is Xtensa XTFPGA platform. Acked-by: Peter Korsgaard Signed-off-by: Max Filippov Signed-off-by: Wolfram Sang --- include/linux/i2c-ocores.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c-ocores.h b/include/linux/i2c-ocores.h index 1c06b5c7c308..01edd96fe1f7 100644 --- a/include/linux/i2c-ocores.h +++ b/include/linux/i2c-ocores.h @@ -15,6 +15,7 @@ struct ocores_i2c_platform_data { u32 reg_shift; /* register offset shift value */ u32 reg_io_width; /* register io read/write width */ u32 clock_khz; /* input clock in kHz */ + bool big_endian; /* registers are big endian */ u8 num_devices; /* number of devices in the devices list */ struct i2c_board_info const *devices; /* devices connected to the bus */ }; -- cgit v1.2.3 From c6f1891323e6a259c0b0f516a3a3e0f6b0ee2c5f Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Wed, 7 Oct 2015 10:16:31 +0200 Subject: i2c: rcar: Remove obsolete platform data support Since commit 4baadb9e05c68962 ("ARM: shmobile: r8a7778: remove obsolete setup code"), Renesas R-Car SoCs are only supported in generic DT-only ARM multi-platform builds. The driver doesn't need to use platform data anymore, hence remove platform data configuration. Signed-off-by: Geert Uytterhoeven [wsa: removed now unused ret value and cast to proper enum type] Signed-off-by: Wolfram Sang --- include/linux/i2c/i2c-rcar.h | 10 ---------- 1 file changed, 10 deletions(-) delete mode 100644 include/linux/i2c/i2c-rcar.h (limited to 'include/linux') diff --git a/include/linux/i2c/i2c-rcar.h b/include/linux/i2c/i2c-rcar.h deleted file mode 100644 index 496f5c2b23c9..000000000000 --- a/include/linux/i2c/i2c-rcar.h +++ /dev/null @@ -1,10 +0,0 @@ -#ifndef __I2C_R_CAR_H__ -#define __I2C_R_CAR_H__ - -#include - -struct i2c_rcar_platform_data { - u32 bus_speed; -}; - -#endif /* __I2C_R_CAR_H__ */ -- cgit v1.2.3 From 5d170139eb10ae12e1bd076245c42b35453d8324 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Sun, 18 Oct 2015 13:05:40 +0200 Subject: vga_switcheroo: Constify vga_switcheroo_handler MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit vga_switcheroo_client_ops has always been declared const since its introduction with 26ec685ff9d9 ("vga_switcheroo: Introduce struct vga_switcheroo_client_ops"). Do so for vga_switcheroo_handler as well. drivers/gpu/drm/amd/amdgpu/amdgpu.ko: 6 .rodata 00009888 - 19 .data 00001f00 + 19 .data 00001ee0 drivers/gpu/drm/nouveau/nouveau.ko: 6 .rodata 000460b8 17 .data 00018fe0 drivers/gpu/drm/radeon/radeon.ko: - 7 .rodata 00030944 + 7 .rodata 00030964 - 21 .data 0000d6a0 + 21 .data 0000d678 drivers/platform/x86/apple-gmux.ko: - 7 .rodata 00000140 + 7 .rodata 00000160 - 11 .data 000000e0 + 11 .data 000000b8 Cc: Ben Skeggs Cc: Darren Hart Cc: Alex Deucher Signed-off-by: Lukas Wunner Reviewed-by: Christian König . Signed-off-by: Daniel Vetter --- include/linux/vga_switcheroo.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index c55751155631..786bc931dbd1 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -137,7 +137,7 @@ int vga_switcheroo_register_audio_client(struct pci_dev *pdev, void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info); -int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler); +int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler); void vga_switcheroo_unregister_handler(void); int vga_switcheroo_process_delayed_switch(void); @@ -155,7 +155,7 @@ static inline void vga_switcheroo_unregister_client(struct pci_dev *dev) {} static inline int vga_switcheroo_register_client(struct pci_dev *dev, const struct vga_switcheroo_client_ops *ops, bool driver_power_control) { return 0; } static inline void vga_switcheroo_client_fb_set(struct pci_dev *dev, struct fb_info *info) {} -static inline int vga_switcheroo_register_handler(struct vga_switcheroo_handler *handler) { return 0; } +static inline int vga_switcheroo_register_handler(const struct vga_switcheroo_handler *handler) { return 0; } static inline int vga_switcheroo_register_audio_client(struct pci_dev *pdev, const struct vga_switcheroo_client_ops *ops, enum vga_switcheroo_client_id id) { return 0; } -- cgit v1.2.3 From c63b7682b6d90530d3a071ff75b81bfddcce8598 Mon Sep 17 00:00:00 2001 From: Tal Shorer Date: Sat, 1 Aug 2015 15:27:57 +0300 Subject: tracing: Allow disabling compilation of specific trace systems Allow a trace events header file to disable compilation of its trace events by defining the preprocessor macro NOTRACE. This could be done, for example, according to a Kconfig option. Link: http://lkml.kernel.org/r/1438432079-11704-3-git-send-email-tal.shorer@gmail.com Signed-off-by: Tal Shorer Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 17 ++++++++++++++--- 1 file changed, 14 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index a5f7f3ecafa3..afada369c5b7 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -111,7 +111,18 @@ extern void syscall_unregfunc(void); #define TP_ARGS(args...) args #define TP_CONDITION(args...) args -#ifdef CONFIG_TRACEPOINTS +/* + * Individual subsystem my have a separate configuration to + * enable their tracepoints. By default, this file will create + * the tracepoints if CONFIG_TRACEPOINT is defined. If a subsystem + * wants to be able to disable its tracepoints from being created + * it can define NOTRACE before including the tracepoint headers. + */ +#if defined(CONFIG_TRACEPOINTS) && !defined(NOTRACE) +#define TRACEPOINTS_ENABLED +#endif + +#ifdef TRACEPOINTS_ENABLED /* * it_func[0] is never NULL because there is at least one element in the array @@ -234,7 +245,7 @@ extern void syscall_unregfunc(void); #define EXPORT_TRACEPOINT_SYMBOL(name) \ EXPORT_SYMBOL(__tracepoint_##name) -#else /* !CONFIG_TRACEPOINTS */ +#else /* !TRACEPOINTS_ENABLED */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ static inline void trace_##name(proto) \ { } \ @@ -266,7 +277,7 @@ extern void syscall_unregfunc(void); #define EXPORT_TRACEPOINT_SYMBOL_GPL(name) #define EXPORT_TRACEPOINT_SYMBOL(name) -#endif /* CONFIG_TRACEPOINTS */ +#endif /* TRACEPOINTS_ENABLED */ #ifdef CONFIG_TRACING /** -- cgit v1.2.3 From f672258391b42a5c7cc2732c9c063e56a85c8dbe Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:42 -0700 Subject: tcp: track min RTT using windowed min-filter Kathleen Nichols' algorithm for tracking the minimum RTT of a data stream over some measurement window. It uses constant space and constant time per update. Yet it almost always delivers the same minimum as an implementation that has to keep all the data in the window. The measurement window is tunable via sysctl.net.ipv4.tcp_min_rtt_wlen with a default value of 5 minutes. The algorithm keeps track of the best, 2nd best & 3rd best min values, maintaining an invariant that the measurement time of the n'th best >= n-1'th best. It also makes sure that the three values are widely separated in the time window since that bounds the worse case error when that data is monotonically increasing over the window. Upon getting a new min, we can forget everything earlier because it has no value - the new min is less than everything else in the window by definition and it's the most recent. So we restart fresh on every new min and overwrites the 2nd & 3rd choices. The same property holds for the 2nd & 3rd best. Therefore we have to maintain two invariants to maximize the information in the samples, one on values (1st.v <= 2nd.v <= 3rd.v) and the other on times (now-win <=1st.t <= 2nd.t <= 3rd.t <= now). These invariants determine the structure of the code The RTT input to the windowed filter is the minimum RTT measured from ACK or SACK, or as the last resort from TCP timestamps. The accessor tcp_min_rtt() returns the minimum RTT seen in the window. ~0U indicates it is not available. The minimum is 1usec even if the true RTT is below that. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 86a7edaa6797..90edef5508f9 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -217,6 +217,9 @@ struct tcp_sock { u32 mdev_max_us; /* maximal mdev for the last rtt period */ u32 rttvar_us; /* smoothed mdev_max */ u32 rtt_seq; /* sequence number to update rttvar */ + struct rtt_meas { + u32 rtt, ts; /* RTT in usec and sampling time in jiffies. */ + } rtt_min[3]; u32 packets_out; /* Packets which are "in flight" */ u32 retrans_out; /* Retransmitted packets out */ -- cgit v1.2.3 From af82f4e84866ecd360a53f770d6217637116e6c1 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:43 -0700 Subject: tcp: remove tcp_mark_lost_retrans() Remove the existing lost retransmit detection because RACK subsumes it completely. This also stops the overloading the ack_seq field of the skb control block. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 90edef5508f9..8c54863dfc38 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -283,8 +283,6 @@ struct tcp_sock { int lost_cnt_hint; u32 retransmit_high; /* L-bits may be on up to this seqno */ - u32 lost_retrans_low; /* Sent seq after any rxmit (lowest) */ - u32 prior_ssthresh; /* ssthresh saved at recovery start */ u32 high_seq; /* snd_nxt at onset of congestion */ -- cgit v1.2.3 From 625a5e109a3ed6f36a1008a43069a3462b44a424 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:45 -0700 Subject: tcp: skb_mstamp_after helper a helper to prepare the first main RACK patch. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 4398411236f1..24f4dfd94c51 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -463,6 +463,15 @@ static inline u32 skb_mstamp_us_delta(const struct skb_mstamp *t1, return delta_us; } +static inline bool skb_mstamp_after(const struct skb_mstamp *t1, + const struct skb_mstamp *t0) +{ + s32 diff = t1->stamp_jiffies - t0->stamp_jiffies; + + if (!diff) + diff = t1->stamp_us - t0->stamp_us; + return diff > 0; +} /** * struct sk_buff - socket buffer -- cgit v1.2.3 From 659a8ad56f490279f0efee43a62ffa1ac914a4e0 Mon Sep 17 00:00:00 2001 From: Yuchung Cheng Date: Fri, 16 Oct 2015 21:57:46 -0700 Subject: tcp: track the packet timings in RACK This patch is the first half of the RACK loss recovery. RACK loss recovery uses the notion of time instead of packet sequence (FACK) or counts (dupthresh). It's inspired by the previous FACK heuristic in tcp_mark_lost_retrans(): when a limited transmit (new data packet) is sacked, then current retransmitted sequence below the newly sacked sequence must been lost, since at least one round trip time has elapsed. But it has several limitations: 1) can't detect tail drops since it depends on limited transmit 2) is disabled upon reordering (assumes no reordering) 3) only enabled in fast recovery ut not timeout recovery RACK (Recently ACK) addresses these limitations with the notion of time instead: a packet P1 is lost if a later packet P2 is s/acked, as at least one round trip has passed. Since RACK cares about the time sequence instead of the data sequence of packets, it can detect tail drops when later retransmission is s/acked while FACK or dupthresh can't. For reordering RACK uses a dynamically adjusted reordering window ("reo_wnd") to reduce false positives on ever (small) degree of reordering. This patch implements tcp_advanced_rack() which tracks the most recent transmission time among the packets that have been delivered (ACKed or SACKed) in tp->rack.mstamp. This timestamp is the key to determine which packet has been lost. Consider an example that the sender sends six packets: T1: P1 (lost) T2: P2 T3: P3 T4: P4 T100: sack of P2. rack.mstamp = T2 T101: retransmit P1 T102: sack of P2,P3,P4. rack.mstamp = T4 T205: ACK of P4 since the hole is repaired. rack.mstamp = T101 We need to be careful about spurious retransmission because it may falsely advance tp->rack.mstamp by an RTT or an RTO, causing RACK to falsely mark all packets lost, just like a spurious timeout. We identify spurious retransmission by the ACK's TS echo value. If TS option is not applicable but the retransmission is acknowledged less than min-RTT ago, it is likely to be spurious. We refrain from using the transmission time of these spurious retransmissions. The second half is implemented in the next patch that marks packet lost using RACK timestamp. Signed-off-by: Yuchung Cheng Signed-off-by: Neal Cardwell Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 8c54863dfc38..5dce9705fe84 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -194,6 +194,12 @@ struct tcp_sock { u32 window_clamp; /* Maximal window to advertise */ u32 rcv_ssthresh; /* Current window clamp */ + /* Information of the most recently (s)acked skb */ + struct tcp_rack { + struct skb_mstamp mstamp; /* (Re)sent time of the skb */ + u8 advanced; /* mstamp advanced since last lost marking */ + u8 reord; /* reordering detected */ + } rack; u16 advmss; /* Advertised MSS */ u8 unused; u8 nonagle : 4,/* Disable Nagle algorithm? */ -- cgit v1.2.3 From 146aa8b1453bd8f1ff2304ffb71b4ee0eb9acdcc Mon Sep 17 00:00:00 2001 From: David Howells Date: Wed, 21 Oct 2015 14:04:48 +0100 Subject: KEYS: Merge the type-specific data with the payload data Merge the type-specific data with the payload data into one four-word chunk as it seems pointless to keep them separate. Use user_key_payload() for accessing the payloads of overloaded user-defined keys. Signed-off-by: David Howells cc: linux-cifs@vger.kernel.org cc: ecryptfs@vger.kernel.org cc: linux-ext4@vger.kernel.org cc: linux-f2fs-devel@lists.sourceforge.net cc: linux-nfs@vger.kernel.org cc: ceph-devel@vger.kernel.org cc: linux-ima-devel@lists.sourceforge.net --- include/linux/key-type.h | 3 +-- include/linux/key.h | 33 ++++++++++++++------------------- 2 files changed, 15 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/key-type.h b/include/linux/key-type.h index ff9f1d394235..7463355a198b 100644 --- a/include/linux/key-type.h +++ b/include/linux/key-type.h @@ -40,8 +40,7 @@ struct key_construction { */ struct key_preparsed_payload { char *description; /* Proposed key description (or NULL) */ - void *type_data[2]; /* Private key-type data */ - void *payload[2]; /* Proposed payload */ + union key_payload payload; /* Proposed payload */ const void *data; /* Raw data */ size_t datalen; /* Raw datalen */ size_t quotalen; /* Quota length for proposed payload */ diff --git a/include/linux/key.h b/include/linux/key.h index e1d4715f3222..66f705243985 100644 --- a/include/linux/key.h +++ b/include/linux/key.h @@ -89,6 +89,11 @@ struct keyring_index_key { size_t desc_len; }; +union key_payload { + void __rcu *rcu_data0; + void *data[4]; +}; + /*****************************************************************************/ /* * key reference with possession attribute handling @@ -186,28 +191,18 @@ struct key { }; }; - /* type specific data - * - this is used by the keyring type to index the name - */ - union { - struct list_head link; - unsigned long x[2]; - void *p[2]; - int reject_error; - } type_data; - /* key data * - this is used to hold the data actually used in cryptography or * whatever */ union { - union { - unsigned long value; - void __rcu *rcudata; - void *data; - void *data2[2]; - } payload; - struct assoc_array keys; + union key_payload payload; + struct { + /* Keyring bits */ + struct list_head name_link; + struct assoc_array keys; + }; + int reject_error; }; }; @@ -336,12 +331,12 @@ static inline bool key_is_instantiated(const struct key *key) } #define rcu_dereference_key(KEY) \ - (rcu_dereference_protected((KEY)->payload.rcudata, \ + (rcu_dereference_protected((KEY)->payload.rcu_data0, \ rwsem_is_locked(&((struct key *)(KEY))->sem))) #define rcu_assign_keypointer(KEY, PAYLOAD) \ do { \ - rcu_assign_pointer((KEY)->payload.rcudata, (PAYLOAD)); \ + rcu_assign_pointer((KEY)->payload.rcu_data0, (PAYLOAD)); \ } while (0) #ifdef CONFIG_SYSCTL -- cgit v1.2.3 From f2e0a53271a439a2ab142645867f0cde45b2b3cd Mon Sep 17 00:00:00 2001 From: Maxime Ripard Date: Tue, 19 May 2015 22:19:33 +0200 Subject: clk: Add a basic multiplier clock Some clocks are using a multiplier component, however, unlike their mux, gate or divider counterpart, these factors don't have a basic clock implementation. This leads to code duplication across platforms that want to use that kind of clocks, and the impossibility to use the composite clocks with such a clock without defining your own rate operations. Create such a driver in order to remove these issues, and hopefully factor the implementations, reducing code size across platforms and consolidating the various implementations. Signed-off-by: Maxime Ripard Reviewed-by: Chen-Yu Tsai --- include/linux/clk-provider.h | 42 ++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 42 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 3ecc07d0da77..6a7dfe33a317 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -518,6 +518,48 @@ struct clk *clk_register_fractional_divider(struct device *dev, void __iomem *reg, u8 mshift, u8 mwidth, u8 nshift, u8 nwidth, u8 clk_divider_flags, spinlock_t *lock); +/** + * struct clk_multiplier - adjustable multiplier clock + * + * @hw: handle between common and hardware-specific interfaces + * @reg: register containing the multiplier + * @shift: shift to the multiplier bit field + * @width: width of the multiplier bit field + * @lock: register lock + * + * Clock with an adjustable multiplier affecting its output frequency. + * Implements .recalc_rate, .set_rate and .round_rate + * + * Flags: + * CLK_MULTIPLIER_ZERO_BYPASS - By default, the multiplier is the value read + * from the register, with 0 being a valid value effectively + * zeroing the output clock rate. If CLK_MULTIPLIER_ZERO_BYPASS is + * set, then a null multiplier will be considered as a bypass, + * leaving the parent rate unmodified. + * CLK_MULTIPLIER_ROUND_CLOSEST - Makes the best calculated divider to be + * rounded to the closest integer instead of the down one. + */ +struct clk_multiplier { + struct clk_hw hw; + void __iomem *reg; + u8 shift; + u8 width; + u8 flags; + spinlock_t *lock; +}; + +#define CLK_MULTIPLIER_ZERO_BYPASS BIT(0) +#define CLK_MULTIPLIER_ROUND_CLOSEST BIT(1) + +extern const struct clk_ops clk_multiplier_ops; + +struct clk *clk_register_multiplier(struct device *dev, const char *name, + const char *parent_name, + unsigned long flags, + void __iomem *reg, u8 shift, u8 width, + u8 clk_mult_flags, spinlock_t *lock); +void clk_unregister_multiplier(struct clk *clk); + /*** * struct clk_composite - aggregate clock of mux, divider and gate clocks * -- cgit v1.2.3 From aff34e192e4eeacfb8b5ffc68e10a240f2c0c6d7 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:27 -0400 Subject: block: Move integrity kobject to struct gendisk The integrity kobject purely exists to support the integrity subdirectory in sysfs and doesn't really have anything to do with the blk_integrity data structure. Move the kobject to struct gendisk where it belongs. Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- include/linux/genhd.h | 1 + 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19c2e947d4d1..830f9c07d4bb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1472,8 +1472,6 @@ struct blk_integrity { unsigned short tag_size; const char *name; - - struct kobject kobj; }; extern bool blk_integrity_is_initialized(struct gendisk *); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 2adbfa6d02bc..9e6e0dfa97ad 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -199,6 +199,7 @@ struct gendisk { struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity *integrity; + struct kobject integrity_kobj; #endif int node_id; }; -- cgit v1.2.3 From 0f8087ecdeac921fc4920f1328f55c15080bc6aa Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:33 -0400 Subject: block: Consolidate static integrity profile properties We previously made a complete copy of a device's data integrity profile even though several of the fields inside the blk_integrity struct are pointers to fixed template entries in t10-pi.c. Split the static and per-device portions so that we can reference the template directly. Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Cc: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 20 +++++++++++--------- include/linux/t10-pi.h | 8 ++++---- 2 files changed, 15 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 830f9c07d4bb..f36c6476f1c7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1462,16 +1462,18 @@ struct blk_integrity_iter { typedef int (integrity_processing_fn) (struct blk_integrity_iter *); -struct blk_integrity { - integrity_processing_fn *generate_fn; - integrity_processing_fn *verify_fn; - - unsigned short flags; - unsigned short tuple_size; - unsigned short interval; - unsigned short tag_size; +struct blk_integrity_profile { + integrity_processing_fn *generate_fn; + integrity_processing_fn *verify_fn; + const char *name; +}; - const char *name; +struct blk_integrity { + struct blk_integrity_profile *profile; + unsigned short flags; + unsigned short tuple_size; + unsigned short interval; + unsigned short tag_size; }; extern bool blk_integrity_is_initialized(struct gendisk *); diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h index 6a8b9942632d..dd8de82cf5b5 100644 --- a/include/linux/t10-pi.h +++ b/include/linux/t10-pi.h @@ -14,9 +14,9 @@ struct t10_pi_tuple { }; -extern struct blk_integrity t10_pi_type1_crc; -extern struct blk_integrity t10_pi_type1_ip; -extern struct blk_integrity t10_pi_type3_crc; -extern struct blk_integrity t10_pi_type3_ip; +extern struct blk_integrity_profile t10_pi_type1_crc; +extern struct blk_integrity_profile t10_pi_type1_ip; +extern struct blk_integrity_profile t10_pi_type3_crc; +extern struct blk_integrity_profile t10_pi_type3_ip; #endif -- cgit v1.2.3 From a48f041d91bf1aee599fa2adb53b780ed20c2ee5 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:38 -0400 Subject: block: Reduce the size of struct blk_integrity The per-device properties in the blk_integrity structure were previously unsigned short. However, most of the values fit inside a char. The only exception is the data interval size and we can work around that by storing it as a power of two. This cuts the size of the dynamic portion of blk_integrity in half. Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f36c6476f1c7..4f1968f15e30 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1470,10 +1470,10 @@ struct blk_integrity_profile { struct blk_integrity { struct blk_integrity_profile *profile; - unsigned short flags; - unsigned short tuple_size; - unsigned short interval; - unsigned short tag_size; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; }; extern bool blk_integrity_is_initialized(struct gendisk *); -- cgit v1.2.3 From 25520d55cdb6ee289abc68f553d364d22478ff54 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 21 Oct 2015 13:19:49 -0400 Subject: block: Inline blk_integrity in struct gendisk Up until now the_integrity profile has been dynamically allocated and attached to struct gendisk after the disk has been made active. This causes problems because NVMe devices need to register the profile prior to the partition table being read due to a mandatory metadata buffer requirement. In addition, DM goes through hoops to deal with preallocating, but not initializing integrity profiles. Since the integrity profile is small (4 bytes + a pointer), Christoph suggested moving it to struct gendisk proper. This requires several changes: - Moving the blk_integrity definition to genhd.h. - Inlining blk_integrity in struct gendisk. - Removing the dynamic allocation code. - Adding helper functions which allow gendisk to set up and tear down the integrity sysfs dir when a disk is added/deleted. - Adding a blk_integrity_revalidate() callback for updating the stable pages bdi setting. - The calls that depend on whether a device has an integrity profile or not now key off of the bi->profile pointer. - Simplifying the integrity support routines in DM (Mike Snitzer). Signed-off-by: Martin K. Petersen Reported-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Mike Snitzer Cc: Dan Williams Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 34 +++++++++++++--------------------- include/linux/genhd.h | 26 ++++++++++++++++++++++++-- 2 files changed, 37 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4f1968f15e30..60669c20190f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1468,16 +1468,7 @@ struct blk_integrity_profile { const char *name; }; -struct blk_integrity { - struct blk_integrity_profile *profile; - unsigned char flags; - unsigned char tuple_size; - unsigned char interval_exp; - unsigned char tag_size; -}; - -extern bool blk_integrity_is_initialized(struct gendisk *); -extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); +extern void blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, @@ -1488,15 +1479,20 @@ extern bool blk_integrity_merge_rq(struct request_queue *, struct request *, extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, struct bio *); -static inline -struct blk_integrity *bdev_get_integrity(struct block_device *bdev) +static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { - return bdev->bd_disk->integrity; + struct blk_integrity *bi = &disk->integrity; + + if (!bi->profile) + return NULL; + + return bi; } -static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) +static inline +struct blk_integrity *bdev_get_integrity(struct block_device *bdev) { - return disk->integrity; + return blk_get_integrity(bdev->bd_disk); } static inline bool blk_integrity_rq(struct request *rq) @@ -1570,10 +1566,9 @@ static inline int blk_integrity_compare(struct gendisk *a, struct gendisk *b) { return 0; } -static inline int blk_integrity_register(struct gendisk *d, +static inline void blk_integrity_register(struct gendisk *d, struct blk_integrity *b) { - return 0; } static inline void blk_integrity_unregister(struct gendisk *d) { @@ -1598,10 +1593,7 @@ static inline bool blk_integrity_merge_bio(struct request_queue *rq, { return true; } -static inline bool blk_integrity_is_initialized(struct gendisk *g) -{ - return 0; -} + static inline bool integrity_req_gap_back_merge(struct request *req, struct bio *next) { diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 9e6e0dfa97ad..82f4911e0ad8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -163,6 +163,18 @@ struct disk_part_tbl { struct disk_events; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + +struct blk_integrity { + struct blk_integrity_profile *profile; + unsigned char flags; + unsigned char tuple_size; + unsigned char interval_exp; + unsigned char tag_size; +}; + +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + struct gendisk { /* major, first_minor and minors are input parameters only, * don't use directly. Use disk_devt() and disk_max_parts(). @@ -198,9 +210,9 @@ struct gendisk { atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity *integrity; + struct blk_integrity integrity; struct kobject integrity_kobj; -#endif +#endif /* CONFIG_BLK_DEV_INTEGRITY */ int node_id; }; @@ -728,6 +740,16 @@ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) #endif } +#if defined(CONFIG_BLK_DEV_INTEGRITY) +extern void blk_integrity_add(struct gendisk *); +extern void blk_integrity_del(struct gendisk *); +extern void blk_integrity_revalidate(struct gendisk *); +#else /* CONFIG_BLK_DEV_INTEGRITY */ +static inline void blk_integrity_add(struct gendisk *disk) { } +static inline void blk_integrity_del(struct gendisk *disk) { } +static inline void blk_integrity_revalidate(struct gendisk *disk) { } +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #else /* CONFIG_BLOCK */ static inline void printk_all_partitions(void) { } -- cgit v1.2.3 From 3ef28e83ab15799742e55fd13243a5f678b04242 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Oct 2015 13:20:12 -0400 Subject: block: generic request_queue reference counting Allow pmem, and other synchronous/bio-based block drivers, to fallback on a per-cpu reference count managed by the core for tracking queue live/dead state. The existing per-cpu reference count for the blk_mq case is promoted to be used in all block i/o scenarios. This involves initializing it by default, waiting for it to drop to zero at exit, and holding a live reference over the invocation of q->make_request_fn() in generic_make_request(). The blk_mq code continues to take its own reference per blk_mq request and retains the ability to freeze the queue, but the check that the queue is frozen is moved to generic_make_request(). This fixes crash signatures like the following: BUG: unable to handle kernel paging request at ffff880140000000 [..] Call Trace: [] ? copy_user_handle_tail+0x5f/0x70 [] pmem_do_bvec.isra.11+0x70/0xf0 [nd_pmem] [] pmem_make_request+0xd1/0x200 [nd_pmem] [] ? mempool_alloc+0x72/0x1a0 [] generic_make_request+0xd6/0x110 [] submit_bio+0x76/0x170 [] submit_bh_wbc+0x12f/0x160 [] submit_bh+0x12/0x20 [] jbd2_write_superblock+0x8d/0x170 [] jbd2_mark_journal_empty+0x5d/0x90 [] jbd2_journal_destroy+0x24b/0x270 [] ? put_pwq_unlocked+0x2a/0x30 [] ? destroy_workqueue+0x225/0x250 [] ext4_put_super+0x64/0x360 [] generic_shutdown_super+0x6a/0xf0 Cc: Jens Axboe Cc: Keith Busch Cc: Ross Zwisler Suggested-by: Christoph Hellwig Reviewed-by: Christoph Hellwig Tested-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - include/linux/blkdev.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5e7d43ab61c0..83cc9d4e5455 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -166,7 +166,6 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -void blk_mq_finish_init(struct request_queue *q); int blk_mq_register_disk(struct gendisk *); void blk_mq_unregister_disk(struct gendisk *); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 60669c20190f..3e0465257d68 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -450,7 +450,7 @@ struct request_queue { #endif struct rcu_head rcu_head; wait_queue_head_t mq_freeze_wq; - struct percpu_ref mq_usage_counter; + struct percpu_ref q_usage_counter; struct list_head all_q_node; struct blk_mq_tag_set *tag_set; -- cgit v1.2.3 From ac6fc48c9fb7d3220ec4e0be0c29bb314ea75f9f Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Wed, 21 Oct 2015 13:20:18 -0400 Subject: block: move blk_integrity to request_queue A trace like the following proceeds a crash in bio_integrity_process() when it goes to use an already freed blk_integrity profile. BUG: unable to handle kernel paging request at ffff8800d31b10d8 IP: [] 0xffff8800d31b10d8 PGD 2f65067 PUD 21fffd067 PMD 80000000d30001e3 Oops: 0011 [#1] SMP Dumping ftrace buffer: --------------------------------- ndctl-2222 2.... 44526245us : disk_release: pmem1s systemd--2223 4.... 44573945us : bio_integrity_endio: pmem1s <...>-409 4.... 44574005us : bio_integrity_process: pmem1s --------------------------------- [..] Call Trace: [] ? bio_integrity_process+0x159/0x2d0 [] bio_integrity_verify_fn+0x36/0x60 [] process_one_work+0x1cc/0x4e0 Given that a request_queue is pinned while i/o is in flight and that a gendisk is allowed to have a shorter lifetime, move blk_integrity to request_queue to satisfy requests arriving after the gendisk has been torn down. Cc: Christoph Hellwig Cc: Martin K. Petersen [martin: fix the CONFIG_BLK_DEV_INTEGRITY=n case] Tested-by: Ross Zwisler Signed-off-by: Dan Williams Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++++- include/linux/genhd.h | 1 - 2 files changed, 5 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3e0465257d68..cf57884db4b7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -369,6 +369,10 @@ struct request_queue { */ struct kobject mq_kobj; +#ifdef CONFIG_BLK_DEV_INTEGRITY + struct blk_integrity integrity; +#endif /* CONFIG_BLK_DEV_INTEGRITY */ + #ifdef CONFIG_PM struct device *dev; int rpm_status; @@ -1481,7 +1485,7 @@ extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) { - struct blk_integrity *bi = &disk->integrity; + struct blk_integrity *bi = &disk->queue->integrity; if (!bi->profile) return NULL; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 82f4911e0ad8..847cc1d91634 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -210,7 +210,6 @@ struct gendisk { atomic_t sync_io; /* RAID */ struct disk_events *ev; #ifdef CONFIG_BLK_DEV_INTEGRITY - struct blk_integrity integrity; struct kobject integrity_kobj; #endif /* CONFIG_BLK_DEV_INTEGRITY */ int node_id; -- cgit v1.2.3 From bbd3e064362e5057cc4799ba2e4d68c7593e490b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Oct 2015 14:10:48 +0200 Subject: block: add an API for Persistent Reservations This commits adds a driver API and ioctls for controlling Persistent Reservations s/genericly/generically/ at the block layer. Persistent Reservations are supported by SCSI and NVMe and allow controlling who gets access to a device in a shared storage setup. Note that we add a pr_ops structure to struct block_device_operations instead of adding the members directly to avoid bloating all instances of devices that will never support Persistent Reservations. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ include/linux/pr.h | 18 ++++++++++++++++++ 2 files changed, 20 insertions(+) create mode 100644 include/linux/pr.h (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 19c2e947d4d1..fe25da05e823 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -35,6 +35,7 @@ struct sg_io_hdr; struct bsg_job; struct blkcg_gq; struct blk_flush_queue; +struct pr_ops; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -1633,6 +1634,7 @@ struct block_device_operations { /* this callback is with swap_lock and sometimes page table lock held */ void (*swap_slot_free_notify) (struct block_device *, unsigned long); struct module *owner; + const struct pr_ops *pr_ops; }; extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, diff --git a/include/linux/pr.h b/include/linux/pr.h new file mode 100644 index 000000000000..65c01c10b335 --- /dev/null +++ b/include/linux/pr.h @@ -0,0 +1,18 @@ +#ifndef LINUX_PR_H +#define LINUX_PR_H + +#include + +struct pr_ops { + int (*pr_register)(struct block_device *bdev, u64 old_key, u64 new_key, + u32 flags); + int (*pr_reserve)(struct block_device *bdev, u64 key, + enum pr_type type, u32 flags); + int (*pr_release)(struct block_device *bdev, u64 key, + enum pr_type type); + int (*pr_preempt)(struct block_device *bdev, u64 old_key, u64 new_key, + enum pr_type type, bool abort); + int (*pr_clear)(struct block_device *bdev, u64 key); +}; + +#endif /* LINUX_PR_H */ -- cgit v1.2.3 From 46c6b2bc88a729366605d0dedb6a35b8cf7cc4f0 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:36 +0200 Subject: iommu: Revive device_group iommu-ops call-back That call-back is currently unused, change it into a call-back function for finding the right IOMMU group for a device. This is a first step to remove the hard-coded PCI dependency in the iommu-group code. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index f174506a5343..a1e6e8914c17 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -168,7 +168,7 @@ struct iommu_ops { phys_addr_t (*iova_to_phys)(struct iommu_domain *domain, dma_addr_t iova); int (*add_device)(struct device *dev); void (*remove_device)(struct device *dev); - int (*device_group)(struct device *dev, unsigned int *groupid); + struct iommu_group *(*device_group)(struct device *dev); int (*domain_get_attr)(struct iommu_domain *domain, enum iommu_attr attr, void *data); int (*domain_set_attr)(struct iommu_domain *domain, -- cgit v1.2.3 From 5e62292bad10cff25ff75d136c54e62b43bfb0fa Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:37 +0200 Subject: iommu: Export and rename iommu_group_get_for_pci_dev() Rename that function to pci_device_group() and export it, so that IOMMU drivers can use it as their device_group call-back. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1e6e8914c17..a1a06639a64a 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -317,6 +317,9 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, return domain->ops->map_sg(domain, iova, sg, nents, prot); } +/* PCI device grouping function */ +extern struct iommu_group *pci_device_group(struct device *dev); + #else /* CONFIG_IOMMU_API */ struct iommu_ops {}; -- cgit v1.2.3 From 6eab556a40384de94c2d03c8d9d632e5154367f5 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 21 Oct 2015 23:51:38 +0200 Subject: iommu: Add generic_device_group() function This function can be used as a device_group call-back and just allocates one iommu-group per device. Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a1a06639a64a..f28dff313b07 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -319,6 +319,8 @@ static inline size_t iommu_map_sg(struct iommu_domain *domain, /* PCI device grouping function */ extern struct iommu_group *pci_device_group(struct device *dev); +/* Generic device grouping function */ +extern struct iommu_group *generic_device_group(struct device *dev); #else /* CONFIG_IOMMU_API */ -- cgit v1.2.3 From 948486544713492f00ac8a9572909101ea892cb0 Mon Sep 17 00:00:00 2001 From: Scott Wood Date: Sat, 19 Sep 2015 23:29:53 -0500 Subject: powerpc/fsl: Move fsl_guts.h out of arch/powerpc Freescale's Layerscape ARM chips use the same structure. Signed-off-by: Scott Wood --- include/linux/fsl/guts.h | 192 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 192 insertions(+) create mode 100644 include/linux/fsl/guts.h (limited to 'include/linux') diff --git a/include/linux/fsl/guts.h b/include/linux/fsl/guts.h new file mode 100644 index 000000000000..84d971ff3fba --- /dev/null +++ b/include/linux/fsl/guts.h @@ -0,0 +1,192 @@ +/** + * Freecale 85xx and 86xx Global Utilties register set + * + * Authors: Jeff Brown + * Timur Tabi + * + * Copyright 2004,2007,2012 Freescale Semiconductor, Inc + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + */ + +#ifndef __FSL_GUTS_H__ +#define __FSL_GUTS_H__ + +#include + +/** + * Global Utility Registers. + * + * Not all registers defined in this structure are available on all chips, so + * you are expected to know whether a given register actually exists on your + * chip before you access it. + * + * Also, some registers are similar on different chips but have slightly + * different names. In these cases, one name is chosen to avoid extraneous + * #ifdefs. + */ +struct ccsr_guts { + __be32 porpllsr; /* 0x.0000 - POR PLL Ratio Status Register */ + __be32 porbmsr; /* 0x.0004 - POR Boot Mode Status Register */ + __be32 porimpscr; /* 0x.0008 - POR I/O Impedance Status and Control Register */ + __be32 pordevsr; /* 0x.000c - POR I/O Device Status Register */ + __be32 pordbgmsr; /* 0x.0010 - POR Debug Mode Status Register */ + __be32 pordevsr2; /* 0x.0014 - POR device status register 2 */ + u8 res018[0x20 - 0x18]; + __be32 porcir; /* 0x.0020 - POR Configuration Information Register */ + u8 res024[0x30 - 0x24]; + __be32 gpiocr; /* 0x.0030 - GPIO Control Register */ + u8 res034[0x40 - 0x34]; + __be32 gpoutdr; /* 0x.0040 - General-Purpose Output Data Register */ + u8 res044[0x50 - 0x44]; + __be32 gpindr; /* 0x.0050 - General-Purpose Input Data Register */ + u8 res054[0x60 - 0x54]; + __be32 pmuxcr; /* 0x.0060 - Alternate Function Signal Multiplex Control */ + __be32 pmuxcr2; /* 0x.0064 - Alternate function signal multiplex control 2 */ + __be32 dmuxcr; /* 0x.0068 - DMA Mux Control Register */ + u8 res06c[0x70 - 0x6c]; + __be32 devdisr; /* 0x.0070 - Device Disable Control */ +#define CCSR_GUTS_DEVDISR_TB1 0x00001000 +#define CCSR_GUTS_DEVDISR_TB0 0x00004000 + __be32 devdisr2; /* 0x.0074 - Device Disable Control 2 */ + u8 res078[0x7c - 0x78]; + __be32 pmjcr; /* 0x.007c - 4 Power Management Jog Control Register */ + __be32 powmgtcsr; /* 0x.0080 - Power Management Status and Control Register */ + __be32 pmrccr; /* 0x.0084 - Power Management Reset Counter Configuration Register */ + __be32 pmpdccr; /* 0x.0088 - Power Management Power Down Counter Configuration Register */ + __be32 pmcdr; /* 0x.008c - 4Power management clock disable register */ + __be32 mcpsumr; /* 0x.0090 - Machine Check Summary Register */ + __be32 rstrscr; /* 0x.0094 - Reset Request Status and Control Register */ + __be32 ectrstcr; /* 0x.0098 - Exception reset control register */ + __be32 autorstsr; /* 0x.009c - Automatic reset status register */ + __be32 pvr; /* 0x.00a0 - Processor Version Register */ + __be32 svr; /* 0x.00a4 - System Version Register */ + u8 res0a8[0xb0 - 0xa8]; + __be32 rstcr; /* 0x.00b0 - Reset Control Register */ + u8 res0b4[0xc0 - 0xb4]; + __be32 iovselsr; /* 0x.00c0 - I/O voltage select status register + Called 'elbcvselcr' on 86xx SOCs */ + u8 res0c4[0x100 - 0xc4]; + __be32 rcwsr[16]; /* 0x.0100 - Reset Control Word Status registers + There are 16 registers */ + u8 res140[0x224 - 0x140]; + __be32 iodelay1; /* 0x.0224 - IO delay control register 1 */ + __be32 iodelay2; /* 0x.0228 - IO delay control register 2 */ + u8 res22c[0x604 - 0x22c]; + __be32 pamubypenr; /* 0x.604 - PAMU bypass enable register */ + u8 res608[0x800 - 0x608]; + __be32 clkdvdr; /* 0x.0800 - Clock Divide Register */ + u8 res804[0x900 - 0x804]; + __be32 ircr; /* 0x.0900 - Infrared Control Register */ + u8 res904[0x908 - 0x904]; + __be32 dmacr; /* 0x.0908 - DMA Control Register */ + u8 res90c[0x914 - 0x90c]; + __be32 elbccr; /* 0x.0914 - eLBC Control Register */ + u8 res918[0xb20 - 0x918]; + __be32 ddr1clkdr; /* 0x.0b20 - DDR1 Clock Disable Register */ + __be32 ddr2clkdr; /* 0x.0b24 - DDR2 Clock Disable Register */ + __be32 ddrclkdr; /* 0x.0b28 - DDR Clock Disable Register */ + u8 resb2c[0xe00 - 0xb2c]; + __be32 clkocr; /* 0x.0e00 - Clock Out Select Register */ + u8 rese04[0xe10 - 0xe04]; + __be32 ddrdllcr; /* 0x.0e10 - DDR DLL Control Register */ + u8 rese14[0xe20 - 0xe14]; + __be32 lbcdllcr; /* 0x.0e20 - LBC DLL Control Register */ + __be32 cpfor; /* 0x.0e24 - L2 charge pump fuse override register */ + u8 rese28[0xf04 - 0xe28]; + __be32 srds1cr0; /* 0x.0f04 - SerDes1 Control Register 0 */ + __be32 srds1cr1; /* 0x.0f08 - SerDes1 Control Register 0 */ + u8 resf0c[0xf2c - 0xf0c]; + __be32 itcr; /* 0x.0f2c - Internal transaction control register */ + u8 resf30[0xf40 - 0xf30]; + __be32 srds2cr0; /* 0x.0f40 - SerDes2 Control Register 0 */ + __be32 srds2cr1; /* 0x.0f44 - SerDes2 Control Register 0 */ +} __attribute__ ((packed)); + + +/* Alternate function signal multiplex control */ +#define MPC85xx_PMUXCR_QE(x) (0x8000 >> (x)) + +#ifdef CONFIG_PPC_86xx + +#define CCSR_GUTS_DMACR_DEV_SSI 0 /* DMA controller/channel set to SSI */ +#define CCSR_GUTS_DMACR_DEV_IR 1 /* DMA controller/channel set to IR */ + +/* + * Set the DMACR register in the GUTS + * + * The DMACR register determines the source of initiated transfers for each + * channel on each DMA controller. Rather than have a bunch of repetitive + * macros for the bit patterns, we just have a function that calculates + * them. + * + * guts: Pointer to GUTS structure + * co: The DMA controller (0 or 1) + * ch: The channel on the DMA controller (0, 1, 2, or 3) + * device: The device to set as the source (CCSR_GUTS_DMACR_DEV_xx) + */ +static inline void guts_set_dmacr(struct ccsr_guts __iomem *guts, + unsigned int co, unsigned int ch, unsigned int device) +{ + unsigned int shift = 16 + (8 * (1 - co) + 2 * (3 - ch)); + + clrsetbits_be32(&guts->dmacr, 3 << shift, device << shift); +} + +#define CCSR_GUTS_PMUXCR_LDPSEL 0x00010000 +#define CCSR_GUTS_PMUXCR_SSI1_MASK 0x0000C000 /* Bitmask for SSI1 */ +#define CCSR_GUTS_PMUXCR_SSI1_LA 0x00000000 /* Latched address */ +#define CCSR_GUTS_PMUXCR_SSI1_HI 0x00004000 /* High impedance */ +#define CCSR_GUTS_PMUXCR_SSI1_SSI 0x00008000 /* Used for SSI1 */ +#define CCSR_GUTS_PMUXCR_SSI2_MASK 0x00003000 /* Bitmask for SSI2 */ +#define CCSR_GUTS_PMUXCR_SSI2_LA 0x00000000 /* Latched address */ +#define CCSR_GUTS_PMUXCR_SSI2_HI 0x00001000 /* High impedance */ +#define CCSR_GUTS_PMUXCR_SSI2_SSI 0x00002000 /* Used for SSI2 */ +#define CCSR_GUTS_PMUXCR_LA_22_25_LA 0x00000000 /* Latched Address */ +#define CCSR_GUTS_PMUXCR_LA_22_25_HI 0x00000400 /* High impedance */ +#define CCSR_GUTS_PMUXCR_DBGDRV 0x00000200 /* Signals not driven */ +#define CCSR_GUTS_PMUXCR_DMA2_0 0x00000008 +#define CCSR_GUTS_PMUXCR_DMA2_3 0x00000004 +#define CCSR_GUTS_PMUXCR_DMA1_0 0x00000002 +#define CCSR_GUTS_PMUXCR_DMA1_3 0x00000001 + +/* + * Set the DMA external control bits in the GUTS + * + * The DMA external control bits in the PMUXCR are only meaningful for + * channels 0 and 3. Any other channels are ignored. + * + * guts: Pointer to GUTS structure + * co: The DMA controller (0 or 1) + * ch: The channel on the DMA controller (0, 1, 2, or 3) + * value: the new value for the bit (0 or 1) + */ +static inline void guts_set_pmuxcr_dma(struct ccsr_guts __iomem *guts, + unsigned int co, unsigned int ch, unsigned int value) +{ + if ((ch == 0) || (ch == 3)) { + unsigned int shift = 2 * (co + 1) - (ch & 1) - 1; + + clrsetbits_be32(&guts->pmuxcr, 1 << shift, value << shift); + } +} + +#define CCSR_GUTS_CLKDVDR_PXCKEN 0x80000000 +#define CCSR_GUTS_CLKDVDR_SSICKEN 0x20000000 +#define CCSR_GUTS_CLKDVDR_PXCKINV 0x10000000 +#define CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT 25 +#define CCSR_GUTS_CLKDVDR_PXCKDLY_MASK 0x06000000 +#define CCSR_GUTS_CLKDVDR_PXCKDLY(x) \ + (((x) & 3) << CCSR_GUTS_CLKDVDR_PXCKDLY_SHIFT) +#define CCSR_GUTS_CLKDVDR_PXCLK_SHIFT 16 +#define CCSR_GUTS_CLKDVDR_PXCLK_MASK 0x001F0000 +#define CCSR_GUTS_CLKDVDR_PXCLK(x) (((x) & 31) << CCSR_GUTS_CLKDVDR_PXCLK_SHIFT) +#define CCSR_GUTS_CLKDVDR_SSICLK_MASK 0x000000FF +#define CCSR_GUTS_CLKDVDR_SSICLK(x) ((x) & CCSR_GUTS_CLKDVDR_SSICLK_MASK) + +#endif + +#endif -- cgit v1.2.3 From 3ecb3e09b042e70799ff3a1ff464a5ecaa7547d9 Mon Sep 17 00:00:00 2001 From: "Ivan T. Ivanov" Date: Mon, 7 Sep 2015 14:45:25 +0300 Subject: usb: chipidea: Use extcon framework for VBUS and ID detect On recent Qualcomm platforms VBUS and ID lines are not routed to USB PHY LINK controller. Use extcon framework to receive connect and disconnect ID and VBUS notification. Signed-off-by: Ivan T. Ivanov Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index a41833cd184c..c5cddc6901d0 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -5,9 +5,28 @@ #ifndef __LINUX_USB_CHIPIDEA_H #define __LINUX_USB_CHIPIDEA_H +#include #include struct ci_hdrc; + +/** + * struct ci_hdrc_cable - structure for external connector cable state tracking + * @state: current state of the line + * @changed: set to true when extcon event happen + * @edev: device which generate events + * @ci: driver state of the chipidea device + * @nb: hold event notification callback + * @conn: used for notification registration + */ +struct ci_hdrc_cable { + bool state; + bool changed; + struct extcon_dev *edev; + struct ci_hdrc *ci; + struct notifier_block nb; +}; + struct ci_hdrc_platform_data { const char *name; /* offset of the capability registers */ @@ -48,6 +67,10 @@ struct ci_hdrc_platform_data { u32 ahb_burst_config; u32 tx_burst_size; u32 rx_burst_size; + + /* VBUS and ID signal state tracking, using extcon framework */ + struct ci_hdrc_cable vbus_extcon; + struct ci_hdrc_cable id_extcon; }; /* Default offset of capability registers */ -- cgit v1.2.3 From 1fbf46280eb6866c762de5ec8ba35f09097b0d53 Mon Sep 17 00:00:00 2001 From: Fabio Estevam Date: Tue, 8 Sep 2015 22:18:14 -0300 Subject: usb: chipidea: Add support for 'phy-clkgate-delay-us' property Add support for the optional 'phy-clkgate-delay-us' property that is used to describe the delay time between putting PHY into low power mode and turning off the PHY clock. Signed-off-by: Li Jun Signed-off-by: Fabio Estevam Signed-off-by: Peter Chen --- include/linux/usb/chipidea.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/usb/chipidea.h b/include/linux/usb/chipidea.h index c5cddc6901d0..5dd75fa47dd8 100644 --- a/include/linux/usb/chipidea.h +++ b/include/linux/usb/chipidea.h @@ -71,6 +71,7 @@ struct ci_hdrc_platform_data { /* VBUS and ID signal state tracking, using extcon framework */ struct ci_hdrc_cable vbus_extcon; struct ci_hdrc_cable id_extcon; + u32 phy_clkgate_delay_us; }; /* Default offset of capability registers */ -- cgit v1.2.3 From e2aacd963a06fc558a809ecb62f5833e6c340b28 Mon Sep 17 00:00:00 2001 From: Vivien Didelot Date: Tue, 20 Oct 2015 10:08:59 -0400 Subject: net: mdio-gpio: move platform data header This header file only contains the platform data structure definition, so move it to the include/linux/platform_data/ directory. Signed-off-by: Vivien Didelot Signed-off-by: David S. Miller --- include/linux/mdio-gpio.h | 33 --------------------------------- include/linux/platform_data/mdio-gpio.h | 33 +++++++++++++++++++++++++++++++++ 2 files changed, 33 insertions(+), 33 deletions(-) delete mode 100644 include/linux/mdio-gpio.h create mode 100644 include/linux/platform_data/mdio-gpio.h (limited to 'include/linux') diff --git a/include/linux/mdio-gpio.h b/include/linux/mdio-gpio.h deleted file mode 100644 index 11f00cdabe3d..000000000000 --- a/include/linux/mdio-gpio.h +++ /dev/null @@ -1,33 +0,0 @@ -/* - * MDIO-GPIO bus platform data structures - * - * Copyright (C) 2008, Paulius Zaleckas - * - * This file is licensed under the terms of the GNU General Public License - * version 2. This program is licensed "as is" without any warranty of any - * kind, whether express or implied. - */ - -#ifndef __LINUX_MDIO_GPIO_H -#define __LINUX_MDIO_GPIO_H - -#include - -struct mdio_gpio_platform_data { - /* GPIO numbers for bus pins */ - unsigned int mdc; - unsigned int mdio; - unsigned int mdo; - - bool mdc_active_low; - bool mdio_active_low; - bool mdo_active_low; - - u32 phy_mask; - u32 phy_ignore_ta_mask; - int irqs[PHY_MAX_ADDR]; - /* reset callback */ - int (*reset)(struct mii_bus *bus); -}; - -#endif /* __LINUX_MDIO_GPIO_H */ diff --git a/include/linux/platform_data/mdio-gpio.h b/include/linux/platform_data/mdio-gpio.h new file mode 100644 index 000000000000..11f00cdabe3d --- /dev/null +++ b/include/linux/platform_data/mdio-gpio.h @@ -0,0 +1,33 @@ +/* + * MDIO-GPIO bus platform data structures + * + * Copyright (C) 2008, Paulius Zaleckas + * + * This file is licensed under the terms of the GNU General Public License + * version 2. This program is licensed "as is" without any warranty of any + * kind, whether express or implied. + */ + +#ifndef __LINUX_MDIO_GPIO_H +#define __LINUX_MDIO_GPIO_H + +#include + +struct mdio_gpio_platform_data { + /* GPIO numbers for bus pins */ + unsigned int mdc; + unsigned int mdio; + unsigned int mdo; + + bool mdc_active_low; + bool mdio_active_low; + bool mdo_active_low; + + u32 phy_mask; + u32 phy_ignore_ta_mask; + int irqs[PHY_MAX_ADDR]; + /* reset callback */ + int (*reset)(struct mii_bus *bus); +}; + +#endif /* __LINUX_MDIO_GPIO_H */ -- cgit v1.2.3 From 9a8928359736ab170303ee8a2cc15db54e3a4a8f Mon Sep 17 00:00:00 2001 From: Maor Gottlieb Date: Thu, 15 Oct 2015 14:44:38 +0300 Subject: net/mlx4_core: Add support for filtering multicast loopback Update device capabilities regarding HW filtering multicast loopback support. Add MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB attribute to mlx4_update_qp to enable changing QP context to support filtering incoming multicast loopback traffic according the sender's counter index. Set the corresponding bits in QP context to force the loopback source checks if attribute is given and HW supports it. Signed-off-by: Maor Gottlieb Signed-off-by: Eran Ben Elisha Signed-off-by: Doug Ledford --- include/linux/mlx4/device.h | 2 ++ include/linux/mlx4/qp.h | 24 +++++++++++++++++++----- 2 files changed, 21 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index baad4cb8e9b0..dac6872dbaea 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -214,6 +214,8 @@ enum { MLX4_DEV_CAP_FLAG2_IGNORE_FCS = 1LL << 28, MLX4_DEV_CAP_FLAG2_PHV_EN = 1LL << 29, MLX4_DEV_CAP_FLAG2_SKIP_OUTER_VLAN = 1LL << 30, + MLX4_DEV_CAP_FLAG2_UPDATE_QP_SRC_CHECK_LB = 1ULL << 31, + MLX4_DEV_CAP_FLAG2_LB_SRC_CHK = 1ULL << 32, }; enum { diff --git a/include/linux/mlx4/qp.h b/include/linux/mlx4/qp.h index de45a51b3f04..fe052e234906 100644 --- a/include/linux/mlx4/qp.h +++ b/include/linux/mlx4/qp.h @@ -135,7 +135,10 @@ struct mlx4_rss_context { struct mlx4_qp_path { u8 fl; - u8 vlan_control; + union { + u8 vlan_control; + u8 control; + }; u8 disable_pkey_check; u8 pkey_index; u8 counter_index; @@ -156,9 +159,16 @@ struct mlx4_qp_path { }; enum { /* fl */ - MLX4_FL_CV = 1 << 6, - MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2 + MLX4_FL_CV = 1 << 6, + MLX4_FL_ETH_HIDE_CQE_VLAN = 1 << 2, + MLX4_FL_ETH_SRC_CHECK_MC_LB = 1 << 1, + MLX4_FL_ETH_SRC_CHECK_UC_LB = 1 << 0, }; + +enum { /* control */ + MLX4_CTRL_ETH_SRC_CHECK_IF_COUNTER = 1 << 7, +}; + enum { /* vlan_control */ MLX4_VLAN_CTRL_ETH_TX_BLOCK_TAGGED = 1 << 6, MLX4_VLAN_CTRL_ETH_TX_BLOCK_PRIO_TAGGED = 1 << 5, /* 802.1p priority tag */ @@ -254,6 +264,8 @@ enum { MLX4_UPD_QP_PATH_MASK_SCHED_QUEUE = 14 + 32, MLX4_UPD_QP_PATH_MASK_IF_COUNTER_INDEX = 15 + 32, MLX4_UPD_QP_PATH_MASK_FVL_RX = 16 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_UC_LB = 18 + 32, + MLX4_UPD_QP_PATH_MASK_ETH_SRC_CHECK_MC_LB = 19 + 32, }; enum { /* param3 */ @@ -436,11 +448,13 @@ enum mlx4_update_qp_attr { MLX4_UPDATE_QP_VSD = 1 << 1, MLX4_UPDATE_QP_RATE_LIMIT = 1 << 2, MLX4_UPDATE_QP_QOS_VPORT = 1 << 3, - MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 4) - 1 + MLX4_UPDATE_QP_ETH_SRC_CHECK_MC_LB = 1 << 4, + MLX4_UPDATE_QP_SUPPORTED_ATTRS = (1 << 5) - 1 }; enum mlx4_update_qp_params_flags { - MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_ETH_CHECK_MC_LB = 1 << 0, + MLX4_UPDATE_QP_PARAMS_FLAGS_VSD_ENABLE = 1 << 1, }; struct mlx4_update_qp_params { -- cgit v1.2.3 From dbf650b67bb4db1b95807d2aafe2d7cfafd458da Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Tue, 20 Oct 2015 13:17:40 -0700 Subject: tcp: fastopen: limit max_qlen Allowing an application to set whatever limit for the list of recently RST fastopen sessions [1] is not wise, as it open ways to deplete kernel memory. Cap the user provided limit by somaxconn sysctl, like listen() backlog. [1] https://tools.ietf.org/html/rfc7413#section-5.1 Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/tcp.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index 5dce9705fe84..c906f4534581 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -392,8 +392,9 @@ static inline bool tcp_passive_fastopen(const struct sock *sk) static inline void fastopen_queue_tune(struct sock *sk, int backlog) { struct request_sock_queue *queue = &inet_csk(sk)->icsk_accept_queue; + int somaxconn = READ_ONCE(sock_net(sk)->core.sysctl_somaxconn); - queue->fastopenq.max_qlen = backlog; + queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); } static inline void tcp_saved_syn_free(struct tcp_sock *tp) -- cgit v1.2.3 From ee1d267423a1f8041e2b1a33fc23e4393c67677e Mon Sep 17 00:00:00 2001 From: Geliang Tang Date: Tue, 20 Oct 2015 00:39:03 -0700 Subject: pstore: add pstore unregister pstore doesn't support unregistering yet. It was marked as TODO. This patch adds some code to fix it: 1) Add functions to unregister kmsg/console/ftrace/pmsg. 2) Add a function to free compression buffer. 3) Unmap the memory and free it. 4) Add a function to unregister pstore filesystem. Signed-off-by: Geliang Tang Acked-by: Kees Cook [Removed __exit annotation from ramoops_remove(). Reported by Arnd Bergmann] Signed-off-by: Tony Luck --- include/linux/pstore.h | 14 +------------- 1 file changed, 1 insertion(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pstore.h b/include/linux/pstore.h index 8e7a25b068b0..831479f8df8f 100644 --- a/include/linux/pstore.h +++ b/include/linux/pstore.h @@ -75,20 +75,8 @@ struct pstore_info { #define PSTORE_FLAGS_FRAGILE 1 -#ifdef CONFIG_PSTORE extern int pstore_register(struct pstore_info *); +extern void pstore_unregister(struct pstore_info *); extern bool pstore_cannot_block_path(enum kmsg_dump_reason reason); -#else -static inline int -pstore_register(struct pstore_info *psi) -{ - return -ENODEV; -} -static inline bool -pstore_cannot_block_path(enum kmsg_dump_reason reason) -{ - return false; -} -#endif #endif /*_LINUX_PSTORE_H*/ -- cgit v1.2.3 From e55c34a66f87e78fb1fc6b623b78c5ad74b475af Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 22 Oct 2015 13:38:13 -0400 Subject: locks: introduce locks_lock_inode_wait() Users of the locks API commonly call either posix_lock_file_wait() or flock_lock_file_wait() depending upon the lock type. Add a new function locks_lock_inode_wait() which will check and call the correct function for the type of lock passed in. Signed-off-by: Benjamin Coddington Signed-off-by: Jeff Layton --- include/linux/fs.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..b064d4c0b233 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1059,6 +1059,7 @@ extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); +extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); extern int generic_setlease(struct file *, long, struct file_lock **, void **priv); @@ -1177,6 +1178,11 @@ static inline int flock_lock_inode_wait(struct inode *inode, return -ENOLCK; } +static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) +{ + return -ENOLCK; +} + static inline int __break_lease(struct inode *inode, unsigned int mode, unsigned int type) { return 0; @@ -1225,6 +1231,11 @@ static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) return flock_lock_inode_wait(file_inode(filp), fl); } +static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) +{ + return locks_lock_inode_wait(file_inode(filp), fl); +} + struct fasync_struct { spinlock_t fa_lock; int magic; -- cgit v1.2.3 From 616fb38fa7a9599293e05ae1fa9acfaf73922434 Mon Sep 17 00:00:00 2001 From: Benjamin Coddington Date: Thu, 22 Oct 2015 13:38:15 -0400 Subject: locks: cleanup posix_lock_inode_wait and flock_lock_inode_wait All callers use locks_lock_inode_wait() instead. Signed-off-by: Benjamin Coddington Signed-off-by: Jeff Layton --- include/linux/fs.h | 24 ------------------------ 1 file changed, 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index b064d4c0b233..49749688156d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1053,12 +1053,10 @@ extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); -extern int posix_lock_inode_wait(struct inode *, struct file_lock *); extern int posix_unblock_lock(struct file_lock *); extern int vfs_test_lock(struct file *, struct file_lock *); extern int vfs_lock_file(struct file *, unsigned int, struct file_lock *, struct file_lock *); extern int vfs_cancel_lock(struct file *filp, struct file_lock *fl); -extern int flock_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl); extern int __break_lease(struct inode *inode, unsigned int flags, unsigned int type); extern void lease_get_mtime(struct inode *, struct timespec *time); @@ -1145,12 +1143,6 @@ static inline int posix_lock_file(struct file *filp, struct file_lock *fl, return -ENOLCK; } -static inline int posix_lock_inode_wait(struct inode *inode, - struct file_lock *fl) -{ - return -ENOLCK; -} - static inline int posix_unblock_lock(struct file_lock *waiter) { return -ENOENT; @@ -1172,12 +1164,6 @@ static inline int vfs_cancel_lock(struct file *filp, struct file_lock *fl) return 0; } -static inline int flock_lock_inode_wait(struct inode *inode, - struct file_lock *request) -{ - return -ENOLCK; -} - static inline int locks_lock_inode_wait(struct inode *inode, struct file_lock *fl) { return -ENOLCK; @@ -1221,16 +1207,6 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } -static inline int posix_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return posix_lock_inode_wait(file_inode(filp), fl); -} - -static inline int flock_lock_file_wait(struct file *filp, struct file_lock *fl) -{ - return flock_lock_inode_wait(file_inode(filp), fl); -} - static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); -- cgit v1.2.3 From 3217f7c25bca66eed9b07f0b8bfd1937169b0736 Mon Sep 17 00:00:00 2001 From: Christoffer Dall Date: Thu, 27 Aug 2015 16:41:15 +0200 Subject: KVM: Add kvm_arch_vcpu_{un}blocking callbacks Some times it is useful for architecture implementations of KVM to know when the VCPU thread is about to block or when it comes back from blocking (arm/arm64 needs to know this to properly implement timers, for example). Therefore provide a generic architecture callback function in line with what we do elsewhere for KVM generic-arch interactions. Reviewed-by: Marc Zyngier Signed-off-by: Christoffer Dall --- include/linux/kvm_host.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 1bef9e21e725..4a86f5f072c0 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -625,6 +625,8 @@ int kvm_vcpu_write_guest(struct kvm_vcpu *vcpu, gpa_t gpa, const void *data, void kvm_vcpu_mark_page_dirty(struct kvm_vcpu *vcpu, gfn_t gfn); void kvm_vcpu_block(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_blocking(struct kvm_vcpu *vcpu); +void kvm_arch_vcpu_unblocking(struct kvm_vcpu *vcpu); void kvm_vcpu_kick(struct kvm_vcpu *vcpu); int kvm_vcpu_yield_to(struct kvm_vcpu *target); void kvm_vcpu_on_spin(struct kvm_vcpu *vcpu); -- cgit v1.2.3 From fc4099f17240767554ff3a73977acb78ef615404 Mon Sep 17 00:00:00 2001 From: Pravin B Shelar Date: Thu, 22 Oct 2015 18:17:16 -0700 Subject: openvswitch: Fix egress tunnel info. While transitioning to netdev based vport we broke OVS feature which allows user to retrieve tunnel packet egress information for lwtunnel devices. Following patch fixes it by introducing ndo operation to get the tunnel egress info. Same ndo operation can be used for lwtunnel devices and compat ovs-tnl-vport devices. So after adding such device operation we can remove similar operation from ovs-vport. Fixes: 614732eaa12d ("openvswitch: Use regular VXLAN net_device device"). Signed-off-by: Pravin B Shelar Signed-off-by: David S. Miller --- include/linux/netdevice.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d15e3831440..210d11a75e4f 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1054,6 +1054,10 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * This function is used to pass protocol port error state information * to the switch driver. The switch driver can react to the proto_down * by doing a phys down on the associated switch port. + * int (*ndo_fill_metadata_dst)(struct net_device *dev, struct sk_buff *skb); + * This function is used to get egress tunnel information for given skb. + * This is useful for retrieving outer tunnel header parameters while + * sampling packet. * */ struct net_device_ops { @@ -1227,6 +1231,8 @@ struct net_device_ops { int (*ndo_get_iflink)(const struct net_device *dev); int (*ndo_change_proto_down)(struct net_device *dev, bool proto_down); + int (*ndo_fill_metadata_dst)(struct net_device *dev, + struct sk_buff *skb); }; /** @@ -2203,6 +2209,7 @@ void dev_add_offload(struct packet_offload *po); void dev_remove_offload(struct packet_offload *po); int dev_get_iflink(const struct net_device *dev); +int dev_fill_metadata_dst(struct net_device *dev, struct sk_buff *skb); struct net_device *__dev_get_by_flags(struct net *net, unsigned short flags, unsigned short mask); struct net_device *dev_get_by_name(struct net *net, const char *name); -- cgit v1.2.3 From c56d4450eb6886225a5a0bb231ad2cea9f03284a Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Sun, 18 Oct 2015 19:55:04 +0530 Subject: PCI: Turn off Request Attributes to avoid Chelsio T5 Completion erratum The Chelsio T5 has a PCIe compliance erratum that causes Malformed TLP or Unexpected Completion errors in some systems, which may cause device access timeouts. Per PCIe r3.0, sec 2.2.9, "Completion headers must supply the same values for the Attribute as were supplied in the header of the corresponding Request, except as explicitly allowed when IDO is used." Instead of copying the Attributes from the Request to the Completion, the T5 always generates Completions with zero Attributes. The receiver of a Completion whose Attributes don't match the Request may accept it (which itself seems non-compliant based on sec 2.3.2), or it may handle it as a Malformed TLP or an Unexpected Completion, which will probably lead to a device access timeout. Work around this by disabling "Relaxed Ordering" and "No Snoop" in the Root Port so it always generate Requests with zero Attributes. This does affect all other devices which are downstream of that Root Port, but these are performance optimizations that should not make a functional difference. Note that Configuration Space accesses are never supposed to have TLP Attributes, so we're safe waiting till after any Configuration Space accesses to do the Root Port "fixup". Based on original work by Casey Leedom [bhelgaas: changelog, comments, rename to pci_find_pcie_root_port(), rework to use pci_upstream_bridge() and check for Root Port device type, edit diagnostics to clarify intent and devices affected] Signed-off-by: Hariprasad Shenai Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index b54fbf1571b1..e828e7b4afec 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -820,6 +820,7 @@ void pci_bus_add_device(struct pci_dev *dev); void pci_read_bridge_bases(struct pci_bus *child); struct resource *pci_find_parent_resource(const struct pci_dev *dev, struct resource *res); +struct pci_dev *pci_find_pcie_root_port(struct pci_dev *dev); u8 pci_swizzle_interrupt_pin(const struct pci_dev *dev, u8 pin); int pci_get_interrupt_pin(struct pci_dev *dev, struct pci_dev **bridge); u8 pci_common_swizzle(struct pci_dev *dev, u8 *pinp); -- cgit v1.2.3 From 67a2e213e7e937c41c52ab5bc46bf3f4de469f6e Mon Sep 17 00:00:00 2001 From: Rohit Vaswani Date: Thu, 22 Oct 2015 13:32:11 -0700 Subject: mm: cma: fix incorrect type conversion for size during dma allocation This was found during userspace fuzzing test when a large size dma cma allocation is made by driver(like ion) through userspace. show_stack+0x10/0x1c dump_stack+0x74/0xc8 kasan_report_error+0x2b0/0x408 kasan_report+0x34/0x40 __asan_storeN+0x15c/0x168 memset+0x20/0x44 __dma_alloc_coherent+0x114/0x18c Signed-off-by: Rohit Vaswani Acked-by: Greg Kroah-Hartman Cc: Marek Szyprowski Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cma.h | 2 +- include/linux/dma-contiguous.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cma.h b/include/linux/cma.h index f7ef093ec49a..29f9e774ab76 100644 --- a/include/linux/cma.h +++ b/include/linux/cma.h @@ -26,6 +26,6 @@ extern int __init cma_declare_contiguous(phys_addr_t base, extern int cma_init_reserved_mem(phys_addr_t base, phys_addr_t size, unsigned int order_per_bit, struct cma **res_cma); -extern struct page *cma_alloc(struct cma *cma, unsigned int count, unsigned int align); +extern struct page *cma_alloc(struct cma *cma, size_t count, unsigned int align); extern bool cma_release(struct cma *cma, const struct page *pages, unsigned int count); #endif diff --git a/include/linux/dma-contiguous.h b/include/linux/dma-contiguous.h index 569bbd039896..fec734df1524 100644 --- a/include/linux/dma-contiguous.h +++ b/include/linux/dma-contiguous.h @@ -111,7 +111,7 @@ static inline int dma_declare_contiguous(struct device *dev, phys_addr_t size, return ret; } -struct page *dma_alloc_from_contiguous(struct device *dev, int count, +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order); bool dma_release_from_contiguous(struct device *dev, struct page *pages, int count); @@ -144,7 +144,7 @@ int dma_declare_contiguous(struct device *dev, phys_addr_t size, } static inline -struct page *dma_alloc_from_contiguous(struct device *dev, int count, +struct page *dma_alloc_from_contiguous(struct device *dev, size_t count, unsigned int order) { return NULL; -- cgit v1.2.3 From 79907146fb5b1778035870db895fb2bf64061284 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Fri, 16 Oct 2015 11:32:42 +0200 Subject: overflow-arith: begin to add support for overflow builtin functions The idea of the overflow-arith.h header is to collect overflow checking functions in one central place. If gcc compiler supports the __builtin_overflow_* builtins we use them because they might give better performance, otherwise the code falls back to normal overflow checking functions. The builtin_overflow functions are supported by gcc-5 and clang. The matter of supporting clang is to just provide a corresponding CC_HAVE_BUILTIN_OVERFLOW, because the specific overflow checking builtins don't differ between gcc and clang. I just provide overflow_usub function here as I intend this to get merged into net, more functions will definitely follow as they are needed. Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ++++ include/linux/overflow-arith.h | 18 ++++++++++++++++++ 2 files changed, 22 insertions(+) create mode 100644 include/linux/overflow-arith.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index dfaa7b3e9ae9..82c159e0532a 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,6 +237,10 @@ #define KASAN_ABI_VERSION 3 #endif +#if GCC_VERSION >= 50000 +#define CC_HAVE_BUILTIN_OVERFLOW +#endif + #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h new file mode 100644 index 000000000000..e12ccf854a70 --- /dev/null +++ b/include/linux/overflow-arith.h @@ -0,0 +1,18 @@ +#pragma once + +#include + +#ifdef CC_HAVE_BUILTIN_OVERFLOW + +#define overflow_usub __builtin_usub_overflow + +#else + +static inline bool overflow_usub(unsigned int a, unsigned int b, + unsigned int *res) +{ + *res = a - b; + return *res > a ? true : false; +} + +#endif -- cgit v1.2.3 From dd461d6aa894761fe67c30ddf81eec0d08be216b Mon Sep 17 00:00:00 2001 From: Hiroshi Shimamoto Date: Fri, 28 Aug 2015 06:57:55 +0000 Subject: if_link: Add control trust VF Add netlink directives and ndo entry to trust VF user. This controls the special permission of VF user. The administrator will dedicatedly trust VF user to use some features which impacts security and/or performance. The administrator never turn it on unless VF user is fully trusted. CC: Sy Jong Choi Signed-off-by: Hiroshi Shimamoto Acked-by: Greg Rose Tested-by: Krishneil Singh Signed-off-by: Jeff Kirsher --- include/linux/if_link.h | 1 + include/linux/netdevice.h | 3 +++ 2 files changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/if_link.h b/include/linux/if_link.h index ae5d0d22955d..f923d15b432c 100644 --- a/include/linux/if_link.h +++ b/include/linux/if_link.h @@ -24,5 +24,6 @@ struct ifla_vf_info { __u32 min_tx_rate; __u32 max_tx_rate; __u32 rss_query_en; + __u32 trusted; }; #endif /* _LINUX_IF_LINK_H */ diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 69fdd427c8cb..773383859bd9 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -881,6 +881,7 @@ typedef u16 (*select_queue_fallback_t)(struct net_device *dev, * int (*ndo_set_vf_rate)(struct net_device *dev, int vf, int min_tx_rate, * int max_tx_rate); * int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); + * int (*ndo_set_vf_trust)(struct net_device *dev, int vf, bool setting); * int (*ndo_get_vf_config)(struct net_device *dev, * int vf, struct ifla_vf_info *ivf); * int (*ndo_set_vf_link_state)(struct net_device *dev, int vf, int link_state); @@ -1109,6 +1110,8 @@ struct net_device_ops { int max_tx_rate); int (*ndo_set_vf_spoofchk)(struct net_device *dev, int vf, bool setting); + int (*ndo_set_vf_trust)(struct net_device *dev, + int vf, bool setting); int (*ndo_get_vf_config)(struct net_device *dev, int vf, struct ifla_vf_info *ivf); -- cgit v1.2.3 From 21dd19fed3c3eb42a3877600f4a97a774323e562 Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Thu, 22 Oct 2015 10:37:49 +0200 Subject: net: phy: Add nested variants of mdiobus read/write Since nested variants of mdiobus_read/write are used in multiple drivers, add nested variants in the mdiobus core. Suggested-by: Andrew Lunn Signed-off-by: Neil Armstrong Signed-off-by: David S. Miller --- include/linux/phy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy.h b/include/linux/phy.h index 4c477e6ece33..05fde31b6dc6 100644 --- a/include/linux/phy.h +++ b/include/linux/phy.h @@ -213,7 +213,9 @@ static inline struct mii_bus *devm_mdiobus_alloc(struct device *dev) void devm_mdiobus_free(struct device *dev, struct mii_bus *bus); struct phy_device *mdiobus_scan(struct mii_bus *bus, int addr); int mdiobus_read(struct mii_bus *bus, int addr, u32 regnum); +int mdiobus_read_nested(struct mii_bus *bus, int addr, u32 regnum); int mdiobus_write(struct mii_bus *bus, int addr, u32 regnum, u16 val); +int mdiobus_write_nested(struct mii_bus *bus, int addr, u32 regnum, u16 val); #define PHY_INTERRUPT_DISABLED 0x0 -- cgit v1.2.3 From 62cedb9f135794ec26a93ae29e5f0231ab263c84 Mon Sep 17 00:00:00 2001 From: David Vrabel Date: Thu, 25 Jun 2015 16:35:49 +0100 Subject: mm: memory hotplug with an existing resource Add add_memory_resource() to add memory using an existing "System RAM" resource. This is useful if the memory region is being located by finding a free resource slot with allocate_resource(). Xen guests will make use of this in their balloon driver to hotplug arbitrary amounts of memory in response to toolstack requests. Signed-off-by: David Vrabel Reviewed-by: Daniel Kiper Reviewed-by: Tang Chen --- include/linux/memory_hotplug.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index 8f60e899b33c..2ea574ff9714 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -11,6 +11,7 @@ struct zone; struct pglist_data; struct mem_section; struct memory_block; +struct resource; #ifdef CONFIG_MEMORY_HOTPLUG @@ -266,6 +267,7 @@ static inline void remove_memory(int nid, u64 start, u64 size) {} extern int walk_memory_range(unsigned long start_pfn, unsigned long end_pfn, void *arg, int (*func)(struct memory_block *, void *)); extern int add_memory(int nid, u64 start, u64 size); +extern int add_memory_resource(int nid, struct resource *resource); extern int zone_for_memory(int nid, u64 start, u64 size, int zone_default, bool for_device); extern int arch_add_memory(int nid, u64 start, u64 size, bool for_device); -- cgit v1.2.3 From a1fdeaa71c95e5c6eba40245f84f762202dc69bb Mon Sep 17 00:00:00 2001 From: Javier Martinez Canillas Date: Thu, 22 Oct 2015 18:59:22 +0200 Subject: spi: fix kernel-doc warnings about missing return desc in spi.h When building docs with make htmldocs, warnings about not having a description for the return value are reported, i.e: warning: No description found for return value of 'spi_write' Fix these by following the kernel-doc conventions explained in Documentation/kernel-doc-nano-HOWTO.txt. Signed-off-by: Javier Martinez Canillas Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index e1f21778cb54..635bff60eda5 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -847,8 +847,10 @@ extern int spi_bus_unlock(struct spi_master *master); * @len: data buffer size * Context: can sleep * - * This writes the buffer and returns zero or a negative error code. + * This function writes the buffer @buf. * Callable only from contexts that can sleep. + * + * Return: zero on success, else a negative error code. */ static inline int spi_write(struct spi_device *spi, const void *buf, size_t len) @@ -871,8 +873,10 @@ spi_write(struct spi_device *spi, const void *buf, size_t len) * @len: data buffer size * Context: can sleep * - * This reads the buffer and returns zero or a negative error code. + * This function reads the buffer @buf. * Callable only from contexts that can sleep. + * + * Return: zero on success, else a negative error code. */ static inline int spi_read(struct spi_device *spi, void *buf, size_t len) @@ -899,7 +903,7 @@ spi_read(struct spi_device *spi, void *buf, size_t len) * * For more specific semantics see spi_sync(). * - * It returns zero on success, else a negative error code. + * Return: Return: zero on success, else a negative error code. */ static inline int spi_sync_transfer(struct spi_device *spi, struct spi_transfer *xfers, @@ -923,9 +927,10 @@ extern int spi_write_then_read(struct spi_device *spi, * @cmd: command to be written before data is read back * Context: can sleep * - * This returns the (unsigned) eight bit number returned by the - * device, or else a negative error code. Callable only from - * contexts that can sleep. + * Callable only from contexts that can sleep. + * + * Return: the (unsigned) eight bit number returned by the + * device, or else a negative error code. */ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) { @@ -944,12 +949,13 @@ static inline ssize_t spi_w8r8(struct spi_device *spi, u8 cmd) * @cmd: command to be written before data is read back * Context: can sleep * - * This returns the (unsigned) sixteen bit number returned by the - * device, or else a negative error code. Callable only from - * contexts that can sleep. - * * The number is returned in wire-order, which is at least sometimes * big-endian. + * + * Callable only from contexts that can sleep. + * + * Return: the (unsigned) sixteen bit number returned by the + * device, or else a negative error code. */ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) { @@ -968,13 +974,13 @@ static inline ssize_t spi_w8r16(struct spi_device *spi, u8 cmd) * @cmd: command to be written before data is read back * Context: can sleep * - * This returns the (unsigned) sixteen bit number returned by the device in cpu - * endianness, or else a negative error code. Callable only from contexts that - * can sleep. - * * This function is similar to spi_w8r16, with the exception that it will * convert the read 16 bit data word from big-endian to native endianness. * + * Callable only from contexts that can sleep. + * + * Return: the (unsigned) sixteen bit number returned by the device in cpu + * endianness, or else a negative error code. */ static inline ssize_t spi_w8r16be(struct spi_device *spi, u8 cmd) -- cgit v1.2.3 From 62a615e083604d291af0cb18f9b4549531ea4f94 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Oct 2015 12:16:41 +0300 Subject: mfd: core: redo ACPI matching of the children devices There is at least one board on the market, i.e. Intel Galileo Gen2, that uses _ADR to distinguish the devices under one actual device. Due to this we have to improve the quirk in the MFD core to handle that board. Acked-by: Rafael J. Wysocki Acked-by: Lee Jones Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/mfd/core.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index a76bc100bf97..27dac3ff18b9 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -18,6 +18,12 @@ struct irq_domain; +/* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ +struct mfd_cell_acpi_match { + const char *pnpid; + const unsigned long long adr; +}; + /* * This struct describes the MFD part ("cell"). * After registration the copy of this structure will become the platform data @@ -44,8 +50,8 @@ struct mfd_cell { */ const char *of_compatible; - /* Matches ACPI PNP id, either _HID or _CID */ - const char *acpi_pnpid; + /* Matches ACPI */ + const struct mfd_cell_acpi_match *acpi_match; /* * These resources can be specified relative to the parent device. -- cgit v1.2.3 From 0d0f4aab4e4d290138a4ae7f2ef8469e48c9a669 Mon Sep 17 00:00:00 2001 From: Andrey Ryabinin Date: Wed, 7 Oct 2015 14:39:55 +0300 Subject: lockd: get rid of reference-counted NSM RPC clients Currently we have reference-counted per-net NSM RPC client which created on the first monitor request and destroyed after the last unmonitor request. It's needed because RPC client need to know 'utsname()->nodename', but utsname() might be NULL when nsm_unmonitor() called. So instead of holding the rpc client we could just save nodename in struct nlm_host and pass it to the rpc_create(). Thus ther is no need in keeping rpc client until last unmonitor request. We could create separate RPC clients for each monitor/unmonitor requests. Signed-off-by: Andrey Ryabinin Signed-off-by: J. Bruce Fields --- include/linux/lockd/lockd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lockd/lockd.h b/include/linux/lockd/lockd.h index fd3b65bf51b5..c15373894a42 100644 --- a/include/linux/lockd/lockd.h +++ b/include/linux/lockd/lockd.h @@ -68,6 +68,7 @@ struct nlm_host { struct nsm_handle *h_nsmhandle; /* NSM status handle */ char *h_addrbuf; /* address eyecatcher */ struct net *net; /* host net */ + char nodename[UNX_MAXNODENAME + 1]; }; /* -- cgit v1.2.3 From 778620364ef525e83597a6edee4d0a69db67fd3d Mon Sep 17 00:00:00 2001 From: Neil Brown Date: Fri, 16 Oct 2015 08:59:08 +1100 Subject: sunrpc/cache: make cache flushing more reliable. The caches used to store sunrpc authentication information can be flushed by writing a timestamp to a file in /proc. This timestamp has a one-second resolution and any entry in cache that was last_refreshed *before* that time is treated as expired. This is problematic as it is not possible to reliably flush the cache without interrupting NFS service. If the current time is written to the "flush" file, any entry that was added since the current second started will still be treated as valid. If one second beyond than the current time is written to the file then no entries can be valid until the second ticks over. This will mean that no NFS request will be handled for up to 1 second. To resolve this issue we make two changes: 1/ treat an entry as expired if the timestamp when it was last_refreshed is before *or the same as* the expiry time. This means that current code which writes out the current time will now flush the cache reliably. 2/ when a new entry in added to the cache - set the last_refresh timestamp to 1 second *beyond* the current flush time, when that not in the past. This ensures that newly added entries will always be valid. Now that we have a very reliable way to flush the cache, and also since we are using "since-boot" timestamps which are monotonic, change cache_purge() to set the smallest future flush_time which will work, and leave it there: don't revert to '1'. Also disable the setting of the 'flush_time' far into the future. That has never been useful and is now awkward as it would cause last_refresh times to be strange. Finally: if a request is made to set the 'flush_time' to the current second, assume the intent is to flush the cache and advance it, if necessary, to 1 second beyond the current 'flush_time' so that all active entries will be deemed to be expired. As part of this we need to add a 'cache_detail' arg to cache_init() and cache_fresh_locked() so they can find the current ->flush_time. Signed-off-by: NeilBrown Reported-by: Olaf Kirch Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/cache.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/cache.h b/include/linux/sunrpc/cache.h index 03d3b4c92d9f..ed03c9f7f908 100644 --- a/include/linux/sunrpc/cache.h +++ b/include/linux/sunrpc/cache.h @@ -48,8 +48,10 @@ struct cache_head { struct hlist_node cache_list; time_t expiry_time; /* After time time, don't use the data */ - time_t last_refresh; /* If CACHE_PENDING, this is when upcall - * was sent, else this is when update was received + time_t last_refresh; /* If CACHE_PENDING, this is when upcall was + * sent, else this is when update was + * received, though it is alway set to + * be *after* ->flush_time. */ struct kref ref; unsigned long flags; @@ -105,8 +107,12 @@ struct cache_detail { /* fields below this comment are for internal use * and should not be touched by cache owners */ - time_t flush_time; /* flush all cache items with last_refresh - * earlier than this */ + time_t flush_time; /* flush all cache items with + * last_refresh at or earlier + * than this. last_refresh + * is never set at or earlier + * than this. + */ struct list_head others; time_t nextcheck; int entries; @@ -203,7 +209,7 @@ static inline void cache_put(struct cache_head *h, struct cache_detail *cd) static inline int cache_is_expired(struct cache_detail *detail, struct cache_head *h) { return (h->expiry_time < seconds_since_boot()) || - (detail->flush_time > h->last_refresh); + (detail->flush_time >= h->last_refresh); } extern int cache_check(struct cache_detail *detail, -- cgit v1.2.3 From acba7855dda0d6e7d87dec2f89b4d9eebb36bbe2 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 21 Oct 2015 16:26:44 -0700 Subject: clk: Remove clk_{register,unregister}_multiplier() These APIs aren't used, so remove them. This can be reverted if we get a user at some point. Reviewed-by: Maxime Ripard Suggested-by: Michael Turquette Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index e9a4d1ea556e..837cd7c7c8a7 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -554,13 +554,6 @@ struct clk_multiplier { extern const struct clk_ops clk_multiplier_ops; -struct clk *clk_register_multiplier(struct device *dev, const char *name, - const char *parent_name, - unsigned long flags, - void __iomem *reg, u8 shift, u8 width, - u8 clk_mult_flags, spinlock_t *lock); -void clk_unregister_multiplier(struct clk *clk); - /*** * struct clk_composite - aggregate clock of mux, divider and gate clocks * -- cgit v1.2.3 From c59f419bdd1f056e1ceacbd29f7be7bcff746a5d Mon Sep 17 00:00:00 2001 From: Helge Deller Date: Sun, 25 Oct 2015 10:00:32 +0100 Subject: net/xps: Fix calculation of initial number of xps queues The existing code breaks on architectures where the L1 cache size (L1_CACHE_BYTES) is smaller or equal the size of struct xps_map. The new code ensures that we get at minimum one initial xps queue, or even more as long as it fits into the next multiple of L1_CACHE_SIZE. Signed-off-by: Helge Deller Acked-by: Alexander Duyck --- include/linux/netdevice.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2d15e3831440..2212c8225deb 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -718,8 +718,8 @@ struct xps_map { u16 queues[0]; }; #define XPS_MAP_SIZE(_num) (sizeof(struct xps_map) + ((_num) * sizeof(u16))) -#define XPS_MIN_MAP_ALLOC ((L1_CACHE_BYTES - sizeof(struct xps_map)) \ - / sizeof(u16)) +#define XPS_MIN_MAP_ALLOC ((L1_CACHE_ALIGN(offsetof(struct xps_map, queues[1])) \ + - sizeof(struct xps_map)) / sizeof(u16)) /* * This structure holds all XPS maps for device. Maps are indexed by CPU. -- cgit v1.2.3 From c0e5c4450494d74c8deb4f47ddcbb74c94937e20 Mon Sep 17 00:00:00 2001 From: Dustin Byford Date: Fri, 23 Oct 2015 12:27:06 -0700 Subject: acpi: add acpi_preset_companion() stub Add a stub for acpi_preset_companion(). Fixes build failures when acpi_preset_companion() is used and CONFIG_ACPI is not set. Acked-by: Mika Westerberg Signed-off-by: Dustin Byford Acked-by: Rafael J. Wysocki Signed-off-by: Wolfram Sang --- include/linux/acpi.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..43b55e751dea 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -477,6 +477,11 @@ static inline bool has_acpi_companion(struct device *dev) return false; } +static inline void acpi_preset_companion(struct device *dev, + struct acpi_device *parent, u64 addr) +{ +} + static inline const char *acpi_dev_name(struct acpi_device *adev) { return NULL; -- cgit v1.2.3 From d787dcdb9c8f412b1dd0727f90d3f793a61a2551 Mon Sep 17 00:00:00 2001 From: Chen-Yu Tsai Date: Fri, 23 Oct 2015 20:41:31 +0200 Subject: bus: sunxi-rsb: Add driver for Allwinner Reduced Serial Bus Reduced Serial Bus (RSB) is an Allwinner proprietery interface used to communicate with PMICs and other peripheral ICs. RSB is a two-wire push-pull serial bus that supports 1 master device and up to 15 active slave devices. Signed-off-by: Chen-Yu Tsai Reviewed-by: Mark Brown Acked-by: Arnd Bergmann Signed-off-by: Maxime Ripard Signed-off-by: Olof Johansson --- include/linux/sunxi-rsb.h | 105 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 105 insertions(+) create mode 100644 include/linux/sunxi-rsb.h (limited to 'include/linux') diff --git a/include/linux/sunxi-rsb.h b/include/linux/sunxi-rsb.h new file mode 100644 index 000000000000..7e75bb0346d0 --- /dev/null +++ b/include/linux/sunxi-rsb.h @@ -0,0 +1,105 @@ +/* + * Allwinner Reduced Serial Bus Driver + * + * Copyright (c) 2015 Chen-Yu Tsai + * + * Author: Chen-Yu Tsai + * + * This file is licensed under the terms of the GNU General Public + * License version 2. This program is licensed "as is" without any + * warranty of any kind, whether express or implied. + */ +#ifndef _SUNXI_RSB_H +#define _SUNXI_RSB_H + +#include +#include +#include + +struct sunxi_rsb; + +/** + * struct sunxi_rsb_device - Basic representation of an RSB device + * @dev: Driver model representation of the device. + * @ctrl: RSB controller managing the bus hosting this device. + * @rtaddr: This device's runtime address + * @hwaddr: This device's hardware address + */ +struct sunxi_rsb_device { + struct device dev; + struct sunxi_rsb *rsb; + int irq; + u8 rtaddr; + u16 hwaddr; +}; + +static inline struct sunxi_rsb_device *to_sunxi_rsb_device(struct device *d) +{ + return container_of(d, struct sunxi_rsb_device, dev); +} + +static inline void *sunxi_rsb_device_get_drvdata(const struct sunxi_rsb_device *rdev) +{ + return dev_get_drvdata(&rdev->dev); +} + +static inline void sunxi_rsb_device_set_drvdata(struct sunxi_rsb_device *rdev, + void *data) +{ + dev_set_drvdata(&rdev->dev, data); +} + +/** + * struct sunxi_rsb_driver - RSB slave device driver + * @driver: RSB device drivers should initialize name and owner field of + * this structure. + * @probe: binds this driver to a RSB device. + * @remove: unbinds this driver from the RSB device. + */ +struct sunxi_rsb_driver { + struct device_driver driver; + int (*probe)(struct sunxi_rsb_device *rdev); + int (*remove)(struct sunxi_rsb_device *rdev); +}; + +static inline struct sunxi_rsb_driver *to_sunxi_rsb_driver(struct device_driver *d) +{ + return container_of(d, struct sunxi_rsb_driver, driver); +} + +int sunxi_rsb_driver_register(struct sunxi_rsb_driver *rdrv); + +/** + * sunxi_rsb_driver_unregister() - unregister an RSB client driver + * @rdrv: the driver to unregister + */ +static inline void sunxi_rsb_driver_unregister(struct sunxi_rsb_driver *rdrv) +{ + if (rdrv) + driver_unregister(&rdrv->driver); +} + +#define module_sunxi_rsb_driver(__sunxi_rsb_driver) \ + module_driver(__sunxi_rsb_driver, sunxi_rsb_driver_register, \ + sunxi_rsb_driver_unregister) + +struct regmap *__devm_regmap_init_sunxi_rsb(struct sunxi_rsb_device *rdev, + const struct regmap_config *config, + struct lock_class_key *lock_key, + const char *lock_name); + +/** + * devm_regmap_init_sunxi_rsb(): Initialise managed register map + * + * @rdev: Device that will be interacted with + * @config: Configuration for register map + * + * The return value will be an ERR_PTR() on error or a valid pointer + * to a struct regmap. The regmap will be automatically freed by the + * device management code. + */ +#define devm_regmap_init_sunxi_rsb(rdev, config) \ + __regmap_lockdep_wrapper(__devm_regmap_init_sunxi_rsb, #config, \ + rdev, config) + +#endif /* _SUNXI_RSB_H */ -- cgit v1.2.3 From 7904b5c4988e18b50056b5e71a3ffca752a8a451 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 22 Sep 2015 17:13:19 -0400 Subject: tracepoint: Give priority to probes of tracepoints In order to guarantee that a probe will be called before other probes that are attached to a tracepoint, there needs to be a mechanism to provide priority of one probe over the others. Adding a prio field to the struct tracepoint_func, which lets the probes be sorted by the priority set in the structure. If no priority is specified, then a priority of 10 is given (this is a macro, and perhaps may be changed in the future). Now probes may be added to affect other probes that are attached to a tracepoint with a guaranteed order. One use case would be to allow tracing of tracepoints be able to filter by pid. A special (higher priority probe) may be added to the sched_switch tracepoint and set the necessary flags of the other tracepoints to notify them if they should be traced or not. In case a tracepoint is enabled at the sched_switch tracepoint too, the order of the two are not random. Cc: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index afada369c5b7..6b79537a42b1 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -26,6 +26,7 @@ struct notifier_block; struct tracepoint_func { void *func; void *data; + int prio; }; struct tracepoint { @@ -42,9 +43,14 @@ struct trace_enum_map { unsigned long enum_value; }; +#define TRACEPOINT_DEFAULT_PRIO 10 + extern int tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); extern int +tracepoint_probe_register_prio(struct tracepoint *tp, void *probe, void *data, + int prio); +extern int tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); extern void for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), @@ -207,6 +213,13 @@ extern void syscall_unregfunc(void); (void *)probe, data); \ } \ static inline int \ + register_trace_prio_##name(void (*probe)(data_proto), void *data,\ + int prio) \ + { \ + return tracepoint_probe_register_prio(&__tracepoint_##name, \ + (void *)probe, data, prio); \ + } \ + static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ return tracepoint_probe_unregister(&__tracepoint_##name,\ -- cgit v1.2.3 From 3fdaf80f4a836911c0eda1cee92f8aa625f90197 Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Fri, 25 Sep 2015 12:58:44 -0400 Subject: tracing: Implement event pid filtering Add the necessary hooks to use the pids loaded in set_event_pid to filter all the events enabled in the tracing instance that match the pids listed. Two probes are added to both sched_switch and sched_wakeup tracepoints to be called before other probes are called and after the other probes are called. The first is used to set the necessary flags to let the probes know to test if they should be traced or not. The sched_switch pre probe will set the "ignore_pid" flag if neither the previous or next task has a matching pid. The sched_switch probe will set the "ignore_pid" flag if the next task does not match the matching pid. The pre probe allows for probes tracing sched_switch to be traced if necessary. The sched_wakeup pre probe will set the "ignore_pid" flag if neither the current task nor the wakee task has a matching pid. The sched_wakeup post probe will set the "ignore_pid" flag if the current task does not have a matching pid. Cc: "Paul E. McKenney" Signed-off-by: Steven Rostedt --- include/linux/trace_events.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/trace_events.h b/include/linux/trace_events.h index f85693bbcdc3..429fdfc3baf5 100644 --- a/include/linux/trace_events.h +++ b/include/linux/trace_events.h @@ -328,6 +328,7 @@ enum { EVENT_FILE_FL_SOFT_DISABLED_BIT, EVENT_FILE_FL_TRIGGER_MODE_BIT, EVENT_FILE_FL_TRIGGER_COND_BIT, + EVENT_FILE_FL_PID_FILTER_BIT, }; /* @@ -341,6 +342,7 @@ enum { * tracepoint may be enabled) * TRIGGER_MODE - When set, invoke the triggers associated with the event * TRIGGER_COND - When set, one or more triggers has an associated filter + * PID_FILTER - When set, the event is filtered based on pid */ enum { EVENT_FILE_FL_ENABLED = (1 << EVENT_FILE_FL_ENABLED_BIT), @@ -351,6 +353,7 @@ enum { EVENT_FILE_FL_SOFT_DISABLED = (1 << EVENT_FILE_FL_SOFT_DISABLED_BIT), EVENT_FILE_FL_TRIGGER_MODE = (1 << EVENT_FILE_FL_TRIGGER_MODE_BIT), EVENT_FILE_FL_TRIGGER_COND = (1 << EVENT_FILE_FL_TRIGGER_COND_BIT), + EVENT_FILE_FL_PID_FILTER = (1 << EVENT_FILE_FL_PID_FILTER_BIT), }; struct trace_event_file { @@ -429,6 +432,8 @@ extern enum event_trigger_type event_triggers_call(struct trace_event_file *file extern void event_triggers_post_call(struct trace_event_file *file, enum event_trigger_type tt); +bool trace_event_ignore_this_pid(struct trace_event_file *trace_file); + /** * trace_trigger_soft_disabled - do triggers and test if soft disabled * @file: The file pointer of the event to test @@ -448,6 +453,8 @@ trace_trigger_soft_disabled(struct trace_event_file *file) event_triggers_call(file, NULL); if (eflags & EVENT_FILE_FL_SOFT_DISABLED) return true; + if (eflags & EVENT_FILE_FL_PID_FILTER) + return trace_event_ignore_this_pid(file); } return false; } -- cgit v1.2.3 From 49e4b84333f338d4f183f28f1f3c1131b9fb2b5a Mon Sep 17 00:00:00 2001 From: Chen Yu Date: Sun, 25 Oct 2015 01:02:19 +0800 Subject: ACPI: Use correct IRQ when uninstalling ACPI interrupt handler Currently when the system is trying to uninstall the ACPI interrupt handler, it uses acpi_gbl_FADT.sci_interrupt as the IRQ number. However, the IRQ number that the ACPI interrupt handled is installed for comes from acpi_gsi_to_irq() and that is the number that should be used for the handler removal. Fix this problem by using the mapped IRQ returned from acpi_gsi_to_irq() as appropriate. Cc: All applicable Acked-by: Lv Zheng Signed-off-by: Chen Yu Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 43856d19cf4d..1ae6ba06f9ac 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -193,6 +193,12 @@ int acpi_ioapic_registered(acpi_handle handle, u32 gsi_base); void acpi_irq_stats_init(void); extern u32 acpi_irq_handled; extern u32 acpi_irq_not_handled; +extern unsigned int acpi_sci_irq; +#define INVALID_ACPI_IRQ ((unsigned)-1) +static inline bool acpi_sci_irq_valid(void) +{ + return acpi_sci_irq != INVALID_ACPI_IRQ; +} extern int sbf_port; extern unsigned long acpi_realmode_flags; -- cgit v1.2.3 From 8890624a4e8c2c7046d63bfd15d7331af9f55f10 Mon Sep 17 00:00:00 2001 From: Michael Grzeschik Date: Thu, 18 Sep 2014 00:12:50 +0200 Subject: arcnet: com20020-pci: add led trigger support The EAE PLX-PCI card has special leds on the the main io pci resource bar. This patch adds support to trigger the conflict and data leds with the packages. Signed-off-by: Michael Grzeschik --- include/linux/leds.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/leds.h b/include/linux/leds.h index b122eeafb5dc..fa359c79c825 100644 --- a/include/linux/leds.h +++ b/include/linux/leds.h @@ -283,6 +283,13 @@ static inline void led_trigger_register_simple(const char *name, static inline void led_trigger_unregister_simple(struct led_trigger *trigger) {} static inline void led_trigger_event(struct led_trigger *trigger, enum led_brightness event) {} +static inline void led_trigger_blink(struct led_trigger *trigger, + unsigned long *delay_on, + unsigned long *delay_off) {} +static inline void led_trigger_blink_oneshot(struct led_trigger *trigger, + unsigned long *delay_on, + unsigned long *delay_off, + int invert) {} static inline void led_trigger_set_default(struct led_classdev *led_cdev) {} static inline void led_trigger_set(struct led_classdev *led_cdev, struct led_trigger *trigger) {} -- cgit v1.2.3 From 98a3be44ffa67b812de7aa7aed9f2331edcfb1a5 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Fri, 23 Oct 2015 12:16:41 +0300 Subject: mfd: core: redo ACPI matching of the children devices There is at least one board on the market, i.e. Intel Galileo Gen2, that uses _ADR to distinguish the devices under one actual device. Due to this we have to improve the quirk in the MFD core to handle that board. Acked-by: Rafael J. Wysocki Acked-by: Lee Jones Signed-off-by: Andy Shevchenko Signed-off-by: Wolfram Sang --- include/linux/mfd/core.h | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/core.h b/include/linux/mfd/core.h index a76bc100bf97..27dac3ff18b9 100644 --- a/include/linux/mfd/core.h +++ b/include/linux/mfd/core.h @@ -18,6 +18,12 @@ struct irq_domain; +/* Matches ACPI PNP id, either _HID or _CID, or ACPI _ADR */ +struct mfd_cell_acpi_match { + const char *pnpid; + const unsigned long long adr; +}; + /* * This struct describes the MFD part ("cell"). * After registration the copy of this structure will become the platform data @@ -44,8 +50,8 @@ struct mfd_cell { */ const char *of_compatible; - /* Matches ACPI PNP id, either _HID or _CID */ - const char *acpi_pnpid; + /* Matches ACPI */ + const struct mfd_cell_acpi_match *acpi_match; /* * These resources can be specified relative to the parent device. -- cgit v1.2.3 From 62a2e633474107a9ae93dfedf16341ec6414a907 Mon Sep 17 00:00:00 2001 From: Vaibhav Hiremath Date: Tue, 25 Aug 2015 14:34:28 +0530 Subject: mfd: 88pm80x: Add 88pm860 chip type support Add chip identification support for 88PM860 device to the pm80x_chip_mapping table. Signed-off-by: Vaibhav Hiremath Reviewed-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/88pm80x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/88pm80x.h b/include/linux/mfd/88pm80x.h index 8fcad63fab55..d409ceb2231e 100644 --- a/include/linux/mfd/88pm80x.h +++ b/include/linux/mfd/88pm80x.h @@ -21,6 +21,7 @@ enum { CHIP_INVALID = 0, CHIP_PM800, CHIP_PM805, + CHIP_PM860, CHIP_MAX, }; -- cgit v1.2.3 From d785334a0d5deff30a487c74324b842d2179553d Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Mon, 14 Sep 2015 21:12:45 +0900 Subject: mfd: s2mps11: Add manual shutdown method for Odroid XU3 On Odroid XU3 board (with S2MPS11 PMIC) the PWRHOLD bit in CTRL1 register must be manually set to 0 before initiating power off sequence. One of usual power down methods for Exynos based devices looks like: 1. PWRHOLD pin of PMIC is connected to PSHOLD of Exynos SoC. 2. Exynos holds up this pin during system operation. 3. ACOKB pin of PMIC is pulled up to VBATT and optionally to pin in other device. 4. When PWRHOLD/PSHOLD goes low, the PMIC will turn off the power if ACOKB goes high. On Odroid XU3 family the difference is in (3) - the ACOKB is grounded. This means that PMIC must manually set PWRHOLD field to low and then wait for signal from Application Processor (the usual change in PWRHOLD/PSHOLD pin will actually cut off the power). The patch adds respective binding allowing Odroid XU3 device to be powered off. Signed-off-by: Krzysztof Kozlowski Reported-by: Anand Moon Tested-by: Anand Moon Reviewed-by: Javier Martinez Canillas Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 2 ++ include/linux/mfd/samsung/s2mps11.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index 75115384f3fc..aa78957e092f 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -132,6 +132,8 @@ struct sec_platform_data { int buck2_init; int buck3_init; int buck4_init; + /* Whether or not manually set PWRHOLD to low during shutdown. */ + bool manual_poweroff; }; /** diff --git a/include/linux/mfd/samsung/s2mps11.h b/include/linux/mfd/samsung/s2mps11.h index 7981a9d77d3f..b288965e8101 100644 --- a/include/linux/mfd/samsung/s2mps11.h +++ b/include/linux/mfd/samsung/s2mps11.h @@ -179,6 +179,7 @@ enum s2mps11_regulators { #define S2MPS11_BUCK_N_VOLTAGES (S2MPS11_BUCK_VSEL_MASK + 1) #define S2MPS11_RAMP_DELAY 25000 /* uV/us */ +#define S2MPS11_CTRL1_PWRHOLD_MASK BIT(4) #define S2MPS11_BUCK2_RAMP_SHIFT 6 #define S2MPS11_BUCK34_RAMP_SHIFT 4 -- cgit v1.2.3 From 9111fa5c4032589e9b7ccc01e330810ba05726bf Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 9 Sep 2015 09:49:22 +0100 Subject: mfd: arizona: Add register bits to support the ANC block Some Arizona devices have a hardware ANC block present. This patch adds the registers necessary to configure this hardware block. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/registers.h | 70 +++++++++++++++++++++++++++++++++++ 1 file changed, 70 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index c7c11c900196..cd7e78eae006 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1065,6 +1065,16 @@ #define ARIZONA_CLOCK_CONTROL 0xF00 #define ARIZONA_ANC_SRC 0xF01 #define ARIZONA_DSP_STATUS 0xF02 +#define ARIZONA_ANC_COEFF_START 0xF08 +#define ARIZONA_ANC_COEFF_END 0xF12 +#define ARIZONA_FCL_FILTER_CONTROL 0xF15 +#define ARIZONA_FCL_ADC_REFORMATTER_CONTROL 0xF17 +#define ARIZONA_FCL_COEFF_START 0xF18 +#define ARIZONA_FCL_COEFF_END 0xF69 +#define ARIZONA_FCR_FILTER_CONTROL 0xF70 +#define ARIZONA_FCR_ADC_REFORMATTER_CONTROL 0xF72 +#define ARIZONA_FCR_COEFF_START 0xF73 +#define ARIZONA_FCR_COEFF_END 0xFC4 #define ARIZONA_DSP1_CONTROL_1 0x1100 #define ARIZONA_DSP1_CLOCKING_1 0x1101 #define ARIZONA_DSP1_STATUS_1 0x1104 @@ -8050,6 +8060,66 @@ #define ARIZONA_ISRC3_NOTCH_ENA_SHIFT 0 /* ISRC3_NOTCH_ENA */ #define ARIZONA_ISRC3_NOTCH_ENA_WIDTH 1 /* ISRC3_NOTCH_ENA */ +/* + * R3840 (0xF00) - Clock Control + */ +#define ARIZONA_EXT_NG_SEL_CLR 0x0080 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_CLR_MASK 0x0080 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_CLR_SHIFT 7 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_CLR_WIDTH 1 /* EXT_NG_SEL_CLR */ +#define ARIZONA_EXT_NG_SEL_SET 0x0040 /* EXT_NG_SEL_SET */ +#define ARIZONA_EXT_NG_SEL_SET_MASK 0x0040 /* EXT_NG_SEL_SET */ +#define ARIZONA_EXT_NG_SEL_SET_SHIFT 6 /* EXT_NG_SEL_SET */ +#define ARIZONA_EXT_NG_SEL_SET_WIDTH 1 /* EXT_NG_SEL_SET */ +#define ARIZONA_CLK_R_ENA_CLR 0x0020 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_CLR_MASK 0x0020 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_CLR_SHIFT 5 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_CLR_WIDTH 1 /* CLK_R_ENA_CLR */ +#define ARIZONA_CLK_R_ENA_SET 0x0010 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_R_ENA_SET_MASK 0x0010 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_R_ENA_SET_SHIFT 4 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_R_ENA_SET_WIDTH 1 /* CLK_R_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_CLR 0x0008 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_CLR_MASK 0x0008 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_CLR_SHIFT 3 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_CLR_WIDTH 1 /* CLK_NG_ENA_CLR */ +#define ARIZONA_CLK_NG_ENA_SET 0x0004 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_SET_MASK 0x0004 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_SET_SHIFT 2 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_NG_ENA_SET_WIDTH 1 /* CLK_NG_ENA_SET */ +#define ARIZONA_CLK_L_ENA_CLR 0x0002 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_CLR_MASK 0x0002 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_CLR_SHIFT 1 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_CLR_WIDTH 1 /* CLK_L_ENA_CLR */ +#define ARIZONA_CLK_L_ENA_SET 0x0001 /* CLK_L_ENA_SET */ +#define ARIZONA_CLK_L_ENA_SET_MASK 0x0001 /* CLK_L_ENA_SET */ +#define ARIZONA_CLK_L_ENA_SET_SHIFT 0 /* CLK_L_ENA_SET */ +#define ARIZONA_CLK_L_ENA_SET_WIDTH 1 /* CLK_L_ENA_SET */ + +/* + * R3841 (0xF01) - ANC SRC + */ +#define ARIZONA_IN_RXANCR_SEL_MASK 0x0070 /* IN_RXANCR_SEL - [4:6] */ +#define ARIZONA_IN_RXANCR_SEL_SHIFT 4 /* IN_RXANCR_SEL - [4:6] */ +#define ARIZONA_IN_RXANCR_SEL_WIDTH 3 /* IN_RXANCR_SEL - [4:6] */ +#define ARIZONA_IN_RXANCL_SEL_MASK 0x0007 /* IN_RXANCL_SEL - [0:2] */ +#define ARIZONA_IN_RXANCL_SEL_SHIFT 0 /* IN_RXANCL_SEL - [0:2] */ +#define ARIZONA_IN_RXANCL_SEL_WIDTH 3 /* IN_RXANCL_SEL - [0:2] */ + +/* + * R3863 (0xF17) - FCL ADC Reformatter Control + */ +#define ARIZONA_FCL_MIC_MODE_SEL 0x000C /* FCL_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCL_MIC_MODE_SEL_SHIFT 2 /* FCL_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCL_MIC_MODE_SEL_WIDTH 2 /* FCL_MIC_MODE_SEL - [2:3] */ + +/* + * R3954 (0xF72) - FCR ADC Reformatter Control + */ +#define ARIZONA_FCR_MIC_MODE_SEL 0x000C /* FCR_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCR_MIC_MODE_SEL_SHIFT 2 /* FCR_MIC_MODE_SEL - [2:3] */ +#define ARIZONA_FCR_MIC_MODE_SEL_WIDTH 2 /* FCR_MIC_MODE_SEL - [2:3] */ + /* * R4352 (0x1100) - DSP1 Control 1 */ -- cgit v1.2.3 From f90d2e4035d456cb20c0b784725d556eb4de4d8a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Tue, 15 Sep 2015 15:19:45 +0200 Subject: mmc: core: Convert __mmc_switch() into an internal core function As there are no users of the __mmc_switch() API, except for the mmc core itself, let's convert it from an exported function into an internal. Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 258daf914c6d..79a31d3c3788 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -152,8 +152,6 @@ extern int mmc_app_cmd(struct mmc_host *, struct mmc_card *); extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); -extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool, - bool, bool); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); extern int mmc_send_tuning(struct mmc_host *host); extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); -- cgit v1.2.3 From 9eadcc0581a8ccaf4c2378aa1c193fb164304f1d Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Fri, 2 Oct 2015 10:56:11 +0200 Subject: mmc: core: Remove MMC_CLKGATE MMC_CLKGATE was once invented to save power by gating the bus clock at request inactivity. At that time it served its purpose. The modern way to deal with power saving for these scenarios, is by using runtime PM. Nowadays, several host drivers have deployed runtime PM, but for those that haven't and which still cares power saving at request inactivity, it's certainly time to deploy runtime PM as it has been around for several years now. To simplify code to mmc core and thus decrease maintenance efforts, this patch removes all code related to MMC_CLKGATE. Signed-off-by: Ulf Hansson Reviewed-by: Linus Walleij --- include/linux/mmc/card.h | 1 - include/linux/mmc/host.h | 32 -------------------------------- 2 files changed, 33 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index fdd0779ccdfa..eb0151bac50c 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -269,7 +269,6 @@ struct mmc_card { /* for byte mode */ #define MMC_QUIRK_NONSTD_SDIO (1<<2) /* non-standard SDIO card attached */ /* (missing CIA registers) */ -#define MMC_QUIRK_BROKEN_CLK_GATING (1<<3) /* clock gating the sdio bus will make card fail */ #define MMC_QUIRK_NONSTD_FUNC_IF (1<<4) /* SDIO card has nonstd function interfaces */ #define MMC_QUIRK_DISABLE_CD (1<<5) /* disconnect CD/DAT[3] resistor */ #define MMC_QUIRK_INAND_CMD38 (1<<6) /* iNAND devices have broken CMD38 */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 83b81fd865f3..cfb3c99a6b4b 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -292,18 +292,6 @@ struct mmc_host { mmc_pm_flag_t pm_caps; /* supported pm features */ -#ifdef CONFIG_MMC_CLKGATE - int clk_requests; /* internal reference counter */ - unsigned int clk_delay; /* number of MCI clk hold cycles */ - bool clk_gated; /* clock gated */ - struct delayed_work clk_gate_work; /* delayed clock gate */ - unsigned int clk_old; /* old clock value cache */ - spinlock_t clk_lock; /* lock for clk fields */ - struct mutex clk_gate_mutex; /* mutex for clock gating */ - struct device_attribute clkgate_delay_attr; - unsigned long clkgate_delay; -#endif - /* host specific block data */ unsigned int max_seg_size; /* see blk_queue_max_segment_size */ unsigned short max_segs; /* see blk_queue_max_segments */ @@ -479,26 +467,6 @@ static inline int mmc_host_packed_wr(struct mmc_host *host) return host->caps2 & MMC_CAP2_PACKED_WR; } -#ifdef CONFIG_MMC_CLKGATE -void mmc_host_clk_hold(struct mmc_host *host); -void mmc_host_clk_release(struct mmc_host *host); -unsigned int mmc_host_clk_rate(struct mmc_host *host); - -#else -static inline void mmc_host_clk_hold(struct mmc_host *host) -{ -} - -static inline void mmc_host_clk_release(struct mmc_host *host) -{ -} - -static inline unsigned int mmc_host_clk_rate(struct mmc_host *host) -{ - return host->ios.clock; -} -#endif - static inline int mmc_card_hs(struct mmc_card *card) { return card->host->ios.timing == MMC_TIMING_SD_HS || -- cgit v1.2.3 From 2086f801cb2a796279e817e68255654c4cfd3be3 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Mon, 12 Oct 2015 14:48:25 +0200 Subject: mmc: core: Add mmc_regulator_set_vqmmc() This adds logic to the MMC core to set VQMMC. This is expected to be called by MMC drivers like dw_mmc as part of (or instead of) their start_signal_voltage_switch() callback. A few notes: * When setting the signal voltage to 3.3V we do our best to make VQMMC and VMMC match. It's been reported that this makes some old cards happy since they were tested back in the day before UHS when VQMMC and VMMC were provided by the same regulator. A nice side effect of this is that we don't end up on the hairy edge of VQMMC (2.7V), which some EEs claim is a little too close to the minimum for comfort. This is done in two steps. At first we try to find a VQMMC within a 0.3V tolerance of VMMC and if this is not supported by the supplying regulator we try to find a suitable voltage within the whole 2.7V-3.6V area of the spec. * The two step approach is currently necessary, as the used regulator_set_voltage_triplet(min, target, max) uses a simple implementation that just tries two basic steps: regulator_set_voltage(target, max); regulator_set_voltage(min, target); So with only one step with 2.7-3.6V borders, if a suitable voltage is a bit below VMMC, we would directly get the lowest 2.7V which some boards (like Rockchips) don't like at all. * When setting the signal voltage to 1.8V or 1.2V we aim for that specific voltage instead of picking the lowest one in the range. * We very purposely don't print errors in mmc_regulator_set_vqmmc(). There are cases where the MMC core will try several different voltages and we don't want to pollute the logs. Signed-off-by: Douglas Anderson Signed-off-by: Heiko Stuebner Signed-off-by: Ulf Hansson --- include/linux/mmc/host.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index cfb3c99a6b4b..8673ffe3d86e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -411,6 +411,7 @@ int mmc_regulator_get_ocrmask(struct regulator *supply); int mmc_regulator_set_ocr(struct mmc_host *mmc, struct regulator *supply, unsigned short vdd_bit); +int mmc_regulator_set_vqmmc(struct mmc_host *mmc, struct mmc_ios *ios); #else static inline int mmc_regulator_get_ocrmask(struct regulator *supply) { @@ -423,6 +424,12 @@ static inline int mmc_regulator_set_ocr(struct mmc_host *mmc, { return 0; } + +static inline int mmc_regulator_set_vqmmc(struct mmc_host *mmc, + struct mmc_ios *ios) +{ + return -EINVAL; +} #endif int mmc_regulator_get_supply(struct mmc_host *mmc); -- cgit v1.2.3 From 3fc7eaef44dbcbcd602b6bcd0ac6efba7a30b108 Mon Sep 17 00:00:00 2001 From: Shawn Lin Date: Wed, 16 Sep 2015 14:41:23 +0800 Subject: mmc: dw_mmc: Add external dma interface support DesignWare MMC Controller can supports two types of DMA mode: external dma and internal dma. We get a RK312x platform integrated dw_mmc and ARM pl330 dma controller. This patch add edmac ops to support these platforms. I've tested it on RK31xx platform with edmac mode and RK3288 platform with idmac mode. Signed-off-by: Shawn Lin Signed-off-by: Jaehoon Chung Signed-off-by: Ulf Hansson --- include/linux/mmc/dw_mmc.h | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/dw_mmc.h b/include/linux/mmc/dw_mmc.h index 134c57422740..f67b2ec18e6d 100644 --- a/include/linux/mmc/dw_mmc.h +++ b/include/linux/mmc/dw_mmc.h @@ -16,6 +16,7 @@ #include #include +#include #define MAX_MCI_SLOTS 2 @@ -40,6 +41,17 @@ enum { struct mmc_data; +enum { + TRANS_MODE_PIO = 0, + TRANS_MODE_IDMAC, + TRANS_MODE_EDMAC +}; + +struct dw_mci_dma_slave { + struct dma_chan *ch; + enum dma_transfer_direction direction; +}; + /** * struct dw_mci - MMC controller state shared between all slots * @lock: Spinlock protecting the queue and associated data. @@ -154,7 +166,14 @@ struct dw_mci { dma_addr_t sg_dma; void *sg_cpu; const struct dw_mci_dma_ops *dma_ops; + /* For idmac */ unsigned int ring_size; + + /* For edmac */ + struct dw_mci_dma_slave *dms; + /* Registers's physical base address */ + void *phy_regs; + u32 cmd_status; u32 data_status; u32 stop_cmdr; @@ -208,8 +227,8 @@ struct dw_mci { struct dw_mci_dma_ops { /* DMA Ops */ int (*init)(struct dw_mci *host); - void (*start)(struct dw_mci *host, unsigned int sg_len); - void (*complete)(struct dw_mci *host); + int (*start)(struct dw_mci *host, unsigned int sg_len); + void (*complete)(void *host); void (*stop)(struct dw_mci *host); void (*cleanup)(struct dw_mci *host); void (*exit)(struct dw_mci *host); -- cgit v1.2.3 From 679c51cffc3b316bd89ecc91ef92603dd6d4fc68 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Mon, 26 Oct 2015 11:55:34 -0700 Subject: clk: Add stubs for of_clk_*() APIs when CONFIG_OF=n Compiling the versatile clock driver with COMPILE_TEST=y and CONFIG_OF=n leads to the following error: drivers/clk/versatile/clk-sp810.c: In function 'clk_sp810_of_setup': drivers/clk/versatile/clk-sp810.c:103:6: error: implicit declaration of function 'of_clk_parent_fill' [-Werror=implicit-function-declaration] Silence it by providing stubs APIs for of_clk_parent_fill(). Throw in a stub for of_clk_get_parent_count() too because we're in the area. Reported-by: kbuild test robot Cc: Javier Martinez Canillas Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index 837cd7c7c8a7..ff7284fb37b2 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -726,6 +726,15 @@ static inline struct clk *of_clk_src_onecell_get( { return ERR_PTR(-ENOENT); } +static inline int of_clk_get_parent_count(struct device_node *np) +{ + return 0; +} +static inline int of_clk_parent_fill(struct device_node *np, + const char **parents, unsigned int size) +{ + return 0; +} static inline const char *of_clk_get_parent_name(struct device_node *np, int index) { -- cgit v1.2.3 From be68bf883170b3e4123fc4ff3745e38fb45a573e Mon Sep 17 00:00:00 2001 From: Joachim Eastwood Date: Sat, 24 Oct 2015 18:55:22 +0200 Subject: clk: Add clk_hw_is_enabled() for use by clk providers Add clk_hw_is_enabled() to the provider APIs so clk providers can use a struct clk_hw instead of a struct clk to check if a clk is enabled or not. Signed-off-by: Joachim Eastwood Signed-off-by: Stephen Boyd --- include/linux/clk-provider.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/clk-provider.h b/include/linux/clk-provider.h index ff7284fb37b2..c56988ac63f7 100644 --- a/include/linux/clk-provider.h +++ b/include/linux/clk-provider.h @@ -654,6 +654,7 @@ unsigned long clk_hw_get_rate(const struct clk_hw *hw); unsigned long __clk_get_flags(struct clk *clk); unsigned long clk_hw_get_flags(const struct clk_hw *hw); bool clk_hw_is_prepared(const struct clk_hw *hw); +bool clk_hw_is_enabled(const struct clk_hw *hw); bool __clk_is_enabled(struct clk *clk); struct clk *__clk_lookup(const char *name); int __clk_mux_determine_rate(struct clk_hw *hw, -- cgit v1.2.3 From da4689c0263ee5f4eee64e166a6bee6a68b9242e Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Tue, 27 Oct 2015 08:36:08 +0900 Subject: iommu/vt-d: Expose struct svm_dev_ops without CONFIG_INTEL_IOMMU_SVM The point in providing an inline version of intel_svm_bind_mm() which just returns -ENOSYS is that people are supposed to be able to *use* it and just see that it fails. So we need to let them have a definition of struct svm_dev_ops (and the flags) too. Signed-off-by: David Woodhouse --- include/linux/intel-svm.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/intel-svm.h b/include/linux/intel-svm.h index 0a48ccff24ae..3c25794042f9 100644 --- a/include/linux/intel-svm.h +++ b/include/linux/intel-svm.h @@ -16,8 +16,6 @@ #ifndef __INTEL_SVM_H__ #define __INTEL_SVM_H__ -#ifdef CONFIG_INTEL_IOMMU_SVM - struct device; struct svm_dev_ops { @@ -55,6 +53,8 @@ struct svm_dev_ops { */ #define SVM_FLAG_SUPERVISOR_MODE (1<<1) +#ifdef CONFIG_INTEL_IOMMU_SVM + /** * intel_svm_bind_mm() - Bind the current process to a PASID * @dev: Device to be granted acccess -- cgit v1.2.3 From 1be5336bc7ba050ee07d352643bf4c01c513553c Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Fri, 16 Oct 2015 10:18:10 +0300 Subject: dmaengine: edma: New device tree binding With the old binding and driver architecture we had many issues: No way to assign eDMA channels to event queues, thus not able to tune the system by moving specific DMA channels to low/high priority servicing. We moved the cyclic channels to high priority within the code, but that was just a workaround to this issue. Memcopy was fundamentally broken: even if the driver scanned the DT/devices in the booted system for direct DMA users (which is not effective when the events are going through a crossbar) and created a map of 'used' channels, this information was not really usable. Since via dmaengien API the eDMA driver will be called with _some_ channel number, we would try to request this channel when any channel is requested for memcpy. By luck we got channel which is not used by any device most of the time so things worked, but if a device would have been using the given channel, but not requested it, the memcpy channel would have been waiting for HW event. The old code had the am33xx/am43xx DMA event router handling embedded. This should have been done in a separate driver since it is not part of the actual eDMA IP. There were no way to 'lock' PaRAM slots to be used by the DSP for example when booting with DT. In DT boot the edma node used more than one hwmod which is not a good practice and the kernel prints warning because of this. With the new bindings and the changes in the driver we can: - No regression with Legacy binding and non DT boot - DMA channels can be assigned to any TC (to set priority) - PaRAM slots can be reserved for other cores to use - Dynamic power management for CC and TCs, if only TC0 is used all other TC can be powered down for example Signed-off-by: Peter Ujfalusi Signed-off-by: Vinod Koul --- include/linux/platform_data/edma.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/edma.h b/include/linux/platform_data/edma.h index 6b9d500956e4..e2878baeb90e 100644 --- a/include/linux/platform_data/edma.h +++ b/include/linux/platform_data/edma.h @@ -71,6 +71,9 @@ struct edma_soc_info { /* Resource reservation for other cores */ struct edma_rsv_info *rsv; + /* List of channels allocated for memcpy, terminated with -1 */ + s16 *memcpy_channels; + s8 (*queue_priority_mapping)[2]; const s16 (*xbar_chans)[2]; }; -- cgit v1.2.3 From 3648dc6d27f648b8e3ce9b48874627a833d53c3a Mon Sep 17 00:00:00 2001 From: Christophe Ricard Date: Sun, 25 Oct 2015 22:54:39 +0100 Subject: NFC: st-nci: Add ese-present/uicc-present dts properties In order to align with st21nfca, dts configuration properties ese_present and uicc_present are made available in st-nci driver. So far, in early development firmware, because nci_nfcee_mode_set(DISABLE) was not supported we had to try to enable it during the secure element discovery phase. After several trials on commercial and qualified firmware it appears that nci_nfcee_mode_set(ENABLE) and nci_nfcee_mode_set(DISABLE) are properly supported. Such feature also help us to eventually save some time (~5ms) when only one secure element is connected. Acked-by: Rob Herring Signed-off-by: Christophe Ricard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/st-nci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/st-nci.h b/include/linux/platform_data/st-nci.h index d9d400a297bd..f6494b347c06 100644 --- a/include/linux/platform_data/st-nci.h +++ b/include/linux/platform_data/st-nci.h @@ -24,6 +24,8 @@ struct st_nci_nfc_platform_data { unsigned int gpio_reset; unsigned int irq_polarity; + bool is_ese_present; + bool is_uicc_present; }; #endif /* _ST_NCI_H_ */ -- cgit v1.2.3 From b5b3e23e4cace008e1a30e8614a484d14dfd07a1 Mon Sep 17 00:00:00 2001 From: Vincent Cuissard Date: Mon, 26 Oct 2015 10:27:41 +0100 Subject: NFC: nfcmrvl: add i2c driver This driver adds the support of I2C-based Marvell NFC controller. Signed-off-by: Vincent Cuissard Signed-off-by: Samuel Ortiz --- include/linux/platform_data/nfcmrvl.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/nfcmrvl.h b/include/linux/platform_data/nfcmrvl.h index ac91707dabcb..a6f9d633f5be 100644 --- a/include/linux/platform_data/nfcmrvl.h +++ b/include/linux/platform_data/nfcmrvl.h @@ -35,6 +35,14 @@ struct nfcmrvl_platform_data { unsigned int flow_control; /* Tell if firmware supports break control for power management */ unsigned int break_control; + + + /* + * I2C specific + */ + + unsigned int irq; + unsigned int irq_polarity; }; #endif /* _NFCMRVL_PTF_H_ */ -- cgit v1.2.3 From 62544ce8e01c1879d420ba309f7f319d24c0f4e6 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Thu, 22 Oct 2015 17:10:14 -0700 Subject: bpf: fix bpf_perf_event_read() helper Fix safety checks for bpf_perf_event_read(): - only non-inherited events can be added to perf_event_array map (do this check statically at map insertion time) - dynamically check that event is local and !pmu->count Otherwise buggy bpf program can cause kernel splat. Also fix error path after perf_event_attrs() and remove redundant 'extern'. Fixes: 35578d798400 ("bpf: Implement function bpf_perf_event_read() that get the selected hardware PMU conuter") Signed-off-by: Alexei Starovoitov Tested-by: Wang Nan Signed-off-by: David S. Miller --- include/linux/bpf.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index e3a51b74e275..75718fa28260 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -194,7 +194,6 @@ extern const struct bpf_func_proto bpf_map_lookup_elem_proto; extern const struct bpf_func_proto bpf_map_update_elem_proto; extern const struct bpf_func_proto bpf_map_delete_elem_proto; -extern const struct bpf_func_proto bpf_perf_event_read_proto; extern const struct bpf_func_proto bpf_get_prandom_u32_proto; extern const struct bpf_func_proto bpf_get_smp_processor_id_proto; extern const struct bpf_func_proto bpf_tail_call_proto; -- cgit v1.2.3 From 174fd8d369613c4e06660f3704caaba48dac8554 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 22 Oct 2015 09:27:12 +0900 Subject: blkcg: fix incorrect read/write sync/async stat accounting While unifying how blkcg stats are collected, 77ea733884eb ("blkcg: move io_service_bytes and io_serviced stats into blkcg_gq") incorrectly used bio->flags instead of bio->rw to tell the IO type. This made IOs to be accounted as the wrong type. Fix it. Signed-off-by: Tejun Heo Fixes: 77ea733884eb ("blkcg: move io_service_bytes and io_serviced stats into blkcg_gq") Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 0a5cc7a1109b..c02e669945e9 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -713,9 +713,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_flags, + blkg_rwstat_add(&blkg->stat_bytes, bio->bi_rw, bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio->bi_flags, 1); + blkg_rwstat_add(&blkg->stat_ios, bio->bi_rw, 1); } rcu_read_unlock(); -- cgit v1.2.3 From 9979dbe5158899b556eb772b7335e29417ac0ddd Mon Sep 17 00:00:00 2001 From: Chaotian Jing Date: Tue, 27 Oct 2015 14:24:28 +0800 Subject: mmc: mmc: extend the mmc_send_tuning() The mmc_execute_tuning() has already prepared the opcode, there is no need to prepare it again at mmc_send_tuning(), and, there is a BUG of mmc_send_tuning() to determine the opcode by bus width, assume eMMC was running at HS200, 4bit mode, then the mmc_send_tuning() will overwrite the opcode from CMD21 to CMD19, then got error. in addition, extend an argument of "cmd_error" to allow getting if there was cmd error when tune response. Signed-off-by: Chaotian Jing [Ulf: Rebased patch] Signed-off-by: Ulf Hansson --- include/linux/mmc/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 79a31d3c3788..37967b6da03c 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -153,7 +153,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); -extern int mmc_send_tuning(struct mmc_host *host); +extern int mmc_send_tuning(struct mmc_host *host, u32 opcode, int *cmd_error); extern int mmc_get_ext_csd(struct mmc_card *card, u8 **new_ext_csd); #define MMC_ERASE_ARG 0x00000000 -- cgit v1.2.3 From ef0eebc05130b0d22b0ea65c0cd014ee16fc89c7 Mon Sep 17 00:00:00 2001 From: Douglas Anderson Date: Tue, 20 Oct 2015 21:15:06 -0700 Subject: drivers/pinctrl: Add the concept of an "init" state For pinctrl the "default" state is applied to pins before the driver's probe function is called. This is normally a sensible thing to do, but in some cases can cause problems. That's because the pins will change state before the driver is given a chance to program how those pins should behave. As an example you might have a regulator that is controlled by a PWM (output high = high voltage, output low = low voltage). The firmware might leave this pin as driven high. If we allow the driver core to reconfigure this pin as a PWM pin before the PWM's probe function runs then you might end up running at too low of a voltage while we probe. Let's introudce a new "init" state. If this is defined we'll set pinctrl to this state before probe and then "default" after probe (unless the driver explicitly changed states already). An alternative idea that was thought of was to use the pre-existing "sleep" or "idle" states and add a boolean property that we should start in that mode. This was not done because the "init" state is needed for correctness and those other states are only present (and only transitioned in to and out of) when (optional) power management is enabled. Changes in v3: - Moved declarations to pinctrl/devinfo.h - Fixed author/SoB Changes in v2: - Added comment to pinctrl_init_done() as per Linus W. Signed-off-by: Douglas Anderson Acked-by: Greg Kroah-Hartman Tested-by: Caesar Wang Signed-off-by: Linus Walleij --- include/linux/pinctrl/devinfo.h | 10 ++++++++++ include/linux/pinctrl/pinctrl-state.h | 8 ++++++++ 2 files changed, 18 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pinctrl/devinfo.h b/include/linux/pinctrl/devinfo.h index 281cb91ddcf5..05082e407c4a 100644 --- a/include/linux/pinctrl/devinfo.h +++ b/include/linux/pinctrl/devinfo.h @@ -24,10 +24,14 @@ * struct dev_pin_info - pin state container for devices * @p: pinctrl handle for the containing device * @default_state: the default state for the handle, if found + * @init_state: the state at probe time, if found + * @sleep_state: the state at suspend time, if found + * @idle_state: the state at idle (runtime suspend) time, if found */ struct dev_pin_info { struct pinctrl *p; struct pinctrl_state *default_state; + struct pinctrl_state *init_state; #ifdef CONFIG_PM struct pinctrl_state *sleep_state; struct pinctrl_state *idle_state; @@ -35,6 +39,7 @@ struct dev_pin_info { }; extern int pinctrl_bind_pins(struct device *dev); +extern int pinctrl_init_done(struct device *dev); #else @@ -45,5 +50,10 @@ static inline int pinctrl_bind_pins(struct device *dev) return 0; } +static inline int pinctrl_init_done(struct device *dev) +{ + return 0; +} + #endif /* CONFIG_PINCTRL */ #endif /* PINCTRL_DEVINFO_H */ diff --git a/include/linux/pinctrl/pinctrl-state.h b/include/linux/pinctrl/pinctrl-state.h index b5919f8e6d1a..23073519339f 100644 --- a/include/linux/pinctrl/pinctrl-state.h +++ b/include/linux/pinctrl/pinctrl-state.h @@ -9,6 +9,13 @@ * hogs to configure muxing and pins at boot, and also as a state * to go into when returning from sleep and idle in * .pm_runtime_resume() or ordinary .resume() for example. + * @PINCTRL_STATE_INIT: normally the pinctrl will be set to "default" + * before the driver's probe() function is called. There are some + * drivers where that is not appropriate becausing doing so would + * glitch the pins. In those cases you can add an "init" pinctrl + * which is the state of the pins before drive probe. After probe + * if the pins are still in "init" state they'll be moved to + * "default". * @PINCTRL_STATE_IDLE: the state the pinctrl handle shall be put into * when the pins are idle. This is a state where the system is relaxed * but not fully sleeping - some power may be on but clocks gated for @@ -20,5 +27,6 @@ * ordinary .suspend() function. */ #define PINCTRL_STATE_DEFAULT "default" +#define PINCTRL_STATE_INIT "init" #define PINCTRL_STATE_IDLE "idle" #define PINCTRL_STATE_SLEEP "sleep" -- cgit v1.2.3 From ca5d24854210dd02548a080d4271560e926c4fcb Mon Sep 17 00:00:00 2001 From: "Andrew F. Davis" Date: Fri, 23 Oct 2015 08:59:10 -0500 Subject: spi: Add THIS_MODULE to spi_driver in SPI core Add spi_register_driver helper macro that adds THIS_MODULE to spi_driver for the registering driver. We rename and modify the existing spi_register_driver to enable this. Signed-off-by: Andrew F. Davis Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index 269e8afd3e2a..e2da17b5900e 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -250,7 +250,7 @@ static inline struct spi_driver *to_spi_driver(struct device_driver *drv) return drv ? container_of(drv, struct spi_driver, driver) : NULL; } -extern int spi_register_driver(struct spi_driver *sdrv); +extern int __spi_register_driver(struct module *owner, struct spi_driver *sdrv); /** * spi_unregister_driver - reverse effect of spi_register_driver @@ -263,6 +263,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) driver_unregister(&sdrv->driver); } +/* use a define to avoid include chaining to get THIS_MODULE */ +#define spi_register_driver(driver) \ + __spi_register_driver(THIS_MODULE, driver) + /** * module_spi_driver() - Helper macro for registering a SPI driver * @__spi_driver: spi_driver struct -- cgit v1.2.3 From 5523662edd4fe937267053c2018b75be2ac17860 Mon Sep 17 00:00:00 2001 From: Stephen Chandler Paul Date: Sat, 24 Oct 2015 13:10:29 -0700 Subject: Input: add userio module Debugging input devices, specifically laptop touchpads, can be tricky without having the physical device handy. Here we try to remedy that with userio. This module allows an application to connect to a character device provided by the kernel, and emulate any serio device. In combination with userspace programs that can record PS/2 devices and replay them through the /dev/userio device, this allows developers to debug driver issues on the PS/2 level with devices simply by requesting a recording from the user experiencing the issue without having to have the physical hardware in front of them. Signed-off-by: Stephen Chandler Paul Reviewed-by: David Herrmann Signed-off-by: Dmitry Torokhov --- include/linux/miscdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/miscdevice.h b/include/linux/miscdevice.h index 81f6e427ba6b..543037465973 100644 --- a/include/linux/miscdevice.h +++ b/include/linux/miscdevice.h @@ -49,6 +49,7 @@ #define LOOP_CTRL_MINOR 237 #define VHOST_NET_MINOR 238 #define UHID_MINOR 239 +#define USERIO_MINOR 240 #define MISC_DYNAMIC_MINOR 255 struct device; -- cgit v1.2.3 From fe56b9e6a8d957d6a20729d626027f800c17a2da Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Oct 2015 11:02:25 +0200 Subject: qed: Add module with basic common support The Qlogic Everest Driver is the backend module for the QL4xxx ethernet products by Qlogic. This module serves two main purposes: 1. It's responsible to contain all the common code that will be shared between the various drivers that would be used with said line of products. Flows such as chip initialization and de-initialization fall under this category. 2. It would abstract the protocol-specific HW & FW components, allowing the protocol drivers to have a clean APIs which is detached in its slowpath configuration from the actual HSI. This adds a very basic module without any protocol-specific bits. I.e., this adds a basic implementation that almost entirely falls under the first category. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/common_hsi.h | 607 +++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_chain.h | 539 ++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_if.h | 498 +++++++++++++++++++++++++++++++++ 3 files changed, 1644 insertions(+) create mode 100644 include/linux/qed/common_hsi.h create mode 100644 include/linux/qed/qed_chain.h create mode 100644 include/linux/qed/qed_if.h (limited to 'include/linux') diff --git a/include/linux/qed/common_hsi.h b/include/linux/qed/common_hsi.h new file mode 100644 index 000000000000..6a4347639c03 --- /dev/null +++ b/include/linux/qed/common_hsi.h @@ -0,0 +1,607 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef __COMMON_HSI__ +#define __COMMON_HSI__ + +#define FW_MAJOR_VERSION 8 +#define FW_MINOR_VERSION 4 +#define FW_REVISION_VERSION 2 +#define FW_ENGINEERING_VERSION 0 + +/***********************/ +/* COMMON HW CONSTANTS */ +/***********************/ + +/* PCI functions */ +#define MAX_NUM_PORTS_K2 (4) +#define MAX_NUM_PORTS_BB (2) +#define MAX_NUM_PORTS (MAX_NUM_PORTS_K2) + +#define MAX_NUM_PFS_K2 (16) +#define MAX_NUM_PFS_BB (8) +#define MAX_NUM_PFS (MAX_NUM_PFS_K2) +#define MAX_NUM_OF_PFS_IN_CHIP (16) /* On both engines */ + +#define MAX_NUM_VFS_K2 (192) +#define MAX_NUM_VFS_BB (120) +#define MAX_NUM_VFS (MAX_NUM_VFS_K2) + +#define MAX_NUM_FUNCTIONS_BB (MAX_NUM_PFS_BB + MAX_NUM_VFS_BB) +#define MAX_NUM_FUNCTIONS (MAX_NUM_PFS + MAX_NUM_VFS) + +#define MAX_FUNCTION_NUMBER_BB (MAX_NUM_PFS + MAX_NUM_VFS_BB) +#define MAX_FUNCTION_NUMBER (MAX_NUM_PFS + MAX_NUM_VFS) + +#define MAX_NUM_VPORTS_K2 (208) +#define MAX_NUM_VPORTS_BB (160) +#define MAX_NUM_VPORTS (MAX_NUM_VPORTS_K2) + +#define MAX_NUM_L2_QUEUES_K2 (320) +#define MAX_NUM_L2_QUEUES_BB (256) +#define MAX_NUM_L2_QUEUES (MAX_NUM_L2_QUEUES_K2) + +/* Traffic classes in network-facing blocks (PBF, BTB, NIG, BRB, PRS and QM) */ +#define NUM_PHYS_TCS_4PORT_K2 (4) +#define NUM_OF_PHYS_TCS (8) + +#define NUM_TCS_4PORT_K2 (NUM_PHYS_TCS_4PORT_K2 + 1) +#define NUM_OF_TCS (NUM_OF_PHYS_TCS + 1) + +#define LB_TC (NUM_OF_PHYS_TCS) + +/* Num of possible traffic priority values */ +#define NUM_OF_PRIO (8) + +#define MAX_NUM_VOQS_K2 (NUM_TCS_4PORT_K2 * MAX_NUM_PORTS_K2) +#define MAX_NUM_VOQS_BB (NUM_OF_TCS * MAX_NUM_PORTS_BB) +#define MAX_NUM_VOQS (MAX_NUM_VOQS_K2) +#define MAX_PHYS_VOQS (NUM_OF_PHYS_TCS * MAX_NUM_PORTS_BB) + +/* CIDs */ +#define NUM_OF_CONNECTION_TYPES (8) +#define NUM_OF_LCIDS (320) +#define NUM_OF_LTIDS (320) + +/*****************/ +/* CDU CONSTANTS */ +/*****************/ + +#define CDU_SEG_TYPE_OFFSET_REG_TYPE_SHIFT (17) +#define CDU_SEG_TYPE_OFFSET_REG_OFFSET_MASK (0x1ffff) + +/*****************/ +/* DQ CONSTANTS */ +/*****************/ + +/* DEMS */ +#define DQ_DEMS_LEGACY 0 + +/* XCM agg val selection */ +#define DQ_XCM_AGG_VAL_SEL_WORD2 0 +#define DQ_XCM_AGG_VAL_SEL_WORD3 1 +#define DQ_XCM_AGG_VAL_SEL_WORD4 2 +#define DQ_XCM_AGG_VAL_SEL_WORD5 3 +#define DQ_XCM_AGG_VAL_SEL_REG3 4 +#define DQ_XCM_AGG_VAL_SEL_REG4 5 +#define DQ_XCM_AGG_VAL_SEL_REG5 6 +#define DQ_XCM_AGG_VAL_SEL_REG6 7 + +/* XCM agg val selection */ +#define DQ_XCM_ETH_EDPM_NUM_BDS_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD2 +#define DQ_XCM_ETH_TX_BD_CONS_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_CORE_TX_BD_CONS_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD3 +#define DQ_XCM_ETH_TX_BD_PROD_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_CORE_TX_BD_PROD_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_CORE_SPQ_PROD_CMD \ + DQ_XCM_AGG_VAL_SEL_WORD4 +#define DQ_XCM_ETH_GO_TO_BD_CONS_CMD DQ_XCM_AGG_VAL_SEL_WORD5 + +/* XCM agg counter flag selection */ +#define DQ_XCM_AGG_FLG_SHIFT_BIT14 0 +#define DQ_XCM_AGG_FLG_SHIFT_BIT15 1 +#define DQ_XCM_AGG_FLG_SHIFT_CF12 2 +#define DQ_XCM_AGG_FLG_SHIFT_CF13 3 +#define DQ_XCM_AGG_FLG_SHIFT_CF18 4 +#define DQ_XCM_AGG_FLG_SHIFT_CF19 5 +#define DQ_XCM_AGG_FLG_SHIFT_CF22 6 +#define DQ_XCM_AGG_FLG_SHIFT_CF23 7 + +/* XCM agg counter flag selection */ +#define DQ_XCM_ETH_DQ_CF_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_CORE_DQ_CF_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF18) +#define DQ_XCM_ETH_TERMINATE_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_CORE_TERMINATE_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF19) +#define DQ_XCM_ETH_SLOW_PATH_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_CORE_SLOW_PATH_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF22) +#define DQ_XCM_ETH_TPH_EN_CMD (1 << \ + DQ_XCM_AGG_FLG_SHIFT_CF23) + +/*****************/ +/* QM CONSTANTS */ +/*****************/ + +/* number of TX queues in the QM */ +#define MAX_QM_TX_QUEUES_K2 512 +#define MAX_QM_TX_QUEUES_BB 448 +#define MAX_QM_TX_QUEUES MAX_QM_TX_QUEUES_K2 + +/* number of Other queues in the QM */ +#define MAX_QM_OTHER_QUEUES_BB 64 +#define MAX_QM_OTHER_QUEUES_K2 128 +#define MAX_QM_OTHER_QUEUES MAX_QM_OTHER_QUEUES_K2 + +/* number of queues in a PF queue group */ +#define QM_PF_QUEUE_GROUP_SIZE 8 + +/* base number of Tx PQs in the CM PQ representation. + * should be used when storing PQ IDs in CM PQ registers and context + */ +#define CM_TX_PQ_BASE 0x200 + +/* QM registers data */ +#define QM_LINE_CRD_REG_WIDTH 16 +#define QM_LINE_CRD_REG_SIGN_BIT (1 << (QM_LINE_CRD_REG_WIDTH - 1)) +#define QM_BYTE_CRD_REG_WIDTH 24 +#define QM_BYTE_CRD_REG_SIGN_BIT (1 << (QM_BYTE_CRD_REG_WIDTH - 1)) +#define QM_WFQ_CRD_REG_WIDTH 32 +#define QM_WFQ_CRD_REG_SIGN_BIT (1 << (QM_WFQ_CRD_REG_WIDTH - 1)) +#define QM_RL_CRD_REG_WIDTH 32 +#define QM_RL_CRD_REG_SIGN_BIT (1 << (QM_RL_CRD_REG_WIDTH - 1)) + +/*****************/ +/* CAU CONSTANTS */ +/*****************/ + +#define CAU_FSM_ETH_RX 0 +#define CAU_FSM_ETH_TX 1 + +/* Number of Protocol Indices per Status Block */ +#define PIS_PER_SB 12 + +#define CAU_HC_STOPPED_STATE 3 +#define CAU_HC_DISABLE_STATE 4 +#define CAU_HC_ENABLE_STATE 0 + +/*****************/ +/* IGU CONSTANTS */ +/*****************/ + +#define MAX_SB_PER_PATH_K2 (368) +#define MAX_SB_PER_PATH_BB (288) +#define MAX_TOT_SB_PER_PATH \ + MAX_SB_PER_PATH_K2 + +#define MAX_SB_PER_PF_MIMD 129 +#define MAX_SB_PER_PF_SIMD 64 +#define MAX_SB_PER_VF 64 + +/* Memory addresses on the BAR for the IGU Sub Block */ +#define IGU_MEM_BASE 0x0000 + +#define IGU_MEM_MSIX_BASE 0x0000 +#define IGU_MEM_MSIX_UPPER 0x0101 +#define IGU_MEM_MSIX_RESERVED_UPPER 0x01ff + +#define IGU_MEM_PBA_MSIX_BASE 0x0200 +#define IGU_MEM_PBA_MSIX_UPPER 0x0202 +#define IGU_MEM_PBA_MSIX_RESERVED_UPPER 0x03ff + +#define IGU_CMD_INT_ACK_BASE 0x0400 +#define IGU_CMD_INT_ACK_UPPER (IGU_CMD_INT_ACK_BASE + \ + MAX_TOT_SB_PER_PATH - \ + 1) +#define IGU_CMD_INT_ACK_RESERVED_UPPER 0x05ff + +#define IGU_CMD_ATTN_BIT_UPD_UPPER 0x05f0 +#define IGU_CMD_ATTN_BIT_SET_UPPER 0x05f1 +#define IGU_CMD_ATTN_BIT_CLR_UPPER 0x05f2 + +#define IGU_REG_SISR_MDPC_WMASK_UPPER 0x05f3 +#define IGU_REG_SISR_MDPC_WMASK_LSB_UPPER 0x05f4 +#define IGU_REG_SISR_MDPC_WMASK_MSB_UPPER 0x05f5 +#define IGU_REG_SISR_MDPC_WOMASK_UPPER 0x05f6 + +#define IGU_CMD_PROD_UPD_BASE 0x0600 +#define IGU_CMD_PROD_UPD_UPPER (IGU_CMD_PROD_UPD_BASE +\ + MAX_TOT_SB_PER_PATH - \ + 1) +#define IGU_CMD_PROD_UPD_RESERVED_UPPER 0x07ff + +/*****************/ +/* PXP CONSTANTS */ +/*****************/ + +/* PTT and GTT */ +#define PXP_NUM_PF_WINDOWS 12 +#define PXP_PER_PF_ENTRY_SIZE 8 +#define PXP_NUM_GLOBAL_WINDOWS 243 +#define PXP_GLOBAL_ENTRY_SIZE 4 +#define PXP_ADMIN_WINDOW_ALLOWED_LENGTH 4 +#define PXP_PF_WINDOW_ADMIN_START 0 +#define PXP_PF_WINDOW_ADMIN_LENGTH 0x1000 +#define PXP_PF_WINDOW_ADMIN_END (PXP_PF_WINDOW_ADMIN_START + \ + PXP_PF_WINDOW_ADMIN_LENGTH - 1) +#define PXP_PF_WINDOW_ADMIN_PER_PF_START 0 +#define PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH (PXP_NUM_PF_WINDOWS * \ + PXP_PER_PF_ENTRY_SIZE) +#define PXP_PF_WINDOW_ADMIN_PER_PF_END (PXP_PF_WINDOW_ADMIN_PER_PF_START + \ + PXP_PF_WINDOW_ADMIN_PER_PF_LENGTH - 1) +#define PXP_PF_WINDOW_ADMIN_GLOBAL_START 0x200 +#define PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH (PXP_NUM_GLOBAL_WINDOWS * \ + PXP_GLOBAL_ENTRY_SIZE) +#define PXP_PF_WINDOW_ADMIN_GLOBAL_END \ + (PXP_PF_WINDOW_ADMIN_GLOBAL_START + \ + PXP_PF_WINDOW_ADMIN_GLOBAL_LENGTH - 1) +#define PXP_PF_GLOBAL_PRETEND_ADDR 0x1f0 +#define PXP_PF_ME_OPAQUE_MASK_ADDR 0xf4 +#define PXP_PF_ME_OPAQUE_ADDR 0x1f8 +#define PXP_PF_ME_CONCRETE_ADDR 0x1fc + +#define PXP_EXTERNAL_BAR_PF_WINDOW_START 0x1000 +#define PXP_EXTERNAL_BAR_PF_WINDOW_NUM PXP_NUM_PF_WINDOWS +#define PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE 0x1000 +#define PXP_EXTERNAL_BAR_PF_WINDOW_LENGTH \ + (PXP_EXTERNAL_BAR_PF_WINDOW_NUM * \ + PXP_EXTERNAL_BAR_PF_WINDOW_SINGLE_SIZE) +#define PXP_EXTERNAL_BAR_PF_WINDOW_END \ + (PXP_EXTERNAL_BAR_PF_WINDOW_START + \ + PXP_EXTERNAL_BAR_PF_WINDOW_LENGTH - 1) + +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START \ + (PXP_EXTERNAL_BAR_PF_WINDOW_END + 1) +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_NUM PXP_NUM_GLOBAL_WINDOWS +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_SINGLE_SIZE 0x1000 +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH \ + (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_NUM * \ + PXP_EXTERNAL_BAR_GLOBAL_WINDOW_SINGLE_SIZE) +#define PXP_EXTERNAL_BAR_GLOBAL_WINDOW_END \ + (PXP_EXTERNAL_BAR_GLOBAL_WINDOW_START + \ + PXP_EXTERNAL_BAR_GLOBAL_WINDOW_LENGTH - 1) + +#define PXP_ILT_PAGE_SIZE_NUM_BITS_MIN 12 +#define PXP_ILT_BLOCK_FACTOR_MULTIPLIER 1024 + +/* ILT Records */ +#define PXP_NUM_ILT_RECORDS_BB 7600 +#define PXP_NUM_ILT_RECORDS_K2 11000 +#define MAX_NUM_ILT_RECORDS MAX(PXP_NUM_ILT_RECORDS_BB, PXP_NUM_ILT_RECORDS_K2) + +/******************/ +/* PBF CONSTANTS */ +/******************/ + +/* Number of PBF command queue lines. Each line is 32B. */ +#define PBF_MAX_CMD_LINES 3328 + +/* Number of BTB blocks. Each block is 256B. */ +#define BTB_MAX_BLOCKS 1440 + +/*****************/ +/* PRS CONSTANTS */ +/*****************/ + +/* Async data KCQ CQE */ +struct async_data { + __le32 cid; + __le16 itid; + u8 error_code; + u8 fw_debug_param; +}; + +struct regpair { + __le32 lo; + __le32 hi; +}; + +/* Event Data Union */ +union event_ring_data { + u8 bytes[8]; + struct async_data async_info; +}; + +/* Event Ring Entry */ +struct event_ring_entry { + u8 protocol_id; + u8 opcode; + __le16 reserved0; + __le16 echo; + u8 fw_return_code; + u8 flags; +#define EVENT_RING_ENTRY_ASYNC_MASK 0x1 +#define EVENT_RING_ENTRY_ASYNC_SHIFT 0 +#define EVENT_RING_ENTRY_RESERVED1_MASK 0x7F +#define EVENT_RING_ENTRY_RESERVED1_SHIFT 1 + union event_ring_data data; +}; + +/* Multi function mode */ +enum mf_mode { + SF, + MF_OVLAN, + MF_NPAR, + MAX_MF_MODE +}; + +/* Per-protocol connection types */ +enum protocol_type { + PROTOCOLID_RESERVED1, + PROTOCOLID_RESERVED2, + PROTOCOLID_RESERVED3, + PROTOCOLID_CORE, + PROTOCOLID_ETH, + PROTOCOLID_RESERVED4, + PROTOCOLID_RESERVED5, + PROTOCOLID_PREROCE, + PROTOCOLID_COMMON, + PROTOCOLID_RESERVED6, + MAX_PROTOCOL_TYPE +}; + +/* status block structure */ +struct cau_pi_entry { + u32 prod; +#define CAU_PI_ENTRY_PROD_VAL_MASK 0xFFFF +#define CAU_PI_ENTRY_PROD_VAL_SHIFT 0 +#define CAU_PI_ENTRY_PI_TIMESET_MASK 0x7F +#define CAU_PI_ENTRY_PI_TIMESET_SHIFT 16 +#define CAU_PI_ENTRY_FSM_SEL_MASK 0x1 +#define CAU_PI_ENTRY_FSM_SEL_SHIFT 23 +#define CAU_PI_ENTRY_RESERVED_MASK 0xFF +#define CAU_PI_ENTRY_RESERVED_SHIFT 24 +}; + +/* status block structure */ +struct cau_sb_entry { + u32 data; +#define CAU_SB_ENTRY_SB_PROD_MASK 0xFFFFFF +#define CAU_SB_ENTRY_SB_PROD_SHIFT 0 +#define CAU_SB_ENTRY_STATE0_MASK 0xF +#define CAU_SB_ENTRY_STATE0_SHIFT 24 +#define CAU_SB_ENTRY_STATE1_MASK 0xF +#define CAU_SB_ENTRY_STATE1_SHIFT 28 + u32 params; +#define CAU_SB_ENTRY_SB_TIMESET0_MASK 0x7F +#define CAU_SB_ENTRY_SB_TIMESET0_SHIFT 0 +#define CAU_SB_ENTRY_SB_TIMESET1_MASK 0x7F +#define CAU_SB_ENTRY_SB_TIMESET1_SHIFT 7 +#define CAU_SB_ENTRY_TIMER_RES0_MASK 0x3 +#define CAU_SB_ENTRY_TIMER_RES0_SHIFT 14 +#define CAU_SB_ENTRY_TIMER_RES1_MASK 0x3 +#define CAU_SB_ENTRY_TIMER_RES1_SHIFT 16 +#define CAU_SB_ENTRY_VF_NUMBER_MASK 0xFF +#define CAU_SB_ENTRY_VF_NUMBER_SHIFT 18 +#define CAU_SB_ENTRY_VF_VALID_MASK 0x1 +#define CAU_SB_ENTRY_VF_VALID_SHIFT 26 +#define CAU_SB_ENTRY_PF_NUMBER_MASK 0xF +#define CAU_SB_ENTRY_PF_NUMBER_SHIFT 27 +#define CAU_SB_ENTRY_TPH_MASK 0x1 +#define CAU_SB_ENTRY_TPH_SHIFT 31 +}; + +/* core doorbell data */ +struct core_db_data { + u8 params; +#define CORE_DB_DATA_DEST_MASK 0x3 +#define CORE_DB_DATA_DEST_SHIFT 0 +#define CORE_DB_DATA_AGG_CMD_MASK 0x3 +#define CORE_DB_DATA_AGG_CMD_SHIFT 2 +#define CORE_DB_DATA_BYPASS_EN_MASK 0x1 +#define CORE_DB_DATA_BYPASS_EN_SHIFT 4 +#define CORE_DB_DATA_RESERVED_MASK 0x1 +#define CORE_DB_DATA_RESERVED_SHIFT 5 +#define CORE_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define CORE_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 spq_prod; +}; + +/* Enum of doorbell aggregative command selection */ +enum db_agg_cmd_sel { + DB_AGG_CMD_NOP, + DB_AGG_CMD_SET, + DB_AGG_CMD_ADD, + DB_AGG_CMD_MAX, + MAX_DB_AGG_CMD_SEL +}; + +/* Enum of doorbell destination */ +enum db_dest { + DB_DEST_XCM, + DB_DEST_UCM, + DB_DEST_TCM, + DB_NUM_DESTINATIONS, + MAX_DB_DEST +}; + +/* Structure for doorbell address, in legacy mode */ +struct db_legacy_addr { + __le32 addr; +#define DB_LEGACY_ADDR_RESERVED0_MASK 0x3 +#define DB_LEGACY_ADDR_RESERVED0_SHIFT 0 +#define DB_LEGACY_ADDR_DEMS_MASK 0x7 +#define DB_LEGACY_ADDR_DEMS_SHIFT 2 +#define DB_LEGACY_ADDR_ICID_MASK 0x7FFFFFF +#define DB_LEGACY_ADDR_ICID_SHIFT 5 +}; + +/* Igu interrupt command */ +enum igu_int_cmd { + IGU_INT_ENABLE = 0, + IGU_INT_DISABLE = 1, + IGU_INT_NOP = 2, + IGU_INT_NOP2 = 3, + MAX_IGU_INT_CMD +}; + +/* IGU producer or consumer update command */ +struct igu_prod_cons_update { + u32 sb_id_and_flags; +#define IGU_PROD_CONS_UPDATE_SB_INDEX_MASK 0xFFFFFF +#define IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT 0 +#define IGU_PROD_CONS_UPDATE_UPDATE_FLAG_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT 24 +#define IGU_PROD_CONS_UPDATE_ENABLE_INT_MASK 0x3 +#define IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT 25 +#define IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT 27 +#define IGU_PROD_CONS_UPDATE_TIMER_MASK_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_TIMER_MASK_SHIFT 28 +#define IGU_PROD_CONS_UPDATE_RESERVED0_MASK 0x3 +#define IGU_PROD_CONS_UPDATE_RESERVED0_SHIFT 29 +#define IGU_PROD_CONS_UPDATE_COMMAND_TYPE_MASK 0x1 +#define IGU_PROD_CONS_UPDATE_COMMAND_TYPE_SHIFT 31 + u32 reserved1; +}; + +/* Igu segments access for default status block only */ +enum igu_seg_access { + IGU_SEG_ACCESS_REG = 0, + IGU_SEG_ACCESS_ATTN = 1, + MAX_IGU_SEG_ACCESS +}; + +struct parsing_and_err_flags { + __le16 flags; +#define PARSING_AND_ERR_FLAGS_L3TYPE_MASK 0x3 +#define PARSING_AND_ERR_FLAGS_L3TYPE_SHIFT 0 +#define PARSING_AND_ERR_FLAGS_L4PROTOCOL_MASK 0x3 +#define PARSING_AND_ERR_FLAGS_L4PROTOCOL_SHIFT 2 +#define PARSING_AND_ERR_FLAGS_IPV4FRAG_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_IPV4FRAG_SHIFT 4 +#define PARSING_AND_ERR_FLAGS_TAG8021QEXIST_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TAG8021QEXIST_SHIFT 5 +#define PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_L4CHKSMWASCALCULATED_SHIFT 6 +#define PARSING_AND_ERR_FLAGS_TIMESYNCPKT_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TIMESYNCPKT_SHIFT 7 +#define PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TIMESTAMPRECORDED_SHIFT 8 +#define PARSING_AND_ERR_FLAGS_IPHDRERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_IPHDRERROR_SHIFT 9 +#define PARSING_AND_ERR_FLAGS_L4CHKSMERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_L4CHKSMERROR_SHIFT 10 +#define PARSING_AND_ERR_FLAGS_TUNNELEXIST_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELEXIST_SHIFT 11 +#define PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNEL8021QTAGEXIST_SHIFT 12 +#define PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELIPHDRERROR_SHIFT 13 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMWASCALCULATED_SHIFT 14 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_MASK 0x1 +#define PARSING_AND_ERR_FLAGS_TUNNELL4CHKSMERROR_SHIFT 15 +}; + +/* Concrete Function ID. */ +struct pxp_concrete_fid { + __le16 fid; +#define PXP_CONCRETE_FID_PFID_MASK 0xF +#define PXP_CONCRETE_FID_PFID_SHIFT 0 +#define PXP_CONCRETE_FID_PORT_MASK 0x3 +#define PXP_CONCRETE_FID_PORT_SHIFT 4 +#define PXP_CONCRETE_FID_PATH_MASK 0x1 +#define PXP_CONCRETE_FID_PATH_SHIFT 6 +#define PXP_CONCRETE_FID_VFVALID_MASK 0x1 +#define PXP_CONCRETE_FID_VFVALID_SHIFT 7 +#define PXP_CONCRETE_FID_VFID_MASK 0xFF +#define PXP_CONCRETE_FID_VFID_SHIFT 8 +}; + +struct pxp_pretend_concrete_fid { + __le16 fid; +#define PXP_PRETEND_CONCRETE_FID_PFID_MASK 0xF +#define PXP_PRETEND_CONCRETE_FID_PFID_SHIFT 0 +#define PXP_PRETEND_CONCRETE_FID_RESERVED_MASK 0x7 +#define PXP_PRETEND_CONCRETE_FID_RESERVED_SHIFT 4 +#define PXP_PRETEND_CONCRETE_FID_VFVALID_MASK 0x1 +#define PXP_PRETEND_CONCRETE_FID_VFVALID_SHIFT 7 +#define PXP_PRETEND_CONCRETE_FID_VFID_MASK 0xFF +#define PXP_PRETEND_CONCRETE_FID_VFID_SHIFT 8 +}; + +union pxp_pretend_fid { + struct pxp_pretend_concrete_fid concrete_fid; + __le16 opaque_fid; +}; + +/* Pxp Pretend Command Register. */ +struct pxp_pretend_cmd { + union pxp_pretend_fid fid; + __le16 control; +#define PXP_PRETEND_CMD_PATH_MASK 0x1 +#define PXP_PRETEND_CMD_PATH_SHIFT 0 +#define PXP_PRETEND_CMD_USE_PORT_MASK 0x1 +#define PXP_PRETEND_CMD_USE_PORT_SHIFT 1 +#define PXP_PRETEND_CMD_PORT_MASK 0x3 +#define PXP_PRETEND_CMD_PORT_SHIFT 2 +#define PXP_PRETEND_CMD_RESERVED0_MASK 0xF +#define PXP_PRETEND_CMD_RESERVED0_SHIFT 4 +#define PXP_PRETEND_CMD_RESERVED1_MASK 0xF +#define PXP_PRETEND_CMD_RESERVED1_SHIFT 8 +#define PXP_PRETEND_CMD_PRETEND_PATH_MASK 0x1 +#define PXP_PRETEND_CMD_PRETEND_PATH_SHIFT 12 +#define PXP_PRETEND_CMD_PRETEND_PORT_MASK 0x1 +#define PXP_PRETEND_CMD_PRETEND_PORT_SHIFT 13 +#define PXP_PRETEND_CMD_PRETEND_FUNCTION_MASK 0x1 +#define PXP_PRETEND_CMD_PRETEND_FUNCTION_SHIFT 14 +#define PXP_PRETEND_CMD_IS_CONCRETE_MASK 0x1 +#define PXP_PRETEND_CMD_IS_CONCRETE_SHIFT 15 +}; + +/* PTT Record in PXP Admin Window. */ +struct pxp_ptt_entry { + __le32 offset; +#define PXP_PTT_ENTRY_OFFSET_MASK 0x7FFFFF +#define PXP_PTT_ENTRY_OFFSET_SHIFT 0 +#define PXP_PTT_ENTRY_RESERVED0_MASK 0x1FF +#define PXP_PTT_ENTRY_RESERVED0_SHIFT 23 + struct pxp_pretend_cmd pretend; +}; + +/* RSS hash type */ +enum rss_hash_type { + RSS_HASH_TYPE_DEFAULT = 0, + RSS_HASH_TYPE_IPV4 = 1, + RSS_HASH_TYPE_TCP_IPV4 = 2, + RSS_HASH_TYPE_IPV6 = 3, + RSS_HASH_TYPE_TCP_IPV6 = 4, + RSS_HASH_TYPE_UDP_IPV4 = 5, + RSS_HASH_TYPE_UDP_IPV6 = 6, + MAX_RSS_HASH_TYPE +}; + +/* status block structure */ +struct status_block { + __le16 pi_array[PIS_PER_SB]; + __le32 sb_num; +#define STATUS_BLOCK_SB_NUM_MASK 0x1FF +#define STATUS_BLOCK_SB_NUM_SHIFT 0 +#define STATUS_BLOCK_ZERO_PAD_MASK 0x7F +#define STATUS_BLOCK_ZERO_PAD_SHIFT 9 +#define STATUS_BLOCK_ZERO_PAD2_MASK 0xFFFF +#define STATUS_BLOCK_ZERO_PAD2_SHIFT 16 + __le32 prod_index; +#define STATUS_BLOCK_PROD_INDEX_MASK 0xFFFFFF +#define STATUS_BLOCK_PROD_INDEX_SHIFT 0 +#define STATUS_BLOCK_ZERO_PAD3_MASK 0xFF +#define STATUS_BLOCK_ZERO_PAD3_SHIFT 24 +}; + +#endif /* __COMMON_HSI__ */ diff --git a/include/linux/qed/qed_chain.h b/include/linux/qed/qed_chain.h new file mode 100644 index 000000000000..b920c3605c46 --- /dev/null +++ b/include/linux/qed/qed_chain.h @@ -0,0 +1,539 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_CHAIN_H +#define _QED_CHAIN_H + +#include +#include +#include +#include +#include +#include + +/* dma_addr_t manip */ +#define DMA_LO_LE(x) cpu_to_le32(lower_32_bits(x)) +#define DMA_HI_LE(x) cpu_to_le32(upper_32_bits(x)) + +#define HILO_GEN(hi, lo, type) ((((type)(hi)) << 32) + (lo)) +#define HILO_DMA(hi, lo) HILO_GEN(hi, lo, dma_addr_t) +#define HILO_64(hi, lo) HILO_GEN((le32_to_cpu(hi)), (le32_to_cpu(lo)), u64) +#define HILO_DMA_REGPAIR(regpair) (HILO_DMA(regpair.hi, regpair.lo)) +#define HILO_64_REGPAIR(regpair) (HILO_64(regpair.hi, regpair.lo)) + +enum qed_chain_mode { + /* Each Page contains a next pointer at its end */ + QED_CHAIN_MODE_NEXT_PTR, + + /* Chain is a single page (next ptr) is unrequired */ + QED_CHAIN_MODE_SINGLE, + + /* Page pointers are located in a side list */ + QED_CHAIN_MODE_PBL, +}; + +enum qed_chain_use_mode { + QED_CHAIN_USE_TO_PRODUCE, /* Chain starts empty */ + QED_CHAIN_USE_TO_CONSUME, /* Chain starts full */ + QED_CHAIN_USE_TO_CONSUME_PRODUCE, /* Chain starts empty */ +}; + +struct qed_chain_next { + struct regpair next_phys; + void *next_virt; +}; + +struct qed_chain_pbl { + dma_addr_t p_phys_table; + void *p_virt_table; + u16 prod_page_idx; + u16 cons_page_idx; +}; + +struct qed_chain { + void *p_virt_addr; + dma_addr_t p_phys_addr; + void *p_prod_elem; + void *p_cons_elem; + u16 page_cnt; + enum qed_chain_mode mode; + enum qed_chain_use_mode intended_use; /* used to produce/consume */ + u16 capacity; /*< number of _usable_ elements */ + u16 size; /* number of elements */ + u16 prod_idx; + u16 cons_idx; + u16 elem_per_page; + u16 elem_per_page_mask; + u16 elem_unusable; + u16 usable_per_page; + u16 elem_size; + u16 next_page_mask; + struct qed_chain_pbl pbl; +}; + +#define QED_CHAIN_PBL_ENTRY_SIZE (8) +#define QED_CHAIN_PAGE_SIZE (0x1000) +#define ELEMS_PER_PAGE(elem_size) (QED_CHAIN_PAGE_SIZE / (elem_size)) + +#define UNUSABLE_ELEMS_PER_PAGE(elem_size, mode) \ + ((mode == QED_CHAIN_MODE_NEXT_PTR) ? \ + (1 + ((sizeof(struct qed_chain_next) - 1) / \ + (elem_size))) : 0) + +#define USABLE_ELEMS_PER_PAGE(elem_size, mode) \ + ((u32)(ELEMS_PER_PAGE(elem_size) - \ + UNUSABLE_ELEMS_PER_PAGE(elem_size, mode))) + +#define QED_CHAIN_PAGE_CNT(elem_cnt, elem_size, mode) \ + DIV_ROUND_UP(elem_cnt, USABLE_ELEMS_PER_PAGE(elem_size, mode)) + +/* Accessors */ +static inline u16 qed_chain_get_prod_idx(struct qed_chain *p_chain) +{ + return p_chain->prod_idx; +} + +static inline u16 qed_chain_get_cons_idx(struct qed_chain *p_chain) +{ + return p_chain->cons_idx; +} + +static inline u16 qed_chain_get_elem_left(struct qed_chain *p_chain) +{ + u16 used; + + /* we don't need to trancate upon assignmet, as we assign u32->u16 */ + used = ((u32)0x10000u + (u32)(p_chain->prod_idx)) - + (u32)p_chain->cons_idx; + if (p_chain->mode == QED_CHAIN_MODE_NEXT_PTR) + used -= (used / p_chain->elem_per_page); + + return p_chain->capacity - used; +} + +static inline u8 qed_chain_is_full(struct qed_chain *p_chain) +{ + return qed_chain_get_elem_left(p_chain) == p_chain->capacity; +} + +static inline u8 qed_chain_is_empty(struct qed_chain *p_chain) +{ + return qed_chain_get_elem_left(p_chain) == 0; +} + +static inline u16 qed_chain_get_elem_per_page( + struct qed_chain *p_chain) +{ + return p_chain->elem_per_page; +} + +static inline u16 qed_chain_get_usable_per_page( + struct qed_chain *p_chain) +{ + return p_chain->usable_per_page; +} + +static inline u16 qed_chain_get_unusable_per_page( + struct qed_chain *p_chain) +{ + return p_chain->elem_unusable; +} + +static inline u16 qed_chain_get_size(struct qed_chain *p_chain) +{ + return p_chain->size; +} + +static inline dma_addr_t +qed_chain_get_pbl_phys(struct qed_chain *p_chain) +{ + return p_chain->pbl.p_phys_table; +} + +/** + * @brief qed_chain_advance_page - + * + * Advance the next element accros pages for a linked chain + * + * @param p_chain + * @param p_next_elem + * @param idx_to_inc + * @param page_to_inc + */ +static inline void +qed_chain_advance_page(struct qed_chain *p_chain, + void **p_next_elem, + u16 *idx_to_inc, + u16 *page_to_inc) + +{ + switch (p_chain->mode) { + case QED_CHAIN_MODE_NEXT_PTR: + { + struct qed_chain_next *p_next = *p_next_elem; + *p_next_elem = p_next->next_virt; + *idx_to_inc += p_chain->elem_unusable; + break; + } + case QED_CHAIN_MODE_SINGLE: + *p_next_elem = p_chain->p_virt_addr; + break; + + case QED_CHAIN_MODE_PBL: + /* It is assumed pages are sequential, next element needs + * to change only when passing going back to first from last. + */ + if (++(*page_to_inc) == p_chain->page_cnt) { + *page_to_inc = 0; + *p_next_elem = p_chain->p_virt_addr; + } + } +} + +#define is_unusable_idx(p, idx) \ + (((p)->idx & (p)->elem_per_page_mask) == (p)->usable_per_page) + +#define is_unusable_next_idx(p, idx) \ + ((((p)->idx + 1) & (p)->elem_per_page_mask) == (p)->usable_per_page) + +#define test_ans_skip(p, idx) \ + do { \ + if (is_unusable_idx(p, idx)) { \ + (p)->idx += (p)->elem_unusable; \ + } \ + } while (0) + +/** + * @brief qed_chain_return_multi_produced - + * + * A chain in which the driver "Produces" elements should use this API + * to indicate previous produced elements are now consumed. + * + * @param p_chain + * @param num + */ +static inline void +qed_chain_return_multi_produced(struct qed_chain *p_chain, + u16 num) +{ + p_chain->cons_idx += num; + test_ans_skip(p_chain, cons_idx); +} + +/** + * @brief qed_chain_return_produced - + * + * A chain in which the driver "Produces" elements should use this API + * to indicate previous produced elements are now consumed. + * + * @param p_chain + */ +static inline void qed_chain_return_produced(struct qed_chain *p_chain) +{ + p_chain->cons_idx++; + test_ans_skip(p_chain, cons_idx); +} + +/** + * @brief qed_chain_produce - + * + * A chain in which the driver "Produces" elements should use this to get + * a pointer to the next element which can be "Produced". It's driver + * responsibility to validate that the chain has room for new element. + * + * @param p_chain + * + * @return void*, a pointer to next element + */ +static inline void *qed_chain_produce(struct qed_chain *p_chain) +{ + void *ret = NULL; + + if ((p_chain->prod_idx & p_chain->elem_per_page_mask) == + p_chain->next_page_mask) { + qed_chain_advance_page(p_chain, &p_chain->p_prod_elem, + &p_chain->prod_idx, + &p_chain->pbl.prod_page_idx); + } + + ret = p_chain->p_prod_elem; + p_chain->prod_idx++; + p_chain->p_prod_elem = (void *)(((u8 *)p_chain->p_prod_elem) + + p_chain->elem_size); + + return ret; +} + +/** + * @brief qed_chain_get_capacity - + * + * Get the maximum number of BDs in chain + * + * @param p_chain + * @param num + * + * @return u16, number of unusable BDs + */ +static inline u16 qed_chain_get_capacity(struct qed_chain *p_chain) +{ + return p_chain->capacity; +} + +/** + * @brief qed_chain_recycle_consumed - + * + * Returns an element which was previously consumed; + * Increments producers so they could be written to FW. + * + * @param p_chain + */ +static inline void +qed_chain_recycle_consumed(struct qed_chain *p_chain) +{ + test_ans_skip(p_chain, prod_idx); + p_chain->prod_idx++; +} + +/** + * @brief qed_chain_consume - + * + * A Chain in which the driver utilizes data written by a different source + * (i.e., FW) should use this to access passed buffers. + * + * @param p_chain + * + * @return void*, a pointer to the next buffer written + */ +static inline void *qed_chain_consume(struct qed_chain *p_chain) +{ + void *ret = NULL; + + if ((p_chain->cons_idx & p_chain->elem_per_page_mask) == + p_chain->next_page_mask) { + qed_chain_advance_page(p_chain, &p_chain->p_cons_elem, + &p_chain->cons_idx, + &p_chain->pbl.cons_page_idx); + } + + ret = p_chain->p_cons_elem; + p_chain->cons_idx++; + p_chain->p_cons_elem = (void *)(((u8 *)p_chain->p_cons_elem) + + p_chain->elem_size); + + return ret; +} + +/** + * @brief qed_chain_reset - Resets the chain to its start state + * + * @param p_chain pointer to a previously allocted chain + */ +static inline void qed_chain_reset(struct qed_chain *p_chain) +{ + int i; + + p_chain->prod_idx = 0; + p_chain->cons_idx = 0; + p_chain->p_cons_elem = p_chain->p_virt_addr; + p_chain->p_prod_elem = p_chain->p_virt_addr; + + if (p_chain->mode == QED_CHAIN_MODE_PBL) { + p_chain->pbl.prod_page_idx = p_chain->page_cnt - 1; + p_chain->pbl.cons_page_idx = p_chain->page_cnt - 1; + } + + switch (p_chain->intended_use) { + case QED_CHAIN_USE_TO_CONSUME_PRODUCE: + case QED_CHAIN_USE_TO_PRODUCE: + /* Do nothing */ + break; + + case QED_CHAIN_USE_TO_CONSUME: + /* produce empty elements */ + for (i = 0; i < p_chain->capacity; i++) + qed_chain_recycle_consumed(p_chain); + break; + } +} + +/** + * @brief qed_chain_init - Initalizes a basic chain struct + * + * @param p_chain + * @param p_virt_addr + * @param p_phys_addr physical address of allocated buffer's beginning + * @param page_cnt number of pages in the allocated buffer + * @param elem_size size of each element in the chain + * @param intended_use + * @param mode + */ +static inline void qed_chain_init(struct qed_chain *p_chain, + void *p_virt_addr, + dma_addr_t p_phys_addr, + u16 page_cnt, + u8 elem_size, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode) +{ + /* chain fixed parameters */ + p_chain->p_virt_addr = p_virt_addr; + p_chain->p_phys_addr = p_phys_addr; + p_chain->elem_size = elem_size; + p_chain->page_cnt = page_cnt; + p_chain->mode = mode; + + p_chain->intended_use = intended_use; + p_chain->elem_per_page = ELEMS_PER_PAGE(elem_size); + p_chain->usable_per_page = + USABLE_ELEMS_PER_PAGE(elem_size, mode); + p_chain->capacity = p_chain->usable_per_page * page_cnt; + p_chain->size = p_chain->elem_per_page * page_cnt; + p_chain->elem_per_page_mask = p_chain->elem_per_page - 1; + + p_chain->elem_unusable = UNUSABLE_ELEMS_PER_PAGE(elem_size, mode); + + p_chain->next_page_mask = (p_chain->usable_per_page & + p_chain->elem_per_page_mask); + + if (mode == QED_CHAIN_MODE_NEXT_PTR) { + struct qed_chain_next *p_next; + u16 i; + + for (i = 0; i < page_cnt - 1; i++) { + /* Increment mem_phy to the next page. */ + p_phys_addr += QED_CHAIN_PAGE_SIZE; + + /* Initialize the physical address of the next page. */ + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + + elem_size * + p_chain-> + usable_per_page); + + p_next->next_phys.lo = DMA_LO_LE(p_phys_addr); + p_next->next_phys.hi = DMA_HI_LE(p_phys_addr); + + /* Initialize the virtual address of the next page. */ + p_next->next_virt = (void *)((u8 *)p_virt_addr + + QED_CHAIN_PAGE_SIZE); + + /* Move to the next page. */ + p_virt_addr = p_next->next_virt; + } + + /* Last page's next should point to beginning of the chain */ + p_next = (struct qed_chain_next *)((u8 *)p_virt_addr + + elem_size * + p_chain->usable_per_page); + + p_next->next_phys.lo = DMA_LO_LE(p_chain->p_phys_addr); + p_next->next_phys.hi = DMA_HI_LE(p_chain->p_phys_addr); + p_next->next_virt = p_chain->p_virt_addr; + } + qed_chain_reset(p_chain); +} + +/** + * @brief qed_chain_pbl_init - Initalizes a basic pbl chain + * struct + * @param p_chain + * @param p_virt_addr virtual address of allocated buffer's beginning + * @param p_phys_addr physical address of allocated buffer's beginning + * @param page_cnt number of pages in the allocated buffer + * @param elem_size size of each element in the chain + * @param use_mode + * @param p_phys_pbl pointer to a pre-allocated side table + * which will hold physical page addresses. + * @param p_virt_pbl pointer to a pre allocated side table + * which will hold virtual page addresses. + */ +static inline void +qed_chain_pbl_init(struct qed_chain *p_chain, + void *p_virt_addr, + dma_addr_t p_phys_addr, + u16 page_cnt, + u8 elem_size, + enum qed_chain_use_mode use_mode, + dma_addr_t p_phys_pbl, + dma_addr_t *p_virt_pbl) +{ + dma_addr_t *p_pbl_dma = p_virt_pbl; + int i; + + qed_chain_init(p_chain, p_virt_addr, p_phys_addr, page_cnt, + elem_size, use_mode, QED_CHAIN_MODE_PBL); + + p_chain->pbl.p_phys_table = p_phys_pbl; + p_chain->pbl.p_virt_table = p_virt_pbl; + + /* Fill the PBL with physical addresses*/ + for (i = 0; i < page_cnt; i++) { + *p_pbl_dma = p_phys_addr; + p_phys_addr += QED_CHAIN_PAGE_SIZE; + p_pbl_dma++; + } +} + +/** + * @brief qed_chain_set_prod - sets the prod to the given + * value + * + * @param prod_idx + * @param p_prod_elem + */ +static inline void qed_chain_set_prod(struct qed_chain *p_chain, + u16 prod_idx, + void *p_prod_elem) +{ + p_chain->prod_idx = prod_idx; + p_chain->p_prod_elem = p_prod_elem; +} + +/** + * @brief qed_chain_get_elem - + * + * get a pointer to an element represented by absolute idx + * + * @param p_chain + * @assumption p_chain->size is a power of 2 + * + * @return void*, a pointer to next element + */ +static inline void *qed_chain_sge_get_elem(struct qed_chain *p_chain, + u16 idx) +{ + void *ret = NULL; + + if (idx >= p_chain->size) + return NULL; + + ret = (u8 *)p_chain->p_virt_addr + p_chain->elem_size * idx; + + return ret; +} + +/** + * @brief qed_chain_sge_inc_cons_prod + * + * for sge chains, producer isn't increased serially, the ring + * is expected to be full at all times. Once elements are + * consumed, they are immediately produced. + * + * @param p_chain + * @param cnt + * + * @return inline void + */ +static inline void +qed_chain_sge_inc_cons_prod(struct qed_chain *p_chain, + u16 cnt) +{ + p_chain->prod_idx += cnt; + p_chain->cons_idx += cnt; +} + +#endif diff --git a/include/linux/qed/qed_if.h b/include/linux/qed/qed_if.h new file mode 100644 index 000000000000..dc9a1353f971 --- /dev/null +++ b/include/linux/qed/qed_if.h @@ -0,0 +1,498 @@ +/* QLogic qed NIC Driver + * + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_IF_H +#define _QED_IF_H + +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#define DIRECT_REG_WR(reg_addr, val) writel((u32)val, \ + (void __iomem *)(reg_addr)) + +#define DIRECT_REG_RD(reg_addr) readl((void __iomem *)(reg_addr)) + +#define QED_COALESCE_MAX 0xFF + +/* forward */ +struct qed_dev; + +struct qed_eth_pf_params { + /* The following parameters are used during HW-init + * and these parameters need to be passed as arguments + * to update_pf_params routine invoked before slowpath start + */ + u16 num_cons; +}; + +struct qed_pf_params { + struct qed_eth_pf_params eth_pf_params; +}; + +enum qed_int_mode { + QED_INT_MODE_INTA, + QED_INT_MODE_MSIX, + QED_INT_MODE_MSI, + QED_INT_MODE_POLL, +}; + +struct qed_sb_info { + struct status_block *sb_virt; + dma_addr_t sb_phys; + u32 sb_ack; /* Last given ack */ + u16 igu_sb_id; + void __iomem *igu_addr; + u8 flags; +#define QED_SB_INFO_INIT 0x1 +#define QED_SB_INFO_SETUP 0x2 + + struct qed_dev *cdev; +}; + +struct qed_dev_info { + unsigned long pci_mem_start; + unsigned long pci_mem_end; + unsigned int pci_irq; + u8 num_hwfns; + + u8 hw_mac[ETH_ALEN]; + bool is_mf; + + /* FW version */ + u16 fw_major; + u16 fw_minor; + u16 fw_rev; + u16 fw_eng; + + /* MFW version */ + u32 mfw_rev; + + u32 flash_size; + u8 mf_mode; +}; + +enum qed_sb_type { + QED_SB_TYPE_L2_QUEUE, +}; + +enum qed_protocol { + QED_PROTOCOL_ETH, +}; + +struct qed_link_params { + bool link_up; + +#define QED_LINK_OVERRIDE_SPEED_AUTONEG BIT(0) +#define QED_LINK_OVERRIDE_SPEED_ADV_SPEEDS BIT(1) +#define QED_LINK_OVERRIDE_SPEED_FORCED_SPEED BIT(2) +#define QED_LINK_OVERRIDE_PAUSE_CONFIG BIT(3) + u32 override_flags; + bool autoneg; + u32 adv_speeds; + u32 forced_speed; +#define QED_LINK_PAUSE_AUTONEG_ENABLE BIT(0) +#define QED_LINK_PAUSE_RX_ENABLE BIT(1) +#define QED_LINK_PAUSE_TX_ENABLE BIT(2) + u32 pause_config; +}; + +struct qed_link_output { + bool link_up; + + u32 supported_caps; /* In SUPPORTED defs */ + u32 advertised_caps; /* In ADVERTISED defs */ + u32 lp_caps; /* In ADVERTISED defs */ + u32 speed; /* In Mb/s */ + u8 duplex; /* In DUPLEX defs */ + u8 port; /* In PORT defs */ + bool autoneg; + u32 pause_config; +}; + +#define QED_DRV_VER_STR_SIZE 12 +struct qed_slowpath_params { + u32 int_mode; + u8 drv_major; + u8 drv_minor; + u8 drv_rev; + u8 drv_eng; + u8 name[QED_DRV_VER_STR_SIZE]; +}; + +#define ILT_PAGE_SIZE_TCFC 0x8000 /* 32KB */ + +struct qed_int_info { + struct msix_entry *msix; + u8 msix_cnt; + + /* This should be updated by the protocol driver */ + u8 used_cnt; +}; + +struct qed_common_cb_ops { + void (*link_update)(void *dev, + struct qed_link_output *link); +}; + +struct qed_common_ops { + struct qed_dev* (*probe)(struct pci_dev *dev, + enum qed_protocol protocol, + u32 dp_module, u8 dp_level); + + void (*remove)(struct qed_dev *cdev); + + int (*set_power_state)(struct qed_dev *cdev, + pci_power_t state); + + void (*set_id)(struct qed_dev *cdev, + char name[], + char ver_str[]); + + /* Client drivers need to make this call before slowpath_start. + * PF params required for the call before slowpath_start is + * documented within the qed_pf_params structure definition. + */ + void (*update_pf_params)(struct qed_dev *cdev, + struct qed_pf_params *params); + int (*slowpath_start)(struct qed_dev *cdev, + struct qed_slowpath_params *params); + + int (*slowpath_stop)(struct qed_dev *cdev); + + /* Requests to use `cnt' interrupts for fastpath. + * upon success, returns number of interrupts allocated for fastpath. + */ + int (*set_fp_int)(struct qed_dev *cdev, + u16 cnt); + + /* Fills `info' with pointers required for utilizing interrupts */ + int (*get_fp_int)(struct qed_dev *cdev, + struct qed_int_info *info); + + u32 (*sb_init)(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + void *sb_virt_addr, + dma_addr_t sb_phy_addr, + u16 sb_id, + enum qed_sb_type type); + + u32 (*sb_release)(struct qed_dev *cdev, + struct qed_sb_info *sb_info, + u16 sb_id); + + void (*simd_handler_config)(struct qed_dev *cdev, + void *token, + int index, + void (*handler)(void *)); + + void (*simd_handler_clean)(struct qed_dev *cdev, + int index); +/** + * @brief set_link - set links according to params + * + * @param cdev + * @param params - values used to override the default link configuration + * + * @return 0 on success, error otherwise. + */ + int (*set_link)(struct qed_dev *cdev, + struct qed_link_params *params); + +/** + * @brief get_link - returns the current link state. + * + * @param cdev + * @param if_link - structure to be filled with current link configuration. + */ + void (*get_link)(struct qed_dev *cdev, + struct qed_link_output *if_link); + +/** + * @brief - drains chip in case Tx completions fail to arrive due to pause. + * + * @param cdev + */ + int (*drain)(struct qed_dev *cdev); + +/** + * @brief update_msglvl - update module debug level + * + * @param cdev + * @param dp_module + * @param dp_level + */ + void (*update_msglvl)(struct qed_dev *cdev, + u32 dp_module, + u8 dp_level); + + int (*chain_alloc)(struct qed_dev *cdev, + enum qed_chain_use_mode intended_use, + enum qed_chain_mode mode, + u16 num_elems, + size_t elem_size, + struct qed_chain *p_chain); + + void (*chain_free)(struct qed_dev *cdev, + struct qed_chain *p_chain); +}; + +/** + * @brief qed_get_protocol_version + * + * @param protocol + * + * @return version supported by qed for given protocol driver + */ +u32 qed_get_protocol_version(enum qed_protocol protocol); + +#define MASK_FIELD(_name, _value) \ + ((_value) &= (_name ## _MASK)) + +#define FIELD_VALUE(_name, _value) \ + ((_value & _name ## _MASK) << _name ## _SHIFT) + +#define SET_FIELD(value, name, flag) \ + do { \ + (value) &= ~(name ## _MASK << name ## _SHIFT); \ + (value) |= (((u64)flag) << (name ## _SHIFT)); \ + } while (0) + +#define GET_FIELD(value, name) \ + (((value) >> (name ## _SHIFT)) & name ## _MASK) + +/* Debug print definitions */ +#define DP_ERR(cdev, fmt, ...) \ + pr_err("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__) \ + +#define DP_NOTICE(cdev, fmt, ...) \ + do { \ + if (unlikely((cdev)->dp_level <= QED_LEVEL_NOTICE)) { \ + pr_notice("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + \ + } \ + } while (0) + +#define DP_INFO(cdev, fmt, ...) \ + do { \ + if (unlikely((cdev)->dp_level <= QED_LEVEL_INFO)) { \ + pr_notice("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + } \ + } while (0) + +#define DP_VERBOSE(cdev, module, fmt, ...) \ + do { \ + if (unlikely(((cdev)->dp_level <= QED_LEVEL_VERBOSE) && \ + ((cdev)->dp_module & module))) { \ + pr_notice("[%s:%d(%s)]" fmt, \ + __func__, __LINE__, \ + DP_NAME(cdev) ? DP_NAME(cdev) : "", \ + ## __VA_ARGS__); \ + } \ + } while (0) + +enum DP_LEVEL { + QED_LEVEL_VERBOSE = 0x0, + QED_LEVEL_INFO = 0x1, + QED_LEVEL_NOTICE = 0x2, + QED_LEVEL_ERR = 0x3, +}; + +#define QED_LOG_LEVEL_SHIFT (30) +#define QED_LOG_VERBOSE_MASK (0x3fffffff) +#define QED_LOG_INFO_MASK (0x40000000) +#define QED_LOG_NOTICE_MASK (0x80000000) + +enum DP_MODULE { + QED_MSG_SPQ = 0x10000, + QED_MSG_STATS = 0x20000, + QED_MSG_DCB = 0x40000, + QED_MSG_IOV = 0x80000, + QED_MSG_SP = 0x100000, + QED_MSG_STORAGE = 0x200000, + QED_MSG_CXT = 0x800000, + QED_MSG_ILT = 0x2000000, + QED_MSG_ROCE = 0x4000000, + QED_MSG_DEBUG = 0x8000000, + /* to be added...up to 0x8000000 */ +}; + +struct qed_eth_stats { + u64 no_buff_discards; + u64 packet_too_big_discard; + u64 ttl0_discard; + u64 rx_ucast_bytes; + u64 rx_mcast_bytes; + u64 rx_bcast_bytes; + u64 rx_ucast_pkts; + u64 rx_mcast_pkts; + u64 rx_bcast_pkts; + u64 mftag_filter_discards; + u64 mac_filter_discards; + u64 tx_ucast_bytes; + u64 tx_mcast_bytes; + u64 tx_bcast_bytes; + u64 tx_ucast_pkts; + u64 tx_mcast_pkts; + u64 tx_bcast_pkts; + u64 tx_err_drop_pkts; + u64 tpa_coalesced_pkts; + u64 tpa_coalesced_events; + u64 tpa_aborts_num; + u64 tpa_not_coalesced_pkts; + u64 tpa_coalesced_bytes; + + /* port */ + u64 rx_64_byte_packets; + u64 rx_127_byte_packets; + u64 rx_255_byte_packets; + u64 rx_511_byte_packets; + u64 rx_1023_byte_packets; + u64 rx_1518_byte_packets; + u64 rx_1522_byte_packets; + u64 rx_2047_byte_packets; + u64 rx_4095_byte_packets; + u64 rx_9216_byte_packets; + u64 rx_16383_byte_packets; + u64 rx_crc_errors; + u64 rx_mac_crtl_frames; + u64 rx_pause_frames; + u64 rx_pfc_frames; + u64 rx_align_errors; + u64 rx_carrier_errors; + u64 rx_oversize_packets; + u64 rx_jabbers; + u64 rx_undersize_packets; + u64 rx_fragments; + u64 tx_64_byte_packets; + u64 tx_65_to_127_byte_packets; + u64 tx_128_to_255_byte_packets; + u64 tx_256_to_511_byte_packets; + u64 tx_512_to_1023_byte_packets; + u64 tx_1024_to_1518_byte_packets; + u64 tx_1519_to_2047_byte_packets; + u64 tx_2048_to_4095_byte_packets; + u64 tx_4096_to_9216_byte_packets; + u64 tx_9217_to_16383_byte_packets; + u64 tx_pause_frames; + u64 tx_pfc_frames; + u64 tx_lpi_entry_count; + u64 tx_total_collisions; + u64 brb_truncates; + u64 brb_discards; + u64 rx_mac_bytes; + u64 rx_mac_uc_packets; + u64 rx_mac_mc_packets; + u64 rx_mac_bc_packets; + u64 rx_mac_frames_ok; + u64 tx_mac_bytes; + u64 tx_mac_uc_packets; + u64 tx_mac_mc_packets; + u64 tx_mac_bc_packets; + u64 tx_mac_ctrl_frames; +}; + +#define QED_SB_IDX 0x0002 + +#define RX_PI 0 +#define TX_PI(tc) (RX_PI + 1 + tc) + +static inline u16 qed_sb_update_sb_idx(struct qed_sb_info *sb_info) +{ + u32 prod = 0; + u16 rc = 0; + + prod = le32_to_cpu(sb_info->sb_virt->prod_index) & + STATUS_BLOCK_PROD_INDEX_MASK; + if (sb_info->sb_ack != prod) { + sb_info->sb_ack = prod; + rc |= QED_SB_IDX; + } + + /* Let SB update */ + mmiowb(); + return rc; +} + +/** + * + * @brief This function creates an update command for interrupts that is + * written to the IGU. + * + * @param sb_info - This is the structure allocated and + * initialized per status block. Assumption is + * that it was initialized using qed_sb_init + * @param int_cmd - Enable/Disable/Nop + * @param upd_flg - whether igu consumer should be + * updated. + * + * @return inline void + */ +static inline void qed_sb_ack(struct qed_sb_info *sb_info, + enum igu_int_cmd int_cmd, + u8 upd_flg) +{ + struct igu_prod_cons_update igu_ack = { 0 }; + + igu_ack.sb_id_and_flags = + ((sb_info->sb_ack << IGU_PROD_CONS_UPDATE_SB_INDEX_SHIFT) | + (upd_flg << IGU_PROD_CONS_UPDATE_UPDATE_FLAG_SHIFT) | + (int_cmd << IGU_PROD_CONS_UPDATE_ENABLE_INT_SHIFT) | + (IGU_SEG_ACCESS_REG << + IGU_PROD_CONS_UPDATE_SEGMENT_ACCESS_SHIFT)); + + DIRECT_REG_WR(sb_info->igu_addr, igu_ack.sb_id_and_flags); + + /* Both segments (interrupts & acks) are written to same place address; + * Need to guarantee all commands will be received (in-order) by HW. + */ + mmiowb(); + barrier(); +} + +static inline void __internal_ram_wr(void *p_hwfn, + void __iomem *addr, + int size, + u32 *data) + +{ + unsigned int i; + + for (i = 0; i < size / sizeof(*data); i++) + DIRECT_REG_WR(&((u32 __iomem *)addr)[i], data[i]); +} + +static inline void internal_ram_wr(void __iomem *addr, + int size, + u32 *data) +{ + __internal_ram_wr(NULL, addr, size, data); +} + +#endif -- cgit v1.2.3 From 25c089d78f3833edf614fc377e75e9cf848562f5 Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Oct 2015 11:02:26 +0200 Subject: qed: Add basic L2 interface This patch adds a public API for a network driver to work on top of QED. The interface itself is very minimal - it's mostly infrastructure, as the only content it has after this patch is a query for HW-based information required for the creation of a network interface [I.e., no actual protocol-specific configurations are supported]. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/eth_common.h | 279 +++++++++++++++++++++++++++++++++++++++++ include/linux/qed/qed_eth_if.h | 38 ++++++ 2 files changed, 317 insertions(+) create mode 100644 include/linux/qed/eth_common.h create mode 100644 include/linux/qed/qed_eth_if.h (limited to 'include/linux') diff --git a/include/linux/qed/eth_common.h b/include/linux/qed/eth_common.h new file mode 100644 index 000000000000..320b3373ac1d --- /dev/null +++ b/include/linux/qed/eth_common.h @@ -0,0 +1,279 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef __ETH_COMMON__ +#define __ETH_COMMON__ + +/********************/ +/* ETH FW CONSTANTS */ +/********************/ +#define ETH_CACHE_LINE_SIZE 64 + +#define ETH_MAX_RAMROD_PER_CON 8 +#define ETH_TX_BD_PAGE_SIZE_BYTES 4096 +#define ETH_RX_BD_PAGE_SIZE_BYTES 4096 +#define ETH_RX_SGE_PAGE_SIZE_BYTES 4096 +#define ETH_RX_CQE_PAGE_SIZE_BYTES 4096 +#define ETH_RX_NUM_NEXT_PAGE_BDS 2 +#define ETH_RX_NUM_NEXT_PAGE_SGES 2 + +#define ETH_TX_MIN_BDS_PER_NON_LSO_PKT 1 +#define ETH_TX_MAX_BDS_PER_NON_LSO_PACKET 18 +#define ETH_TX_MAX_LSO_HDR_NBD 4 +#define ETH_TX_MIN_BDS_PER_LSO_PKT 3 +#define ETH_TX_MIN_BDS_PER_TUNN_IPV6_WITH_EXT_PKT 3 +#define ETH_TX_MIN_BDS_PER_IPV6_WITH_EXT_PKT 2 +#define ETH_TX_MIN_BDS_PER_PKT_W_LOOPBACK_MODE 2 +#define ETH_TX_MAX_NON_LSO_PKT_LEN (9700 - (4 + 12 + 8)) +#define ETH_TX_MAX_LSO_HDR_BYTES 510 + +#define ETH_NUM_STATISTIC_COUNTERS MAX_NUM_VPORTS + +#define ETH_REG_CQE_PBL_SIZE 3 + +/* num of MAC/VLAN filters */ +#define ETH_NUM_MAC_FILTERS 512 +#define ETH_NUM_VLAN_FILTERS 512 + +/* approx. multicast constants */ +#define ETH_MULTICAST_BIN_FROM_MAC_SEED 0 +#define ETH_MULTICAST_MAC_BINS 256 +#define ETH_MULTICAST_MAC_BINS_IN_REGS (ETH_MULTICAST_MAC_BINS / 32) + +/* ethernet vport update constants */ +#define ETH_FILTER_RULES_COUNT 10 +#define ETH_RSS_IND_TABLE_ENTRIES_NUM 128 +#define ETH_RSS_KEY_SIZE_REGS 10 +#define ETH_RSS_ENGINE_NUM_K2 207 +#define ETH_RSS_ENGINE_NUM_BB 127 + +/* TPA constants */ +#define ETH_TPA_MAX_AGGS_NUM 64 +#define ETH_TPA_CQE_START_SGL_SIZE 3 +#define ETH_TPA_CQE_CONT_SGL_SIZE 6 +#define ETH_TPA_CQE_END_SGL_SIZE 4 + +/* Queue Zone sizes */ +#define TSTORM_QZONE_SIZE 0 +#define MSTORM_QZONE_SIZE sizeof(struct mstorm_eth_queue_zone) +#define USTORM_QZONE_SIZE sizeof(struct ustorm_eth_queue_zone) +#define XSTORM_QZONE_SIZE 0 +#define YSTORM_QZONE_SIZE sizeof(struct ystorm_eth_queue_zone) +#define PSTORM_QZONE_SIZE 0 + +/* Interrupt coalescing TimeSet */ +struct coalescing_timeset { + u8 timeset; + u8 valid; +}; + +struct eth_tx_1st_bd_flags { + u8 bitfields; +#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_FORCE_VLAN_MODE_SHIFT 0 +#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_IP_CSUM_SHIFT 1 +#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_L4_CSUM_SHIFT 2 +#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_VLAN_INSERTION_SHIFT 3 +#define ETH_TX_1ST_BD_FLAGS_LSO_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_LSO_SHIFT 4 +#define ETH_TX_1ST_BD_FLAGS_START_BD_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_START_BD_SHIFT 5 +#define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_TUNN_IP_CSUM_SHIFT 6 +#define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_MASK 0x1 +#define ETH_TX_1ST_BD_FLAGS_TUNN_L4_CSUM_SHIFT 7 +}; + +/* The parsing information data fo rthe first tx bd of a given packet. */ +struct eth_tx_data_1st_bd { + __le16 vlan; + u8 nbds; + struct eth_tx_1st_bd_flags bd_flags; + __le16 fw_use_only; +}; + +/* The parsing information data for the second tx bd of a given packet. */ +struct eth_tx_data_2nd_bd { + __le16 tunn_ip_size; + __le16 bitfields; +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_MASK 0x1FFF +#define ETH_TX_DATA_2ND_BD_L4_HDR_START_OFFSET_W_SHIFT 0 +#define ETH_TX_DATA_2ND_BD_RESERVED0_MASK 0x7 +#define ETH_TX_DATA_2ND_BD_RESERVED0_SHIFT 13 + __le16 bitfields2; +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_MASK 0xF +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_L2_HDR_SIZE_W_SHIFT 0 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_ETH_TYPE_SHIFT 4 +#define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_DEST_PORT_MODE_SHIFT 6 +#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_MASK 0x3 +#define ETH_TX_DATA_2ND_BD_TUNN_TYPE_SHIFT 8 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_TUNN_INNER_IPV6_SHIFT 10 +#define ETH_TX_DATA_2ND_BD_IPV6_EXT_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_IPV6_EXT_SHIFT 11 +#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_TUNN_IPV6_EXT_SHIFT 12 +#define ETH_TX_DATA_2ND_BD_L4_UDP_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_L4_UDP_SHIFT 13 +#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_L4_PSEUDO_CSUM_MODE_SHIFT 14 +#define ETH_TX_DATA_2ND_BD_RESERVED1_MASK 0x1 +#define ETH_TX_DATA_2ND_BD_RESERVED1_SHIFT 15 +}; + +/* Regular ETH Rx FP CQE. */ +struct eth_fast_path_rx_reg_cqe { + u8 type; + u8 bitfields; +#define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_MASK 0x7 +#define ETH_FAST_PATH_RX_REG_CQE_RSS_HASH_TYPE_SHIFT 0 +#define ETH_FAST_PATH_RX_REG_CQE_TC_MASK 0xF +#define ETH_FAST_PATH_RX_REG_CQE_TC_SHIFT 3 +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_MASK 0x1 +#define ETH_FAST_PATH_RX_REG_CQE_RESERVED0_SHIFT 7 + __le16 pkt_len; + struct parsing_and_err_flags pars_flags; + __le16 vlan_tag; + __le32 rss_hash; + __le16 len_on_bd; + u8 placement_offset; + u8 reserved; + __le16 pbl[ETH_REG_CQE_PBL_SIZE]; + u8 reserved1[10]; +}; + +/* The L4 pseudo checksum mode for Ethernet */ +enum eth_l4_pseudo_checksum_mode { + ETH_L4_PSEUDO_CSUM_CORRECT_LENGTH, + ETH_L4_PSEUDO_CSUM_ZERO_LENGTH, + MAX_ETH_L4_PSEUDO_CHECKSUM_MODE +}; + +struct eth_rx_bd { + struct regpair addr; +}; + +/* regular ETH Rx SP CQE */ +struct eth_slow_path_rx_cqe { + u8 type; + u8 ramrod_cmd_id; + u8 error_flag; + u8 reserved[27]; + __le16 echo; +}; + +/* union for all ETH Rx CQE types */ +union eth_rx_cqe { + struct eth_fast_path_rx_reg_cqe fast_path_regular; + struct eth_slow_path_rx_cqe slow_path; +}; + +/* ETH Rx CQE type */ +enum eth_rx_cqe_type { + ETH_RX_CQE_TYPE_UNUSED, + ETH_RX_CQE_TYPE_REGULAR, + ETH_RX_CQE_TYPE_SLOW_PATH, + MAX_ETH_RX_CQE_TYPE +}; + +/* ETH Rx producers data */ +struct eth_rx_prod_data { + __le16 bd_prod; + __le16 sge_prod; + __le16 cqe_prod; + __le16 reserved; +}; + +/* The first tx bd of a given packet */ +struct eth_tx_1st_bd { + struct regpair addr; + __le16 nbytes; + struct eth_tx_data_1st_bd data; +}; + +/* The second tx bd of a given packet */ +struct eth_tx_2nd_bd { + struct regpair addr; + __le16 nbytes; + struct eth_tx_data_2nd_bd data; +}; + +/* The parsing information data for the third tx bd of a given packet. */ +struct eth_tx_data_3rd_bd { + __le16 lso_mss; + u8 bitfields; +#define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_MASK 0xF +#define ETH_TX_DATA_3RD_BD_TCP_HDR_LEN_DW_SHIFT 0 +#define ETH_TX_DATA_3RD_BD_HDR_NBD_MASK 0xF +#define ETH_TX_DATA_3RD_BD_HDR_NBD_SHIFT 4 + u8 resereved0[3]; +}; + +/* The third tx bd of a given packet */ +struct eth_tx_3rd_bd { + struct regpair addr; + __le16 nbytes; + struct eth_tx_data_3rd_bd data; +}; + +/* The common non-special TX BD ring element */ +struct eth_tx_bd { + struct regpair addr; + __le16 nbytes; + __le16 reserved0; + __le32 reserved1; +}; + +union eth_tx_bd_types { + struct eth_tx_1st_bd first_bd; + struct eth_tx_2nd_bd second_bd; + struct eth_tx_3rd_bd third_bd; + struct eth_tx_bd reg_bd; +}; + +/* Mstorm Queue Zone */ +struct mstorm_eth_queue_zone { + struct eth_rx_prod_data rx_producers; + __le32 reserved[2]; +}; + +/* Ustorm Queue Zone */ +struct ustorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + __le16 reserved[3]; +}; + +/* Ystorm Queue Zone */ +struct ystorm_eth_queue_zone { + struct coalescing_timeset int_coalescing_timeset; + __le16 reserved[3]; +}; + +/* ETH doorbell data */ +struct eth_db_data { + u8 params; +#define ETH_DB_DATA_DEST_MASK 0x3 +#define ETH_DB_DATA_DEST_SHIFT 0 +#define ETH_DB_DATA_AGG_CMD_MASK 0x3 +#define ETH_DB_DATA_AGG_CMD_SHIFT 2 +#define ETH_DB_DATA_BYPASS_EN_MASK 0x1 +#define ETH_DB_DATA_BYPASS_EN_SHIFT 4 +#define ETH_DB_DATA_RESERVED_MASK 0x1 +#define ETH_DB_DATA_RESERVED_SHIFT 5 +#define ETH_DB_DATA_AGG_VAL_SEL_MASK 0x3 +#define ETH_DB_DATA_AGG_VAL_SEL_SHIFT 6 + u8 agg_flags; + __le16 bd_prod; +}; + +#endif /* __ETH_COMMON__ */ diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h new file mode 100644 index 000000000000..fbd8700f0b31 --- /dev/null +++ b/include/linux/qed/qed_eth_if.h @@ -0,0 +1,38 @@ +/* QLogic qed NIC Driver + * Copyright (c) 2015 QLogic Corporation + * + * This software is available under the terms of the GNU General Public License + * (GPL) Version 2, available from the file COPYING in the main directory of + * this source tree. + */ + +#ifndef _QED_ETH_IF_H +#define _QED_ETH_IF_H + +#include +#include +#include +#include + +struct qed_dev_eth_info { + struct qed_dev_info common; + + u8 num_queues; + u8 num_tc; + + u8 port_mac[ETH_ALEN]; + u8 num_vlan_filters; +}; + +struct qed_eth_ops { + const struct qed_common_ops *common; + + int (*fill_dev_info)(struct qed_dev *cdev, + struct qed_dev_eth_info *info); + +}; + +const struct qed_eth_ops *qed_get_eth_ops(u32 version); +void qed_put_eth_ops(void); + +#endif -- cgit v1.2.3 From cee4d26448c1000ccc1711eb5e6ed4c15f18fa83 Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 26 Oct 2015 11:02:28 +0200 Subject: qed: Add slowpath L2 support This patch adds to the qed the support to configure various L2 elements, such as channels and basic filtering conditions. It also enhances its public API to allow qede to later utilize this functionality. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 120 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 120 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index fbd8700f0b31..67a7b41b70aa 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -24,12 +24,132 @@ struct qed_dev_eth_info { u8 num_vlan_filters; }; +struct qed_update_vport_rss_params { + u16 rss_ind_table[128]; + u32 rss_key[10]; +}; + +struct qed_update_vport_params { + u8 vport_id; + u8 update_vport_active_flg; + u8 vport_active_flg; + u8 update_rss_flg; + struct qed_update_vport_rss_params rss_params; +}; + +struct qed_stop_rxq_params { + u8 rss_id; + u8 rx_queue_id; + u8 vport_id; + bool eq_completion_only; +}; + +struct qed_stop_txq_params { + u8 rss_id; + u8 tx_queue_id; +}; + +enum qed_filter_rx_mode_type { + QED_FILTER_RX_MODE_TYPE_REGULAR, + QED_FILTER_RX_MODE_TYPE_MULTI_PROMISC, + QED_FILTER_RX_MODE_TYPE_PROMISC, +}; + +enum qed_filter_xcast_params_type { + QED_FILTER_XCAST_TYPE_ADD, + QED_FILTER_XCAST_TYPE_DEL, + QED_FILTER_XCAST_TYPE_REPLACE, +}; + +struct qed_filter_ucast_params { + enum qed_filter_xcast_params_type type; + u8 vlan_valid; + u16 vlan; + u8 mac_valid; + unsigned char mac[ETH_ALEN]; +}; + +struct qed_filter_mcast_params { + enum qed_filter_xcast_params_type type; + u8 num; + unsigned char mac[64][ETH_ALEN]; +}; + +union qed_filter_type_params { + enum qed_filter_rx_mode_type accept_flags; + struct qed_filter_ucast_params ucast; + struct qed_filter_mcast_params mcast; +}; + +enum qed_filter_type { + QED_FILTER_TYPE_UCAST, + QED_FILTER_TYPE_MCAST, + QED_FILTER_TYPE_RX_MODE, + QED_MAX_FILTER_TYPES, +}; + +struct qed_filter_params { + enum qed_filter_type type; + union qed_filter_type_params filter; +}; + +struct qed_queue_start_common_params { + u8 rss_id; + u8 queue_id; + u8 vport_id; + u16 sb; + u16 sb_idx; +}; + +struct qed_eth_cb_ops { + struct qed_common_cb_ops common; +}; + struct qed_eth_ops { const struct qed_common_ops *common; int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); + int (*vport_start)(struct qed_dev *cdev, + u8 vport_id, u16 mtu, + u8 drop_ttl0_flg, + u8 inner_vlan_removal_en_flg); + + int (*vport_stop)(struct qed_dev *cdev, + u8 vport_id); + + int (*vport_update)(struct qed_dev *cdev, + struct qed_update_vport_params *params); + + int (*q_rx_start)(struct qed_dev *cdev, + struct qed_queue_start_common_params *params, + u16 bd_max_bytes, + dma_addr_t bd_chain_phys_addr, + dma_addr_t cqe_pbl_addr, + u16 cqe_pbl_size, + void __iomem **pp_prod); + + int (*q_rx_stop)(struct qed_dev *cdev, + struct qed_stop_rxq_params *params); + + int (*q_tx_start)(struct qed_dev *cdev, + struct qed_queue_start_common_params *params, + dma_addr_t pbl_addr, + u16 pbl_size, + void __iomem **pp_doorbell); + + int (*q_tx_stop)(struct qed_dev *cdev, + struct qed_stop_txq_params *params); + + int (*filter_config)(struct qed_dev *cdev, + struct qed_filter_params *params); + + int (*fastpath_stop)(struct qed_dev *cdev); + + int (*eth_cqe_completion)(struct qed_dev *cdev, + u8 rss_id, + struct eth_slow_path_rx_cqe *cqe); }; const struct qed_eth_ops *qed_get_eth_ops(u32 version); -- cgit v1.2.3 From cc875c2e4f34e86c2f562f18b6e917cfcc560bcb Mon Sep 17 00:00:00 2001 From: Yuval Mintz Date: Mon, 26 Oct 2015 11:02:31 +0200 Subject: qed: Add link support Physical link is handled by the management Firmware. This patch lays the infrastructure for attention handling in the driver, as link change notifications arrive via async. attentions, as well the handling of such notifications. This patch also extends the API with the protocol drivers by adding registered callbacks which the protocol driver passes to qed in order to be notified of async. events originating from the FW/HW. Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index 67a7b41b70aa..ab1041424013 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -111,6 +111,10 @@ struct qed_eth_ops { int (*fill_dev_info)(struct qed_dev *cdev, struct qed_dev_eth_info *info); + void (*register_ops)(struct qed_dev *cdev, + struct qed_eth_cb_ops *ops, + void *cookie); + int (*vport_start)(struct qed_dev *cdev, u8 vport_id, u16 mtu, u8 drop_ttl0_flg, -- cgit v1.2.3 From 9df2ed0415b13218f84262c2372323ef028310fc Mon Sep 17 00:00:00 2001 From: Manish Chopra Date: Mon, 26 Oct 2015 11:02:33 +0200 Subject: qed: Add statistics support Device statistics can be gathered on-demand. This adds the qed support for reading the statistics [both function and port] from the device, and adds to the public API a method for requesting the current statistics. Signed-off-by: Manish Chopra Signed-off-by: Yuval Mintz Signed-off-by: Ariel Elior Signed-off-by: David S. Miller --- include/linux/qed/qed_eth_if.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/qed/qed_eth_if.h b/include/linux/qed/qed_eth_if.h index ab1041424013..81ab178e31c1 100644 --- a/include/linux/qed/qed_eth_if.h +++ b/include/linux/qed/qed_eth_if.h @@ -154,6 +154,9 @@ struct qed_eth_ops { int (*eth_cqe_completion)(struct qed_dev *cdev, u8 rss_id, struct eth_slow_path_rx_cqe *cqe); + + void (*get_vport_stats)(struct qed_dev *cdev, + struct qed_eth_stats *stats); }; const struct qed_eth_ops *qed_get_eth_ops(u32 version); -- cgit v1.2.3 From f8e529ed941ba2bbcbf310b575d968159ce7e895 Mon Sep 17 00:00:00 2001 From: Tycho Andersen Date: Tue, 27 Oct 2015 09:23:59 +0900 Subject: seccomp, ptrace: add support for dumping seccomp filters This patch adds support for dumping a process' (classic BPF) seccomp filters via ptrace. PTRACE_SECCOMP_GET_FILTER allows the tracer to dump the user's classic BPF seccomp filters. addr should be an integer which represents the ith seccomp filter (0 is the most recently installed filter). data should be a struct sock_filter * with enough room for the ith filter, or NULL, in which case the filter is not saved. The return value for this command is the number of BPF instructions the program represents, or negative in the case of errors. Command specific errors are ENOENT: which indicates that there is no ith filter in this seccomp tree, and EMEDIUMTYPE, which indicates that the ith filter was not installed as a classic BPF filter. A caveat with this approach is that there is no way to get explicitly at the heirarchy of seccomp filters, and users need to memcmp() filters to decide which are inherited. This means that a task which installs two of the same filter can potentially confuse users of this interface. v2: * make save_orig const * check that the orig_prog exists (not necessary right now, but when grows eBPF support it will be) * s/n/filter_off and make it an unsigned long to match ptrace * count "down" the tree instead of "up" when passing a filter offset v3: * don't take the current task's lock for inspecting its seccomp mode * use a 0x42** constant for the ptrace command value v4: * don't copy to userspace while holding spinlocks v5: * add another condition to WARN_ON v6: * rebase on net-next Signed-off-by: Tycho Andersen Acked-by: Kees Cook CC: Will Drewry Reviewed-by: Oleg Nesterov CC: Andy Lutomirski CC: Pavel Emelyanov CC: Serge E. Hallyn CC: Alexei Starovoitov CC: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/seccomp.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index f4265039a94c..2296e6b2f690 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -95,4 +95,15 @@ static inline void get_seccomp_filter(struct task_struct *tsk) return; } #endif /* CONFIG_SECCOMP_FILTER */ + +#if defined(CONFIG_SECCOMP_FILTER) && defined(CONFIG_CHECKPOINT_RESTORE) +extern long seccomp_get_filter(struct task_struct *task, + unsigned long filter_off, void __user *data); +#else +static inline long seccomp_get_filter(struct task_struct *task, + unsigned long n, void __user *data) +{ + return -EINVAL; +} +#endif /* CONFIG_SECCOMP_FILTER && CONFIG_CHECKPOINT_RESTORE */ #endif /* _LINUX_SECCOMP_H */ -- cgit v1.2.3 From 2b1d88cda32f81685bae45c00bf517f77bcda3cd Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Thu, 15 Oct 2015 17:02:19 +0200 Subject: PM / Domains: Merge measurements for PM QoS device latencies Measure latency does by itself contribute to an increased latency, thus we should avoid it when it isn't needed. By merging the latency measurements for the ->save_state() and the ->stop() callbacks, we get one measurement instead of two and we get one value to store instead of two. Let's also apply the likewise change for the ->start() and ->restore_state() callbacks. Signed-off-by: Ulf Hansson Reviewed-by: Lina Iyer Acked-by: Kevin Hilman Signed-off-by: Rafael J. Wysocki --- include/linux/pm_domain.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h index f4dd8105b024..ba4ced38efae 100644 --- a/include/linux/pm_domain.h +++ b/include/linux/pm_domain.h @@ -81,10 +81,8 @@ struct gpd_link { }; struct gpd_timing_data { - s64 stop_latency_ns; - s64 start_latency_ns; - s64 save_state_latency_ns; - s64 restore_state_latency_ns; + s64 suspend_latency_ns; + s64 resume_latency_ns; s64 effective_constraint_ns; bool constraint_changed; bool cached_stop_ok; -- cgit v1.2.3 From 8eec1020f0c0c03f7219ed50cf1b754be49dd448 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:35:22 +0530 Subject: cpufreq: create cpu/cpufreq at boot time Later patches will need to create policy specific directories in /sys/devices/system/cpu/cpufreq/ directory and so the cpufreq directory wouldn't be ever empty. And so no fun creating/destroying it on need basis anymore. Create it once on system boot. Reviewed-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index dca22de98d94..338bf0e59bb8 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -149,8 +149,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) /* /sys/devices/system/cpu/cpufreq: entry point for global variables */ extern struct kobject *cpufreq_global_kobject; -int cpufreq_get_global_kobject(void); -void cpufreq_put_global_kobject(void); int cpufreq_sysfs_create_file(const struct attribute *attr); void cpufreq_sysfs_remove_file(const struct attribute *attr); -- cgit v1.2.3 From c82bd44437f5d53d1654d9e36a9e4e55610f6624 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:35:23 +0530 Subject: cpufreq: remove cpufreq_sysfs_{create|remove}_file() They don't do anything special now, remove the unnecessary wrapper. Reviewed-by: Saravana Kannan Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 338bf0e59bb8..9623218d996a 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -149,8 +149,6 @@ static inline bool policy_is_shared(struct cpufreq_policy *policy) /* /sys/devices/system/cpu/cpufreq: entry point for global variables */ extern struct kobject *cpufreq_global_kobject; -int cpufreq_sysfs_create_file(const struct attribute *attr); -void cpufreq_sysfs_remove_file(const struct attribute *attr); #ifdef CONFIG_CPU_FREQ unsigned int cpufreq_get(unsigned int cpu); -- cgit v1.2.3 From 96bdda61f58b70431bbe8a3e49794c8210f7691b Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 15 Oct 2015 21:35:24 +0530 Subject: cpufreq: create cpu/cpufreq/policyX directories The cpufreq sysfs interface had been a bit inconsistent as one of the CPUs for a policy had a real directory within its sysfs 'cpuX' directory and all other CPUs had links to it. That also made the code a bit complex as we need to take care of moving the sysfs directory if the CPU containing the real directory is getting physically hot-unplugged. Solve this by creating 'policyX' directories (per-policy) in /sys/devices/system/cpu/cpufreq/ directory, where X is the CPU for which the policy was first created. This also removes the need of keeping kobj_cpu and we can remove it now. Suggested-by: Saravana Kannan Signed-off-by: Viresh Kumar Reviewed-by: Saravana Kannan Acked-by: is more of a general agreement from the person that he is Reviewed-by: is a more strict tag and implies that the reviewer has Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 9623218d996a..ef4c5b1a860f 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -65,7 +65,6 @@ struct cpufreq_policy { unsigned int shared_type; /* ACPI: ANY or ALL affected CPUs should set cpufreq */ unsigned int cpu; /* cpu managing this policy, must be online */ - unsigned int kobj_cpu; /* cpu managing sysfs files, can be offline */ struct clk *clk; struct cpufreq_cpuinfo cpuinfo;/* see above */ -- cgit v1.2.3 From 44511fb9e55ada760822b0b0d7be9d150576f17f Mon Sep 17 00:00:00 2001 From: Ard Biesheuvel Date: Fri, 23 Oct 2015 11:48:16 +0200 Subject: efi: Use correct type for struct efi_memory_map::phys_map We have been getting away with using a void* for the physical address of the UEFI memory map, since, even on 32-bit platforms with 64-bit physical addresses, no truncation takes place if the memory map has been allocated by the firmware (which only uses 1:1 virtually addressable memory), which is usually the case. However, commit: 0f96a99dab36 ("efi: Add "efi_fake_mem" boot option") adds code that clones and modifies the UEFI memory map, and the clone may live above 4 GB on 32-bit platforms. This means our use of void* for struct efi_memory_map::phys_map has graduated from 'incorrect but working' to 'incorrect and broken', and we need to fix it. So redefine struct efi_memory_map::phys_map as phys_addr_t, and get rid of a bunch of casts that are now unneeded. Signed-off-by: Ard Biesheuvel Reviewed-by: Matt Fleming Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: izumi.taku@jp.fujitsu.com Cc: kamezawa.hiroyu@jp.fujitsu.com Cc: linux-efi@vger.kernel.org Cc: matt.fleming@intel.com Link: http://lkml.kernel.org/r/1445593697-1342-1-git-send-email-ard.biesheuvel@linaro.org Signed-off-by: Ingo Molnar --- include/linux/efi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/efi.h b/include/linux/efi.h index 4d01c1033fce..569b5a866bb1 100644 --- a/include/linux/efi.h +++ b/include/linux/efi.h @@ -680,7 +680,7 @@ typedef struct { } efi_system_table_t; struct efi_memory_map { - void *phys_map; + phys_addr_t phys_map; void *map; void *map_end; int nr_map; -- cgit v1.2.3 From 1bd5dfe41b994a6e793363894befef76626965a9 Mon Sep 17 00:00:00 2001 From: Aaro Koskinen Date: Mon, 26 Oct 2015 20:23:53 +0200 Subject: ARM: OMAP1: fix incorrect INT_DMA_LCD Commit 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") turned on SPARSE_IRQ on OMAP1, but forgot to change the number of INT_DMA_LCD. This broke the boot at least on Nokia 770, where the device hangs during framebuffer initialization. Fix by defining INT_DMA_LCD like the other interrupts. Cc: stable@vger.kernel.org # v4.2+ Fixes: 685e2d08c54b ("ARM: OMAP1: Change interrupt numbering for sparse IRQ") Signed-off-by: Aaro Koskinen Signed-off-by: Tony Lindgren --- include/linux/omap-dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/omap-dma.h b/include/linux/omap-dma.h index e5a70132a240..88fa8af2b937 100644 --- a/include/linux/omap-dma.h +++ b/include/linux/omap-dma.h @@ -17,7 +17,7 @@ #include -#define INT_DMA_LCD 25 +#define INT_DMA_LCD (NR_IRQS_LEGACY + 25) #define OMAP1_DMA_TOUT_IRQ (1 << 0) #define OMAP_DMA_DROP_IRQ (1 << 1) -- cgit v1.2.3 From 412a15c0fe537c59c794d4e8134580b9cb984a0c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 13 Oct 2015 19:11:36 +0300 Subject: svcrdma: Port to new memory registration API Instead of maintaining a fastreg page list, keep an sg table and convert an array of pages to a sg list. Then call ib_map_mr_sg and construct ib_reg_wr. Signed-off-by: Sagi Grimberg Acked-by: Christoph Hellwig Tested-by: Steve Wise Tested-by: Selvin Xavier Signed-off-by: Doug Ledford --- include/linux/sunrpc/svc_rdma.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..1e4438ea2380 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -105,11 +105,9 @@ struct svc_rdma_chunk_sge { }; struct svc_rdma_fastreg_mr { struct ib_mr *mr; - void *kva; - struct ib_fast_reg_page_list *page_list; - int page_list_len; + struct scatterlist *sg; + int sg_nents; unsigned long access_flags; - unsigned long map_len; enum dma_data_direction direction; struct list_head frmr_list; }; -- cgit v1.2.3 From cd9e9808d18fe7107c306f6e71c8be7230ee42b4 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Wed, 28 Oct 2015 19:54:55 +0100 Subject: lightnvm: Support for Open-Channel SSDs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Open-channel SSDs are devices that share responsibilities with the host in order to implement and maintain features that typical SSDs keep strictly in firmware. These include (i) the Flash Translation Layer (FTL), (ii) bad block management, and (iii) hardware units such as the flash controller, the interface controller, and large amounts of flash chips. In this way, Open-channels SSDs exposes direct access to their physical flash storage, while keeping a subset of the internal features of SSDs. LightNVM is a specification that gives support to Open-channel SSDs LightNVM allows the host to manage data placement, garbage collection, and parallelism. Device specific responsibilities such as bad block management, FTL extensions to support atomic IOs, or metadata persistence are still handled by the device. The implementation of LightNVM consists of two parts: core and (multiple) targets. The core implements functionality shared across targets. This is initialization, teardown and statistics. The targets implement the interface that exposes physical flash to user-space applications. Examples of such targets include key-value store, object-store, as well as traditional block devices, which can be application-specific. Contributions in this patch from: Javier Gonzalez Dongsheng Yang Jesper Madsen Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 522 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 522 insertions(+) create mode 100644 include/linux/lightnvm.h (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h new file mode 100644 index 000000000000..122b176600fa --- /dev/null +++ b/include/linux/lightnvm.h @@ -0,0 +1,522 @@ +#ifndef NVM_H +#define NVM_H + +enum { + NVM_IO_OK = 0, + NVM_IO_REQUEUE = 1, + NVM_IO_DONE = 2, + NVM_IO_ERR = 3, + + NVM_IOTYPE_NONE = 0, + NVM_IOTYPE_GC = 1, +}; + +#ifdef CONFIG_NVM + +#include +#include +#include +#include + +enum { + /* HW Responsibilities */ + NVM_RSP_L2P = 1 << 0, + NVM_RSP_ECC = 1 << 1, + + /* Physical Adressing Mode */ + NVM_ADDRMODE_LINEAR = 0, + NVM_ADDRMODE_CHANNEL = 1, + + /* Plane programming mode for LUN */ + NVM_PLANE_SINGLE = 0, + NVM_PLANE_DOUBLE = 1, + NVM_PLANE_QUAD = 2, + + /* Status codes */ + NVM_RSP_SUCCESS = 0x0, + NVM_RSP_NOT_CHANGEABLE = 0x1, + NVM_RSP_ERR_FAILWRITE = 0x40ff, + NVM_RSP_ERR_EMPTYPAGE = 0x42ff, + + /* Device opcodes */ + NVM_OP_HBREAD = 0x02, + NVM_OP_HBWRITE = 0x81, + NVM_OP_PWRITE = 0x91, + NVM_OP_PREAD = 0x92, + NVM_OP_ERASE = 0x90, + + /* PPA Command Flags */ + NVM_IO_SNGL_ACCESS = 0x0, + NVM_IO_DUAL_ACCESS = 0x1, + NVM_IO_QUAD_ACCESS = 0x2, + + NVM_IO_SUSPEND = 0x80, + NVM_IO_SLC_MODE = 0x100, + NVM_IO_SCRAMBLE_DISABLE = 0x200, +}; + +struct nvm_id_group { + u8 mtype; + u8 fmtype; + u16 res16; + u8 num_ch; + u8 num_lun; + u8 num_pln; + u16 num_blk; + u16 num_pg; + u16 fpg_sz; + u16 csecs; + u16 sos; + u32 trdt; + u32 trdm; + u32 tprt; + u32 tprm; + u32 tbet; + u32 tbem; + u32 mpos; + u16 cpar; + u8 res[913]; +} __packed; + +struct nvm_addr_format { + u8 ch_offset; + u8 ch_len; + u8 lun_offset; + u8 lun_len; + u8 pln_offset; + u8 pln_len; + u8 blk_offset; + u8 blk_len; + u8 pg_offset; + u8 pg_len; + u8 sect_offset; + u8 sect_len; + u8 res[4]; +}; + +struct nvm_id { + u8 ver_id; + u8 vmnt; + u8 cgrps; + u8 res[5]; + u32 cap; + u32 dom; + struct nvm_addr_format ppaf; + u8 ppat; + u8 resv[224]; + struct nvm_id_group groups[4]; +} __packed; + +struct nvm_target { + struct list_head list; + struct nvm_tgt_type *type; + struct gendisk *disk; +}; + +struct nvm_tgt_instance { + struct nvm_tgt_type *tt; +}; + +#define ADDR_EMPTY (~0ULL) + +#define NVM_VERSION_MAJOR 1 +#define NVM_VERSION_MINOR 0 +#define NVM_VERSION_PATCH 0 + +#define NVM_SEC_BITS (8) +#define NVM_PL_BITS (6) +#define NVM_PG_BITS (16) +#define NVM_BLK_BITS (16) +#define NVM_LUN_BITS (10) +#define NVM_CH_BITS (8) + +struct ppa_addr { + union { + /* Channel-based PPA format in nand 4x2x2x2x8x10 */ + struct { + sector_t ch : 4; + sector_t sec : 2; /* 4 sectors per page */ + sector_t pl : 2; /* 4 planes per LUN */ + sector_t lun : 2; /* 4 LUNs per channel */ + sector_t pg : 8; /* 256 pages per block */ + sector_t blk : 10;/* 1024 blocks per plane */ + sector_t resved : 36; + } chnl; + + /* Generic structure for all addresses */ + struct { + sector_t sec : NVM_SEC_BITS; + sector_t pl : NVM_PL_BITS; + sector_t pg : NVM_PG_BITS; + sector_t blk : NVM_BLK_BITS; + sector_t lun : NVM_LUN_BITS; + sector_t ch : NVM_CH_BITS; + } g; + + sector_t ppa; + }; +} __packed; + +struct nvm_rq { + struct nvm_tgt_instance *ins; + struct nvm_dev *dev; + + struct bio *bio; + + union { + struct ppa_addr ppa_addr; + dma_addr_t dma_ppa_list; + }; + + struct ppa_addr *ppa_list; + + void *metadata; + dma_addr_t dma_metadata; + + uint8_t opcode; + uint16_t nr_pages; + uint16_t flags; +}; + +static inline struct nvm_rq *nvm_rq_from_pdu(void *pdu) +{ + return pdu - sizeof(struct nvm_rq); +} + +static inline void *nvm_rq_to_pdu(struct nvm_rq *rqdata) +{ + return rqdata + 1; +} + +struct nvm_block; + +typedef int (nvm_l2p_update_fn)(u64, u32, __le64 *, void *); +typedef int (nvm_bb_update_fn)(u32, void *, unsigned int, void *); +typedef int (nvm_id_fn)(struct request_queue *, struct nvm_id *); +typedef int (nvm_get_l2p_tbl_fn)(struct request_queue *, u64, u32, + nvm_l2p_update_fn *, void *); +typedef int (nvm_op_bb_tbl_fn)(struct request_queue *, int, unsigned int, + nvm_bb_update_fn *, void *); +typedef int (nvm_op_set_bb_fn)(struct request_queue *, struct nvm_rq *, int); +typedef int (nvm_submit_io_fn)(struct request_queue *, struct nvm_rq *); +typedef int (nvm_erase_blk_fn)(struct request_queue *, struct nvm_rq *); +typedef void *(nvm_create_dma_pool_fn)(struct request_queue *, char *); +typedef void (nvm_destroy_dma_pool_fn)(void *); +typedef void *(nvm_dev_dma_alloc_fn)(struct request_queue *, void *, gfp_t, + dma_addr_t *); +typedef void (nvm_dev_dma_free_fn)(void *, void*, dma_addr_t); + +struct nvm_dev_ops { + nvm_id_fn *identity; + nvm_get_l2p_tbl_fn *get_l2p_tbl; + nvm_op_bb_tbl_fn *get_bb_tbl; + nvm_op_set_bb_fn *set_bb; + + nvm_submit_io_fn *submit_io; + nvm_erase_blk_fn *erase_block; + + nvm_create_dma_pool_fn *create_dma_pool; + nvm_destroy_dma_pool_fn *destroy_dma_pool; + nvm_dev_dma_alloc_fn *dev_dma_alloc; + nvm_dev_dma_free_fn *dev_dma_free; + + uint8_t max_phys_sect; +}; + +struct nvm_lun { + int id; + + int lun_id; + int chnl_id; + + unsigned int nr_free_blocks; /* Number of unused blocks */ + struct nvm_block *blocks; + + spinlock_t lock; +}; + +struct nvm_block { + struct list_head list; + struct nvm_lun *lun; + unsigned long id; + + void *priv; + int type; +}; + +struct nvm_dev { + struct nvm_dev_ops *ops; + + struct list_head devices; + struct list_head online_targets; + + /* Media manager */ + struct nvmm_type *mt; + void *mp; + + /* Device information */ + int nr_chnls; + int nr_planes; + int luns_per_chnl; + int sec_per_pg; /* only sectors for a single page */ + int pgs_per_blk; + int blks_per_lun; + int sec_size; + int oob_size; + int addr_mode; + struct nvm_addr_format addr_format; + + /* Calculated/Cached values. These do not reflect the actual usable + * blocks at run-time. + */ + int max_rq_size; + int plane_mode; /* drive device in single, double or quad mode */ + + int sec_per_pl; /* all sectors across planes */ + int sec_per_blk; + int sec_per_lun; + + unsigned long total_pages; + unsigned long total_blocks; + int nr_luns; + unsigned max_pages_per_blk; + + void *ppalist_pool; + + struct nvm_id identity; + + /* Backend device */ + struct request_queue *q; + char name[DISK_NAME_LEN]; +}; + +/* fallback conversion */ +static struct ppa_addr __generic_to_linear_addr(struct nvm_dev *dev, + struct ppa_addr r) +{ + struct ppa_addr l; + + l.ppa = r.g.sec + + r.g.pg * dev->sec_per_pg + + r.g.blk * (dev->pgs_per_blk * + dev->sec_per_pg) + + r.g.lun * (dev->blks_per_lun * + dev->pgs_per_blk * + dev->sec_per_pg) + + r.g.ch * (dev->blks_per_lun * + dev->pgs_per_blk * + dev->luns_per_chnl * + dev->sec_per_pg); + + return l; +} + +/* fallback conversion */ +static struct ppa_addr __linear_to_generic_addr(struct nvm_dev *dev, + struct ppa_addr r) +{ + struct ppa_addr l; + int secs, pgs, blks, luns; + sector_t ppa = r.ppa; + + l.ppa = 0; + + div_u64_rem(ppa, dev->sec_per_pg, &secs); + l.g.sec = secs; + + sector_div(ppa, dev->sec_per_pg); + div_u64_rem(ppa, dev->sec_per_blk, &pgs); + l.g.pg = pgs; + + sector_div(ppa, dev->pgs_per_blk); + div_u64_rem(ppa, dev->blks_per_lun, &blks); + l.g.blk = blks; + + sector_div(ppa, dev->blks_per_lun); + div_u64_rem(ppa, dev->luns_per_chnl, &luns); + l.g.lun = luns; + + sector_div(ppa, dev->luns_per_chnl); + l.g.ch = ppa; + + return l; +} + +static struct ppa_addr __generic_to_chnl_addr(struct ppa_addr r) +{ + struct ppa_addr l; + + l.ppa = 0; + + l.chnl.sec = r.g.sec; + l.chnl.pl = r.g.pl; + l.chnl.pg = r.g.pg; + l.chnl.blk = r.g.blk; + l.chnl.lun = r.g.lun; + l.chnl.ch = r.g.ch; + + return l; +} + +static struct ppa_addr __chnl_to_generic_addr(struct ppa_addr r) +{ + struct ppa_addr l; + + l.ppa = 0; + + l.g.sec = r.chnl.sec; + l.g.pl = r.chnl.pl; + l.g.pg = r.chnl.pg; + l.g.blk = r.chnl.blk; + l.g.lun = r.chnl.lun; + l.g.ch = r.chnl.ch; + + return l; +} + +static inline struct ppa_addr addr_to_generic_mode(struct nvm_dev *dev, + struct ppa_addr gppa) +{ + switch (dev->addr_mode) { + case NVM_ADDRMODE_LINEAR: + return __linear_to_generic_addr(dev, gppa); + case NVM_ADDRMODE_CHANNEL: + return __chnl_to_generic_addr(gppa); + default: + BUG(); + } + return gppa; +} + +static inline struct ppa_addr generic_to_addr_mode(struct nvm_dev *dev, + struct ppa_addr gppa) +{ + switch (dev->addr_mode) { + case NVM_ADDRMODE_LINEAR: + return __generic_to_linear_addr(dev, gppa); + case NVM_ADDRMODE_CHANNEL: + return __generic_to_chnl_addr(gppa); + default: + BUG(); + } + return gppa; +} + +static inline int ppa_empty(struct ppa_addr ppa_addr) +{ + return (ppa_addr.ppa == ADDR_EMPTY); +} + +static inline void ppa_set_empty(struct ppa_addr *ppa_addr) +{ + ppa_addr->ppa = ADDR_EMPTY; +} + +static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, + struct nvm_block *blk) +{ + struct ppa_addr ppa; + struct nvm_lun *lun = blk->lun; + + ppa.ppa = 0; + ppa.g.blk = blk->id % dev->blks_per_lun; + ppa.g.lun = lun->lun_id; + ppa.g.ch = lun->chnl_id; + + return ppa; +} + +typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); +typedef sector_t (nvm_tgt_capacity_fn)(void *); +typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int); +typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); +typedef void (nvm_tgt_exit_fn)(void *); + +struct nvm_tgt_type { + const char *name; + unsigned int version[3]; + + /* target entry points */ + nvm_tgt_make_rq_fn *make_rq; + nvm_tgt_capacity_fn *capacity; + nvm_tgt_end_io_fn *end_io; + + /* module-specific init/teardown */ + nvm_tgt_init_fn *init; + nvm_tgt_exit_fn *exit; + + /* For internal use */ + struct list_head list; +}; + +extern int nvm_register_target(struct nvm_tgt_type *); +extern void nvm_unregister_target(struct nvm_tgt_type *); + +extern void *nvm_dev_dma_alloc(struct nvm_dev *, gfp_t, dma_addr_t *); +extern void nvm_dev_dma_free(struct nvm_dev *, void *, dma_addr_t); + +typedef int (nvmm_register_fn)(struct nvm_dev *); +typedef void (nvmm_unregister_fn)(struct nvm_dev *); +typedef struct nvm_block *(nvmm_get_blk_fn)(struct nvm_dev *, + struct nvm_lun *, unsigned long); +typedef void (nvmm_put_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef int (nvmm_open_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef int (nvmm_close_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef void (nvmm_flush_blk_fn)(struct nvm_dev *, struct nvm_block *); +typedef int (nvmm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); +typedef int (nvmm_end_io_fn)(struct nvm_rq *, int); +typedef int (nvmm_erase_blk_fn)(struct nvm_dev *, struct nvm_block *, + unsigned long); +typedef struct nvm_lun *(nvmm_get_lun_fn)(struct nvm_dev *, int); +typedef void (nvmm_free_blocks_print_fn)(struct nvm_dev *); + +struct nvmm_type { + const char *name; + unsigned int version[3]; + + nvmm_register_fn *register_mgr; + nvmm_unregister_fn *unregister_mgr; + + /* Block administration callbacks */ + nvmm_get_blk_fn *get_blk; + nvmm_put_blk_fn *put_blk; + nvmm_open_blk_fn *open_blk; + nvmm_close_blk_fn *close_blk; + nvmm_flush_blk_fn *flush_blk; + + nvmm_submit_io_fn *submit_io; + nvmm_end_io_fn *end_io; + nvmm_erase_blk_fn *erase_blk; + + /* Configuration management */ + nvmm_get_lun_fn *get_lun; + + /* Statistics */ + nvmm_free_blocks_print_fn *free_blocks_print; + struct list_head list; +}; + +extern int nvm_register_mgr(struct nvmm_type *); +extern void nvm_unregister_mgr(struct nvmm_type *); + +extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, + unsigned long); +extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); + +extern int nvm_register(struct request_queue *, char *, + struct nvm_dev_ops *); +extern void nvm_unregister(char *); + +extern int nvm_submit_io(struct nvm_dev *, struct nvm_rq *); +extern int nvm_erase_blk(struct nvm_dev *, struct nvm_block *); +#else /* CONFIG_NVM */ +struct nvm_dev_ops; + +static inline int nvm_register(struct request_queue *q, char *disk_name, + struct nvm_dev_ops *ops) +{ + return -EINVAL; +} +static inline void nvm_unregister(char *disk_name) {} +#endif /* CONFIG_NVM */ +#endif /* LIGHTNVM.H */ -- cgit v1.2.3 From 1e0d69a9cc9172d7896c2113f983a74f6e8ff303 Mon Sep 17 00:00:00 2001 From: Hannes Frederic Sowa Date: Wed, 28 Oct 2015 13:21:03 +0100 Subject: Revert "Merge branch 'ipv6-overflow-arith'" Linus dislikes these changes. To not hold up the net-merge let's revert it for now and fix the bug like Linus suggested. This reverts commit ec3661b42257d9a06cf0d318175623ac7a660113, reversing changes made to c80dbe04612986fd6104b4a1be21681b113b5ac9. Cc: Linus Torvalds Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/compiler-gcc.h | 4 ---- include/linux/overflow-arith.h | 18 ------------------ 2 files changed, 22 deletions(-) delete mode 100644 include/linux/overflow-arith.h (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 82c159e0532a..dfaa7b3e9ae9 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -237,10 +237,6 @@ #define KASAN_ABI_VERSION 3 #endif -#if GCC_VERSION >= 50000 -#define CC_HAVE_BUILTIN_OVERFLOW -#endif - #endif /* gcc version >= 40000 specific checks */ #if !defined(__noclone) diff --git a/include/linux/overflow-arith.h b/include/linux/overflow-arith.h deleted file mode 100644 index e12ccf854a70..000000000000 --- a/include/linux/overflow-arith.h +++ /dev/null @@ -1,18 +0,0 @@ -#pragma once - -#include - -#ifdef CC_HAVE_BUILTIN_OVERFLOW - -#define overflow_usub __builtin_usub_overflow - -#else - -static inline bool overflow_usub(unsigned int a, unsigned int b, - unsigned int *res) -{ - *res = a - b; - return *res > a ? true : false; -} - -#endif -- cgit v1.2.3 From b7c08cf85c9a3a4b05474b7acacc9fbce8fb3eaf Mon Sep 17 00:00:00 2001 From: Jarkko Nikula Date: Wed, 28 Oct 2015 15:13:42 +0200 Subject: spi: pxa2xx: Add support for Intel Broxton LPSS SPI in Intel Broxton is otherwise the same than in Intel Sunrisepoint but it supports up to four chip selects per port and has different FIFO thresholds. Patch adds support for two Broxton SoC variants. Signed-off-by: Jarkko Nikula Signed-off-by: Mark Brown --- include/linux/pxa2xx_ssp.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pxa2xx_ssp.h b/include/linux/pxa2xx_ssp.h index 92273776bce6..c2f2574ff61c 100644 --- a/include/linux/pxa2xx_ssp.h +++ b/include/linux/pxa2xx_ssp.h @@ -198,6 +198,7 @@ enum pxa_ssp_type { LPSS_LPT_SSP, /* Keep LPSS types sorted with lpss_platforms[] */ LPSS_BYT_SSP, LPSS_SPT_SSP, + LPSS_BXT_SSP, }; struct ssp_device { -- cgit v1.2.3 From a519435a96597d8cd96123246fea4ae5a6c90b02 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Tue, 20 Oct 2015 16:34:16 +0200 Subject: dma-buf/fence: add fence_wait_any_timeout function v2 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Waiting for the first fence in an array of fences to signal. This is useful for device driver specific resource managers and also Vulkan needs something similar. v2: more parameter checks, handling for timeout==0, remove NULL entry support, better callback removal. Signed-off-by: Christian König Reviewed-by: Alex Deucher Reviewed-by: Maarten Lankhorst --- include/linux/fence.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index 39efee130d2b..a4084d6bb851 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -305,7 +305,8 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) } signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); - +signed long fence_wait_any_timeout(struct fence **fences, uint32_t count, + bool intr, signed long timeout); /** * fence_wait - sleep until the fence gets signaled -- cgit v1.2.3 From 6c455ac17bcf4beae6c094a1007b976b60b4bb57 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Christian=20K=C3=B6nig?= Date: Wed, 21 Oct 2015 12:58:17 +0200 Subject: dma-buf/fence: add fence_is_later() MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Return true when fence 1 is later than fence 2 without checking if any of them are signaled. Useful for driver specific resource handling based on fences. Signed-off-by: Christian König Reviewed-by: Alex Deucher --- include/linux/fence.h | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fence.h b/include/linux/fence.h index a4084d6bb851..bb522011383b 100644 --- a/include/linux/fence.h +++ b/include/linux/fence.h @@ -279,6 +279,22 @@ fence_is_signaled(struct fence *fence) return false; } +/** + * fence_is_later - return if f1 is chronologically later than f2 + * @f1: [in] the first fence from the same context + * @f2: [in] the second fence from the same context + * + * Returns true if f1 is chronologically later than f2. Both fences must be + * from the same context, since a seqno is not re-used across contexts. + */ +static inline bool fence_is_later(struct fence *f1, struct fence *f2) +{ + if (WARN_ON(f1->context != f2->context)) + return false; + + return f1->seqno - f2->seqno < INT_MAX; +} + /** * fence_later - return the chronologically later fence * @f1: [in] the first fence from the same context @@ -298,10 +314,10 @@ static inline struct fence *fence_later(struct fence *f1, struct fence *f2) * set if enable_signaling wasn't called, and enabling that here is * overkill. */ - if (f2->seqno - f1->seqno <= INT_MAX) - return fence_is_signaled(f2) ? NULL : f2; - else + if (fence_is_later(f1, f2)) return fence_is_signaled(f1) ? NULL : f1; + else + return fence_is_signaled(f2) ? NULL : f2; } signed long fence_wait_timeout(struct fence *, bool intr, signed long timeout); -- cgit v1.2.3 From d9e4ad5badf4ccbfddee208c898fb8fd0c8836b1 Mon Sep 17 00:00:00 2001 From: David Woodhouse Date: Wed, 28 Oct 2015 16:14:31 +0900 Subject: Document that IRQ_NONE should be returned when IRQ not actually handled MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Our IRQ storm detection works when an interrupt handler returns IRQ_NONE for thousands of consecutive interrupts in a second. It doesn't hurt to occasionally return IRQ_NONE when the interrupt is actually genuine. Drivers should only be returning IRQ_HANDLED if they have actually *done* something to stop an interrupt from happening — it doesn't just mean "this really *was* my device". Signed-off-by: David Woodhouse Cc: davem@davemloft.net Link: http://lkml.kernel.org/r/1446016471.3405.201.camel@infradead.org Signed-off-by: Thomas Gleixner --- include/linux/irqreturn.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/irqreturn.h b/include/linux/irqreturn.h index e374e369fb2f..eb1bdcf95f2e 100644 --- a/include/linux/irqreturn.h +++ b/include/linux/irqreturn.h @@ -3,7 +3,7 @@ /** * enum irqreturn - * @IRQ_NONE interrupt was not from this device + * @IRQ_NONE interrupt was not from this device or was not handled * @IRQ_HANDLED interrupt was handled by this device * @IRQ_WAKE_THREAD handler requests to wake the handler thread */ -- cgit v1.2.3 From 6bb3b3acc3d096b938887c90f4bb19a639b99852 Mon Sep 17 00:00:00 2001 From: Qipeng Zha Date: Tue, 15 Sep 2015 00:39:18 +0800 Subject: mfd: intel_soc_pmic: Add support for Broxton WC PMIC IRQ control registers of Intel Broxton Whisky Cove PMIC are separated in two parts, so add secondary IRQ chip. And the new member of device will be used in PMC IPC regmap APIs. Signed-off-by: Qipeng Zha Signed-off-by: Lee Jones --- include/linux/mfd/intel_soc_pmic.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/intel_soc_pmic.h b/include/linux/mfd/intel_soc_pmic.h index abcbfcf32d10..cf619dbeace2 100644 --- a/include/linux/mfd/intel_soc_pmic.h +++ b/include/linux/mfd/intel_soc_pmic.h @@ -25,6 +25,8 @@ struct intel_soc_pmic { int irq; struct regmap *regmap; struct regmap_irq_chip_data *irq_chip_data; + struct regmap_irq_chip_data *irq_chip_data_level2; + struct device *dev; }; #endif /* __INTEL_SOC_PMIC_H__ */ -- cgit v1.2.3 From 39d047c0b1c812e9f0014e7100e372e61f2de3de Mon Sep 17 00:00:00 2001 From: Qipeng Zha Date: Tue, 15 Sep 2015 00:39:19 +0800 Subject: mfd: add Intel Broxton Whiskey Cove PMIC driver Add MFD core driver for Intel Broxton Whiskey Cove PMIC, which is specially accessed by hardware IPC, not a generic I2C device Signed-off-by: Qipeng Zha Signed-off-by: Lee Jones --- include/linux/mfd/intel_bxtwc.h | 69 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 69 insertions(+) create mode 100644 include/linux/mfd/intel_bxtwc.h (limited to 'include/linux') diff --git a/include/linux/mfd/intel_bxtwc.h b/include/linux/mfd/intel_bxtwc.h new file mode 100644 index 000000000000..1a0ee9d6efe9 --- /dev/null +++ b/include/linux/mfd/intel_bxtwc.h @@ -0,0 +1,69 @@ +/* + * intel_bxtwc.h - Header file for Intel Broxton Whiskey Cove PMIC + * + * Copyright (C) 2015 Intel Corporation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms and conditions of the GNU General Public License, + * version 2, as published by the Free Software Foundation. + * + * This program is distributed in the hope it will be useful, but WITHOUT + * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or + * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for + * more details. + */ + +#include + +#ifndef __INTEL_BXTWC_H__ +#define __INTEL_BXTWC_H__ + +/* BXT WC devices */ +#define BXTWC_DEVICE1_ADDR 0x4E +#define BXTWC_DEVICE2_ADDR 0x4F +#define BXTWC_DEVICE3_ADDR 0x5E + +/* device1 Registers */ +#define BXTWC_CHIPID 0x4E00 +#define BXTWC_CHIPVER 0x4E01 + +#define BXTWC_SCHGRIRQ0_ADDR 0x5E1A +#define BXTWC_CHGRCTRL0_ADDR 0x5E16 +#define BXTWC_CHGRCTRL1_ADDR 0x5E17 +#define BXTWC_CHGRCTRL2_ADDR 0x5E18 +#define BXTWC_CHGRSTATUS_ADDR 0x5E19 +#define BXTWC_THRMBATZONE_ADDR 0x4F22 + +#define BXTWC_USBPATH_ADDR 0x5E19 +#define BXTWC_USBPHYCTRL_ADDR 0x5E07 +#define BXTWC_USBIDCTRL_ADDR 0x5E05 +#define BXTWC_USBIDEN_MASK 0x01 +#define BXTWC_USBIDSTAT_ADDR 0x00FF +#define BXTWC_USBSRCDETSTATUS_ADDR 0x5E29 + +#define BXTWC_DBGUSBBC1_ADDR 0x5FE0 +#define BXTWC_DBGUSBBC2_ADDR 0x5FE1 +#define BXTWC_DBGUSBBCSTAT_ADDR 0x5FE2 + +#define BXTWC_WAKESRC_ADDR 0x4E22 +#define BXTWC_WAKESRC2_ADDR 0x4EE5 +#define BXTWC_CHRTTADDR_ADDR 0x5E22 +#define BXTWC_CHRTTDATA_ADDR 0x5E23 + +#define BXTWC_STHRMIRQ0_ADDR 0x4F19 +#define WC_MTHRMIRQ1_ADDR 0x4E12 +#define WC_STHRMIRQ1_ADDR 0x4F1A +#define WC_STHRMIRQ2_ADDR 0x4F1B + +#define BXTWC_THRMZN0H_ADDR 0x4F44 +#define BXTWC_THRMZN0L_ADDR 0x4F45 +#define BXTWC_THRMZN1H_ADDR 0x4F46 +#define BXTWC_THRMZN1L_ADDR 0x4F47 +#define BXTWC_THRMZN2H_ADDR 0x4F48 +#define BXTWC_THRMZN2L_ADDR 0x4F49 +#define BXTWC_THRMZN3H_ADDR 0x4F4A +#define BXTWC_THRMZN3L_ADDR 0x4F4B +#define BXTWC_THRMZN4H_ADDR 0x4F4C +#define BXTWC_THRMZN4L_ADDR 0x4F4D + +#endif -- cgit v1.2.3 From 0386af30d3d99d942dd68a8c64beb4f03958e74f Mon Sep 17 00:00:00 2001 From: Steve Twiss Date: Thu, 8 Oct 2015 16:17:51 +0100 Subject: mfd: da9053: Addition of extra registers for GPIOs 8-13 Definitions for GPIO registers 8, 9, 10, 11, 12 and 13 are added into the register header file. - DA9052_GPIO_8_9_REG 25 - DA9052_GPIO_10_11_REG 26 - DA9052_GPIO_12_13_REG 27 A modification is also made to the MFD core code to define these registers as readable and writable. The functions for da9052_reg_readable() and da9052_reg_writeable() have had their case statements altered to include these new registers. Signed-off-by: Steve Twiss Signed-off-by: Lee Jones --- include/linux/mfd/da9052/reg.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/da9052/reg.h b/include/linux/mfd/da9052/reg.h index c4dd3a8add21..5010f978725c 100644 --- a/include/linux/mfd/da9052/reg.h +++ b/include/linux/mfd/da9052/reg.h @@ -65,6 +65,9 @@ #define DA9052_GPIO_2_3_REG 22 #define DA9052_GPIO_4_5_REG 23 #define DA9052_GPIO_6_7_REG 24 +#define DA9052_GPIO_8_9_REG 25 +#define DA9052_GPIO_10_11_REG 26 +#define DA9052_GPIO_12_13_REG 27 #define DA9052_GPIO_14_15_REG 28 /* POWER SEQUENCER CONTROL REGISTERS */ -- cgit v1.2.3 From ce6a5acc93876f619f32f8f60c7c6e549e46d962 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Mon, 29 Jun 2015 09:19:39 +0800 Subject: mfd: rtsx: Add support for rts522A rts522a(rts5227s) is derived from rts5227, and mainly same with rts5227. Add it to file mfd/rts5227.c to support this chip. Signed-off-by: Micky Ching Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_pci.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index ff843e7ca23d..7eb7cbac0a9a 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -589,6 +589,7 @@ #define FORCE_ASPM_NO_ASPM 0x00 #define PM_CLK_FORCE_CTL 0xFE58 #define FUNC_FORCE_CTL 0xFE59 +#define FUNC_FORCE_UPME_XMT_DBG 0x02 #define PERST_GLITCH_WIDTH 0xFE5C #define CHANGE_LINK_STATE 0xFE5B #define RESET_LOAD_REG 0xFE5E @@ -712,6 +713,7 @@ #define PHY_RCR1 0x02 #define PHY_RCR1_ADP_TIME_4 0x0400 #define PHY_RCR1_VCO_COARSE 0x001F +#define PHY_RCR1_INIT_27S 0x0A1F #define PHY_SSCCR2 0x02 #define PHY_SSCCR2_PLL_NCODE 0x0A00 #define PHY_SSCCR2_TIME0 0x001C @@ -724,6 +726,7 @@ #define PHY_RCR2_FREQSEL_12 0x0040 #define PHY_RCR2_CDR_SC_12P 0x0010 #define PHY_RCR2_CALIB_LATE 0x0002 +#define PHY_RCR2_INIT_27S 0xC152 #define PHY_SSCCR3 0x03 #define PHY_SSCCR3_STEP_IN 0x2740 #define PHY_SSCCR3_CHECK_DELAY 0x0008 @@ -800,12 +803,14 @@ #define PHY_ANA1A_RXT_BIST 0x0500 #define PHY_ANA1A_TXR_BIST 0x0040 #define PHY_ANA1A_REV 0x0006 +#define PHY_FLD0_INIT_27S 0x2546 #define PHY_FLD1 0x1B #define PHY_FLD2 0x1C #define PHY_FLD3 0x1D #define PHY_FLD3_TIMER_4 0x0800 #define PHY_FLD3_TIMER_6 0x0020 #define PHY_FLD3_RXDELINK 0x0004 +#define PHY_FLD3_INIT_27S 0x0004 #define PHY_ANA1D 0x1D #define PHY_ANA1D_DEBUG_ADDR 0x0004 #define _PHY_FLD0 0x1D @@ -824,6 +829,7 @@ #define PHY_FLD4_BER_COUNT 0x00E0 #define PHY_FLD4_BER_TIMER 0x000A #define PHY_FLD4_BER_CHK_EN 0x0001 +#define PHY_FLD4_INIT_27S 0x5C7F #define PHY_DIG1E 0x1E #define PHY_DIG1E_REV 0x4000 #define PHY_DIG1E_D0_X_D1 0x1000 -- cgit v1.2.3 From 8a97d4287e2659f3460a8dec61ccc935154726c0 Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Wed, 7 Oct 2015 09:44:41 +0900 Subject: mfd: sec-core: Disable buck voltage reset on watchdog falling edge The WRSTBI bit (disabled by default but enabled by bootloader), when set, is responsible for resetting voltages to default values of certain bucks on falling edge of Warm Reset Input pin from AP. However on some boards (with S2MPS13) the pin is pulled down so any suspend will effectively trigger the reset of bucks supplying the power to the little and big cores. In the same time when resuming, these bucks must provide voltage greater or equal to voltage before suspend to match the frequency chosen by cpufreq. If voltage (default value of voltage after reset) is lower than one set by cpufreq before suspend, then system will hang during resuming. Signed-off-by: Krzysztof Kozlowski Reported-by: Bartlomiej Zolnierkiewicz Tested-by: Bartlomiej Zolnierkiewicz Signed-off-by: Lee Jones --- include/linux/mfd/samsung/core.h | 2 ++ include/linux/mfd/samsung/s2mps13.h | 1 + 2 files changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/samsung/core.h b/include/linux/mfd/samsung/core.h index aa78957e092f..a06098639399 100644 --- a/include/linux/mfd/samsung/core.h +++ b/include/linux/mfd/samsung/core.h @@ -134,6 +134,8 @@ struct sec_platform_data { int buck4_init; /* Whether or not manually set PWRHOLD to low during shutdown. */ bool manual_poweroff; + /* Disable the WRSTBI (buck voltage warm reset) when probing? */ + bool disable_wrstbi; }; /** diff --git a/include/linux/mfd/samsung/s2mps13.h b/include/linux/mfd/samsung/s2mps13.h index b1fd675fa36f..239e977ba45d 100644 --- a/include/linux/mfd/samsung/s2mps13.h +++ b/include/linux/mfd/samsung/s2mps13.h @@ -184,5 +184,6 @@ enum s2mps13_regulators { * Let's assume that default value will be set. */ #define S2MPS13_BUCK_RAMP_DELAY 12500 +#define S2MPS13_REG_WRSTBI_MASK BIT(5) #endif /* __LINUX_MFD_S2MPS13_H */ -- cgit v1.2.3 From a76caf55e5b356ba20a5a43ac4d9f7a04b20941d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?=C3=98rjan=20Eide?= Date: Thu, 10 Sep 2015 18:09:30 +0100 Subject: thermal: Add devfreq cooling MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add a generic thermal cooling device for devfreq, that is similar to cpu_cooling. The device must use devfreq. In order to use the power extension of the cooling device, it must have registered its OPPs using the OPP library. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Ørjan Eide Signed-off-by: Eduardo Valentin --- include/linux/devfreq_cooling.h | 81 +++++++++++++++++++++++++++++++++++++++++ 1 file changed, 81 insertions(+) create mode 100644 include/linux/devfreq_cooling.h (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h new file mode 100644 index 000000000000..ee5f0ec9290b --- /dev/null +++ b/include/linux/devfreq_cooling.h @@ -0,0 +1,81 @@ +/* + * devfreq_cooling: Thermal cooling device implementation for devices using + * devfreq + * + * Copyright (C) 2014-2015 ARM Limited + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#ifndef __DEVFREQ_COOLING_H__ +#define __DEVFREQ_COOLING_H__ + +#include +#include + +#ifdef CONFIG_DEVFREQ_THERMAL + +/** + * struct devfreq_cooling_power - Devfreq cooling power ops + * @get_static_power: Take voltage, in mV, and return the static power + * in mW. If NULL, the static power is assumed + * to be 0. + * @get_dynamic_power: Take voltage, in mV, and frequency, in HZ, and + * return the dynamic power draw in mW. If NULL, + * a simple power model is used. + * @dyn_power_coeff: Coefficient for the simple dynamic power model in + * mW/(MHz mV mV). + * If get_dynamic_power() is NULL, then the + * dynamic power is calculated as + * @dyn_power_coeff * frequency * voltage^2 + */ +struct devfreq_cooling_power { + unsigned long (*get_static_power)(unsigned long voltage); + unsigned long (*get_dynamic_power)(unsigned long freq, + unsigned long voltage); + unsigned long dyn_power_coeff; +}; + +struct devfreq_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power); +struct devfreq_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); +struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df); +void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc); + +#else /* !CONFIG_DEVFREQ_THERMAL */ + +struct devfreq_cooling_device * +of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, + struct devfreq_cooling_power *dfc_power) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct devfreq_cooling_device * +of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) +{ + return ERR_PTR(-EINVAL); +} + +static inline struct devfreq_cooling_device * +devfreq_cooling_register(struct devfreq *df) +{ + return ERR_PTR(-EINVAL); +} + +static inline void +devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +{ +} + +#endif /* CONFIG_DEVFREQ_THERMAL */ +#endif /* __DEVFREQ_COOLING_H__ */ -- cgit v1.2.3 From df5c7386f62d2db95ca48005087195e9a15e2b1f Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 13 Oct 2015 20:09:19 +0300 Subject: dmaengine: dw: some Intel devices has no memcpy support Provide a flag to choose if the device does support memory-to-memory transfers. At least this is not true for iDMA32 controller that might be supported in the future. Besides that Intel BayTrail and Braswell users should not try this feature due to HW specific behaviour. Signed-off-by: Andy Shevchenko Acked-by: Viresh Kumar Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-dw.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-dw.h b/include/linux/platform_data/dma-dw.h index 87ac14c584f2..03b6095d3b18 100644 --- a/include/linux/platform_data/dma-dw.h +++ b/include/linux/platform_data/dma-dw.h @@ -37,6 +37,7 @@ struct dw_dma_slave { * @nr_channels: Number of channels supported by hardware (max 8) * @is_private: The device channels should be marked as private and not for * by the general purpose DMA channel allocator. + * @is_memcpy: The device channels do support memory-to-memory transfers. * @chan_allocation_order: Allocate channels starting from 0 or 7 * @chan_priority: Set channel priority increasing from 0 to 7 or 7 to 0. * @block_size: Maximum block size supported by the controller @@ -47,6 +48,7 @@ struct dw_dma_slave { struct dw_dma_platform_data { unsigned int nr_channels; bool is_private; + bool is_memcpy; #define CHAN_ALLOCATION_ASCENDING 0 /* zero to seven */ #define CHAN_ALLOCATION_DESCENDING 1 /* seven to zero */ unsigned char chan_allocation_order; -- cgit v1.2.3 From e56f81e0b01ef4e45292d8c1e19edd4d09724e14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Oct 2015 14:10:50 +0200 Subject: dm: refactor ioctl handling This moves the call to blkdev_ioctl and the argument checking to DM core code, and only leaves a callout to find the block device to operate on in the targets. This simplifies the code and allows us to pass through ioctl-like command using other methods in the next patch. Also split out a helper around calling the prepare_ioctl method that will be reused for persistent reservation handling. Signed-off-by: Christoph Hellwig Signed-off-by: Mike Snitzer --- include/linux/device-mapper.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 76d23fa8c7d3..ec1c61c87d89 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -79,8 +79,8 @@ typedef void (*dm_status_fn) (struct dm_target *ti, status_type_t status_type, typedef int (*dm_message_fn) (struct dm_target *ti, unsigned argc, char **argv); -typedef int (*dm_ioctl_fn) (struct dm_target *ti, unsigned int cmd, - unsigned long arg); +typedef int (*dm_prepare_ioctl_fn) (struct dm_target *ti, + struct block_device **bdev, fmode_t *mode); /* * These iteration functions are typically used to check (and combine) @@ -156,7 +156,7 @@ struct target_type { dm_resume_fn resume; dm_status_fn status; dm_message_fn message; - dm_ioctl_fn ioctl; + dm_prepare_ioctl_fn prepare_ioctl; dm_busy_fn busy; dm_iterate_devices_fn iterate_devices; dm_io_hints_fn io_hints; -- cgit v1.2.3 From f3f86e33dc3da437fa4f204588ce7c78ea756982 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Fri, 30 Oct 2015 16:53:57 -0700 Subject: vfs: Fix pathological performance case for __alloc_fd() Al Viro points out that: > > * [Linux-specific aside] our __alloc_fd() can degrade quite badly > > with some use patterns. The cacheline pingpong in the bitmap is probably > > inevitable, unless we accept considerably heavier memory footprint, > > but we also have a case when alloc_fd() takes O(n) and it's _not_ hard > > to trigger - close(3);open(...); will have the next open() after that > > scanning the entire in-use bitmap. And Eric Dumazet has a somewhat realistic multithreaded microbenchmark that opens and closes a lot of sockets with minimal work per socket. This patch largely fixes it. We keep a 2nd-level bitmap of the open file bitmaps, showing which words are already full. So then we can traverse that second-level bitmap to efficiently skip already allocated file descriptors. On his benchmark, this improves performance by up to an order of magnitude, by avoiding the excessive open file bitmap scanning. Tested-and-acked-by: Eric Dumazet Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/fdtable.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 674e3e226465..5295535b60c6 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -26,6 +26,7 @@ struct fdtable { struct file __rcu **fd; /* current fd array */ unsigned long *close_on_exec; unsigned long *open_fds; + unsigned long *full_fds_bits; struct rcu_head rcu; }; @@ -59,6 +60,7 @@ struct files_struct { int next_fd; unsigned long close_on_exec_init[1]; unsigned long open_fds_init[1]; + unsigned long full_fds_bits_init[1]; struct file __rcu * fd_array[NR_OPEN_DEFAULT]; }; -- cgit v1.2.3 From a5ad88ce8c7fae7ddc72ee49a11a75aa837788e0 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 1 Nov 2015 17:09:15 -0800 Subject: mm: get rid of 'vmalloc_info' from /proc/meminfo It turns out that at least some versions of glibc end up reading /proc/meminfo at every single startup, because glibc wants to know the amount of memory the machine has. And while that's arguably insane, it's just how things are. And it turns out that it's not all that expensive most of the time, but the vmalloc information statistics (amount of virtual memory used in the vmalloc space, and the biggest remaining chunk) can be rather expensive to compute. The 'get_vmalloc_info()' function actually showed up on my profiles as 4% of the CPU usage of "make test" in the git source repository, because the git tests are lots of very short-lived shell-scripts etc. It turns out that apparently this same silly vmalloc info gathering shows up on the facebook servers too, according to Dave Jones. So it's not just "make test" for git. We had two patches to just cache the information (one by me, one by Ingo) to mitigate this issue, but the whole vmalloc information of of rather dubious value to begin with, and people who *actually* want to know what the situation is wrt the vmalloc area should just look at the much more complete /proc/vmallocinfo instead. In fact, according to my testing - and perhaps more importantly, according to that big search engine in the sky: Google - there is nothing out there that actually cares about those two expensive fields: VmallocUsed and VmallocChunk. So let's try to just remove them entirely. Actually, this just removes the computation and reports the numbers as zero for now, just to try to be minimally intrusive. If this breaks anything, we'll obviously have to re-introduce the code to compute this all and add the caching patches on top. But if given the option, I'd really prefer to just remove this bad idea entirely rather than add even more code to work around our historical mistake that likely nobody really cares about. Signed-off-by: Linus Torvalds --- include/linux/vmalloc.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmalloc.h b/include/linux/vmalloc.h index 0ec598381f97..3bff87a25a42 100644 --- a/include/linux/vmalloc.h +++ b/include/linux/vmalloc.h @@ -182,22 +182,10 @@ pcpu_free_vm_areas(struct vm_struct **vms, int nr_vms) # endif #endif -struct vmalloc_info { - unsigned long used; - unsigned long largest_chunk; -}; - #ifdef CONFIG_MMU #define VMALLOC_TOTAL (VMALLOC_END - VMALLOC_START) -extern void get_vmalloc_info(struct vmalloc_info *vmi); #else - #define VMALLOC_TOTAL 0UL -#define get_vmalloc_info(vmi) \ -do { \ - (vmi)->used = 0; \ - (vmi)->largest_chunk = 0; \ -} while (0) #endif #endif /* _LINUX_VMALLOC_H */ -- cgit v1.2.3 From 42e5c3e2725ba0c0affc1fc8a6aa1d5cf31ecb75 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:27:35 -0400 Subject: SUNRPC: Abstract backchannel operations xprt_{setup,destroy}_backchannel() won't be adequate for RPC/RMDA bi-direction. In particular, receive buffers have to be pre- registered and posted in order to receive incoming backchannel requests. Add a virtual function call to allow the insertion of appropriate backchannel setup and destruction methods for each transport. In addition, freeing a backchannel request is a little different for RPC/RDMA. Introduce an rpc_xprt_op to handle the difference. Signed-off-by: Chuck Lever Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/bc_xprt.h | 5 +++++ include/linux/sunrpc/xprt.h | 5 +++++ 2 files changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 8df43c9f11dc..4397a4824c81 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -38,6 +38,11 @@ void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); +/* Socket backchannel transport methods */ +int xprt_setup_bc(struct rpc_xprt *xprt, unsigned int min_reqs); +void xprt_destroy_bc(struct rpc_xprt *xprt, unsigned int max_reqs); +void xprt_free_bc_rqst(struct rpc_rqst *req); + /* * Determine if a shared backchannel is in use */ diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 0fb9acbb4780..3f79c4a4ce74 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -136,6 +136,11 @@ struct rpc_xprt_ops { int (*enable_swap)(struct rpc_xprt *xprt); void (*disable_swap)(struct rpc_xprt *xprt); void (*inject_disconnect)(struct rpc_xprt *xprt); + int (*bc_setup)(struct rpc_xprt *xprt, + unsigned int min_reqs); + void (*bc_free_rqst)(struct rpc_rqst *rqst); + void (*bc_destroy)(struct rpc_xprt *xprt, + unsigned int max_reqs); }; /* -- cgit v1.2.3 From 9468431962616c2449d47c482208a5967e011bf9 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:16 -0400 Subject: svcrdma: Add backward direction service for RPC/RDMA transport On NFSv4.1 mount points, the Linux NFS client uses this transport endpoint to receive backward direction calls and route replies back to the NFSv4.1 server. Signed-off-by: Chuck Lever Acked-by: "J. Bruce Fields" Reviewed-by: Sagi Grimberg Tested-By: Devesh Sharma Signed-off-by: Anna Schumaker --- include/linux/sunrpc/svc_rdma.h | 6 +++++- include/linux/sunrpc/xprt.h | 1 + 2 files changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svc_rdma.h b/include/linux/sunrpc/svc_rdma.h index 7ccc961f33e9..fb4013edcf57 100644 --- a/include/linux/sunrpc/svc_rdma.h +++ b/include/linux/sunrpc/svc_rdma.h @@ -228,9 +228,13 @@ extern void svc_rdma_put_frmr(struct svcxprt_rdma *, struct svc_rdma_fastreg_mr *); extern void svc_sq_reap(struct svcxprt_rdma *); extern void svc_rq_reap(struct svcxprt_rdma *); -extern struct svc_xprt_class svc_rdma_class; extern void svc_rdma_prep_reply_hdr(struct svc_rqst *); +extern struct svc_xprt_class svc_rdma_class; +#ifdef CONFIG_SUNRPC_BACKCHANNEL +extern struct svc_xprt_class svc_rdma_bc_class; +#endif + /* svc_rdma.c */ extern int svc_rdma_init(void); extern void svc_rdma_cleanup(void); diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 3f79c4a4ce74..82c083946ef0 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -158,6 +158,7 @@ enum xprt_transports { XPRT_TRANSPORT_TCP = IPPROTO_TCP, XPRT_TRANSPORT_BC_TCP = IPPROTO_TCP | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_RDMA = 256, + XPRT_TRANSPORT_BC_RDMA = XPRT_TRANSPORT_RDMA | XPRT_TRANSPORT_BC, XPRT_TRANSPORT_LOCAL = 257, }; -- cgit v1.2.3 From 3d4e204d81eec30abffe55d01912e07ce81eef12 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Tue, 29 Sep 2015 22:43:32 +0800 Subject: ring_buffer: ring_buffer_empty{cpu}() can return boolean Make ring_buffer_empty() and ring_buffer_empty_cpu() return bool. No functional change. Link: http://lkml.kernel.org/r/1443537816-5788-5-git-send-email-bywxiaobai@163.com Signed-off-by: Yaowei Bai Signed-off-by: Steven Rostedt --- include/linux/ring_buffer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ring_buffer.h b/include/linux/ring_buffer.h index e2c13cd863bd..4acc552e9279 100644 --- a/include/linux/ring_buffer.h +++ b/include/linux/ring_buffer.h @@ -154,8 +154,8 @@ ring_buffer_swap_cpu(struct ring_buffer *buffer_a, } #endif -int ring_buffer_empty(struct ring_buffer *buffer); -int ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); +bool ring_buffer_empty(struct ring_buffer *buffer); +bool ring_buffer_empty_cpu(struct ring_buffer *buffer, int cpu); void ring_buffer_record_disable(struct ring_buffer *buffer); void ring_buffer_record_enable(struct ring_buffer *buffer); -- cgit v1.2.3 From 3c99c2cef75eb5bfc05c5728e4560f3ee656d47e Mon Sep 17 00:00:00 2001 From: Javi Merino Date: Mon, 2 Nov 2015 19:03:03 +0000 Subject: thermal: devfreq_cooling: use a thermal_cooling_device for register and unregister Be consistent with what other cooling devices do and return a struct thermal_cooling_device * on register. Also, for the unregister, accept a struct thermal_cooling_device * as parameter. Cc: Zhang Rui Cc: Eduardo Valentin Signed-off-by: Javi Merino Signed-off-by: Eduardo Valentin --- include/linux/devfreq_cooling.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devfreq_cooling.h b/include/linux/devfreq_cooling.h index ee5f0ec9290b..7adf6cc4b305 100644 --- a/include/linux/devfreq_cooling.h +++ b/include/linux/devfreq_cooling.h @@ -43,37 +43,37 @@ struct devfreq_cooling_power { unsigned long dyn_power_coeff; }; -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power); -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df); -struct devfreq_cooling_device *devfreq_cooling_register(struct devfreq *df); -void devfreq_cooling_unregister(struct devfreq_cooling_device *dfc); +struct thermal_cooling_device *devfreq_cooling_register(struct devfreq *df); +void devfreq_cooling_unregister(struct thermal_cooling_device *dfc); #else /* !CONFIG_DEVFREQ_THERMAL */ -struct devfreq_cooling_device * +struct thermal_cooling_device * of_devfreq_cooling_register_power(struct device_node *np, struct devfreq *df, struct devfreq_cooling_power *dfc_power) { return ERR_PTR(-EINVAL); } -static inline struct devfreq_cooling_device * +static inline struct thermal_cooling_device * of_devfreq_cooling_register(struct device_node *np, struct devfreq *df) { return ERR_PTR(-EINVAL); } -static inline struct devfreq_cooling_device * +static inline struct thermal_cooling_device * devfreq_cooling_register(struct devfreq *df) { return ERR_PTR(-EINVAL); } static inline void -devfreq_cooling_unregister(struct devfreq_cooling_device *dfc) +devfreq_cooling_unregister(struct thermal_cooling_device *dfc) { } -- cgit v1.2.3 From 76566773a1f1c2295ed901b6f1241cfe10d99029 Mon Sep 17 00:00:00 2001 From: Chuck Lever Date: Sat, 24 Oct 2015 17:28:32 -0400 Subject: NFS: Enable client side NFSv4.1 backchannel to use other transports Forechannel transports get their own "bc_up" method to create an endpoint for the backchannel service. Signed-off-by: Chuck Lever [Anna Schumaker: Add forward declaration of struct net to xprt.h] Signed-off-by: Anna Schumaker --- include/linux/sunrpc/xprt.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 82c083946ef0..69ef5b3ab038 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -54,6 +54,8 @@ enum rpc_display_format_t { struct rpc_task; struct rpc_xprt; struct seq_file; +struct svc_serv; +struct net; /* * This describes a complete RPC request @@ -138,6 +140,7 @@ struct rpc_xprt_ops { void (*inject_disconnect)(struct rpc_xprt *xprt); int (*bc_setup)(struct rpc_xprt *xprt, unsigned int min_reqs); + int (*bc_up)(struct svc_serv *serv, struct net *net); void (*bc_free_rqst)(struct rpc_rqst *rqst); void (*bc_destroy)(struct rpc_xprt *xprt, unsigned int max_reqs); -- cgit v1.2.3 From 79dbd1baa651cece408e68a1b445f3628c4b5bdc Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 26 Oct 2015 22:23:56 +0100 Subject: libceph: msg signing callouts don't need con argument We can use msg->con instead - at the point we sign an outgoing message or check the signature on the incoming one, msg->con is always set. We wouldn't know how to sign a message without an associated session (i.e. msg->con == NULL) and being able to sign a message using an explicitly provided authorizer is of no use. Signed-off-by: Ilya Dryomov --- include/linux/ceph/messenger.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index b2371d9b51fa..3687ff0f0133 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -43,10 +43,9 @@ struct ceph_connection_operations { struct ceph_msg * (*alloc_msg) (struct ceph_connection *con, struct ceph_msg_header *hdr, int *skip); - int (*sign_message) (struct ceph_connection *con, struct ceph_msg *msg); - int (*check_message_signature) (struct ceph_connection *con, - struct ceph_msg *msg); + int (*sign_message) (struct ceph_msg *msg); + int (*check_message_signature) (struct ceph_msg *msg); }; /* use format string %s%d */ -- cgit v1.2.3 From 859bff51dc5e92ddfb5eb6f17b8040d9311095bb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 28 Oct 2015 23:50:58 +0100 Subject: libceph: stop duplicating client fields in messenger supported_features and required_features serve no purpose at all, while nocrc and tcp_nodelay belong to ceph_options::flags. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 1 + include/linux/ceph/messenger.h | 11 +---------- 2 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index 397c5cd09794..a7caafe03d3c 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -137,6 +137,7 @@ struct ceph_client { #endif }; +#define from_msgr(ms) container_of(ms, struct ceph_client, msgr) /* diff --git a/include/linux/ceph/messenger.h b/include/linux/ceph/messenger.h index 3687ff0f0133..71b1d6cdcb5d 100644 --- a/include/linux/ceph/messenger.h +++ b/include/linux/ceph/messenger.h @@ -57,8 +57,6 @@ struct ceph_messenger { atomic_t stopping; possible_net_t net; - bool nocrc; - bool tcp_nodelay; /* * the global_seq counts connections i (attempt to) initiate @@ -66,9 +64,6 @@ struct ceph_messenger { */ u32 global_seq; spinlock_t global_seq_lock; - - u64 supported_features; - u64 required_features; }; enum ceph_msg_data_type { @@ -267,11 +262,7 @@ extern void ceph_msgr_exit(void); extern void ceph_msgr_flush(void); extern void ceph_messenger_init(struct ceph_messenger *msgr, - struct ceph_entity_addr *myaddr, - u64 supported_features, - u64 required_features, - bool nocrc, - bool tcp_nodelay); + struct ceph_entity_addr *myaddr); extern void ceph_messenger_fini(struct ceph_messenger *msgr); extern void ceph_con_init(struct ceph_connection *con, void *private, -- cgit v1.2.3 From a51983e4dd2d4d63912aab939f657c4cd476e21a Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 28 Oct 2015 23:52:06 +0100 Subject: libceph: add nocephx_sign_messages option Support for message signing was merged into 3.19, along with nocephx_require_signatures option. But, all that option does is allow the kernel client to talk to clusters that don't support MSG_AUTH feature bit. That's pretty useless, given that it's been supported since bobtail. Meanwhile, if one disables message signing on the server side with "cephx sign messages = false", it becomes impossible to use the kernel client since it expects messages to be signed if MSG_AUTH was negotiated. Add nocephx_sign_messages option to support this use case. Signed-off-by: Ilya Dryomov --- include/linux/ceph/libceph.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index a7caafe03d3c..3e3799cdc6e6 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -29,8 +29,9 @@ #define CEPH_OPT_NOSHARE (1<<1) /* don't share client with other sbs */ #define CEPH_OPT_MYIP (1<<2) /* specified my ip */ #define CEPH_OPT_NOCRC (1<<3) /* no data crc on writes */ -#define CEPH_OPT_NOMSGAUTH (1<<4) /* not require cephx message signature */ +#define CEPH_OPT_NOMSGAUTH (1<<4) /* don't require msg signing feat */ #define CEPH_OPT_TCP_NODELAY (1<<5) /* TCP_NODELAY on TCP sockets */ +#define CEPH_OPT_NOMSGSIGN (1<<6) /* don't sign msgs */ #define CEPH_OPT_DEFAULT (CEPH_OPT_TCP_NODELAY) -- cgit v1.2.3 From a15920bea0428cd22291637f6c72542b1843e65f Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Mon, 2 Nov 2015 17:42:42 -0500 Subject: tracepoints: Fix documentation of RCU lockdep checks The documentation on top of __DECLARE_TRACE() does not match its implementation since the condition check has been added to the RCU lockdep checks. Update the documentation to match its implementation. Link: http://lkml.kernel.org/r/1446504164-21563-1-git-send-email-mathieu.desnoyers@efficios.com CC: Dave Hansen Fixes: a05d59a56733 "tracing: Add condition check to RCU lockdep checks" Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 6b79537a42b1..696a339c592c 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -184,10 +184,11 @@ extern void syscall_unregfunc(void); * structure. Force alignment to the same alignment as the section start. * * When lockdep is enabled, we make sure to always do the RCU portions of - * the tracepoint code, regardless of whether tracing is on or we match the - * condition. This lets us find RCU issues triggered with tracepoints even - * when this tracepoint is off. This code has no purpose other than poking - * RCU a bit. + * the tracepoint code, regardless of whether tracing is on. However, + * don't check if the condition is false, due to interaction with idle + * instrumentation. This lets us find RCU issues triggered with tracepoints + * even when this tracepoint is off. This code has no purpose other than + * poking RCU a bit. */ #define __DECLARE_TRACE(name, proto, args, cond, data_proto, data_args) \ extern struct tracepoint __tracepoint_##name; \ -- cgit v1.2.3 From c210129760a010b555372ef74f4e1a46d4eb8a22 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Oct 2015 14:58:07 +0100 Subject: bpf: align and clean bpf_{map,prog}_get helpers Add a bpf_map_get() function that we're going to use later on and align/clean the remaining helpers a bit so that we have them a bit more consistent: - __bpf_map_get() and __bpf_prog_get() that both work on the fd struct, check whether the descriptor is eBPF and return the pointer to the map/prog stored in the private data. Also, we can return f.file->private_data directly, the function signature is enough of a documentation already. - bpf_map_get() and bpf_prog_get() that both work on u32 user fd, call their respective __bpf_map_get()/__bpf_prog_get() variants, and take a reference. Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/linux/bpf.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 75718fa28260..0b5fb6acef64 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,7 +167,7 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); -struct bpf_map *bpf_map_get(struct fd f); +struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; -- cgit v1.2.3 From b2197755b2633e164a439682fb05a9b5ea48f706 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 29 Oct 2015 14:58:09 +0100 Subject: bpf: add support for persistent maps/progs This work adds support for "persistent" eBPF maps/programs. The term "persistent" is to be understood that maps/programs have a facility that lets them survive process termination. This is desired by various eBPF subsystem users. Just to name one example: tc classifier/action. Whenever tc parses the ELF object, extracts and loads maps/progs into the kernel, these file descriptors will be out of reach after the tc instance exits. So a subsequent tc invocation won't be able to access/relocate on this resource, and therefore maps cannot easily be shared, f.e. between the ingress and egress networking data path. The current workaround is that Unix domain sockets (UDS) need to be instrumented in order to pass the created eBPF map/program file descriptors to a third party management daemon through UDS' socket passing facility. This makes it a bit complicated to deploy shared eBPF maps or programs (programs f.e. for tail calls) among various processes. We've been brainstorming on how we could tackle this issue and various approches have been tried out so far, which can be read up further in the below reference. The architecture we eventually ended up with is a minimal file system that can hold map/prog objects. The file system is a per mount namespace singleton, and the default mount point is /sys/fs/bpf/. Any subsequent mounts within a given namespace will point to the same instance. The file system allows for creating a user-defined directory structure. The objects for maps/progs are created/fetched through bpf(2) with two new commands (BPF_OBJ_PIN/BPF_OBJ_GET). I.e. a bpf file descriptor along with a pathname is being passed to bpf(2) that in turn creates (we call it eBPF object pinning) the file system nodes. Only the pathname is being passed to bpf(2) for getting a new BPF file descriptor to an existing node. The user can use that to access maps and progs later on, through bpf(2). Removal of file system nodes is being managed through normal VFS functions such as unlink(2), etc. The file system code is kept to a very minimum and can be further extended later on. The next step I'm working on is to add dump eBPF map/prog commands to bpf(2), so that a specification from a given file descriptor can be retrieved. This can be used by things like CRIU but also applications can inspect the meta data after calling BPF_OBJ_GET. Big thanks also to Alexei and Hannes who significantly contributed in the design discussion that eventually let us end up with this architecture here. Reference: https://lkml.org/lkml/2015/10/15/925 Signed-off-by: Daniel Borkmann Signed-off-by: Alexei Starovoitov Signed-off-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/linux/bpf.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 0b5fb6acef64..de464e6683b6 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -167,11 +167,18 @@ struct bpf_prog *bpf_prog_get(u32 ufd); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); +struct bpf_map *bpf_map_get(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); void bpf_map_put(struct bpf_map *map); extern int sysctl_unprivileged_bpf_disabled; +int bpf_map_new_fd(struct bpf_map *map); +int bpf_prog_new_fd(struct bpf_prog *prog); + +int bpf_obj_pin_user(u32 ufd, const char __user *pathname); +int bpf_obj_get_user(const char __user *pathname); + /* verify correctness of eBPF program */ int bpf_check(struct bpf_prog **fp, union bpf_attr *attr); #else -- cgit v1.2.3 From fd867d51f889aec11cca235ebb008578780d052d Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Mon, 2 Nov 2015 21:55:59 -0500 Subject: net/core: generic support for disabling netdev features down stack There are some netdev features, which when disabled on an upper device, such as a bonding master or a bridge, must be disabled and cannot be re-enabled on underlying devices. This is a rework of an earlier more heavy-handed appraoch, which simply disables and prevents re-enabling of netdev features listed in a new define in include/net/netdev_features.h, NETIF_F_UPPER_DISABLES. Any upper device that disables a flag in that feature mask, the disabling will propagate down the stack, and any lower device that has any upper device with one of those flags disabled should not be able to enable said flag. Initially, only LRO is included for proof of concept, and because this code effectively does the same thing as dev_disable_lro(), though it will also activate from the ethtool path, which was one of the goals here. [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: on [root@dell-per730-01 ~]# ethtool -K bond0 lro off [root@dell-per730-01 ~]# ethtool -k bond0 |grep large large-receive-offload: off [root@dell-per730-01 ~]# ethtool -k p5p1 |grep large large-receive-offload: off dmesg dump: [ 1033.277986] bond0: Disabling feature 0x0000000000008000 on lower dev p5p2. [ 1034.067949] bnx2x 0000:06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 1034.753612] bond0: Disabling feature 0x0000000000008000 on lower dev p5p1. [ 1035.591019] bnx2x 0000:06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 This has been successfully tested with bnx2x, qlcnic and netxen network cards as slaves in a bond interface. Turning LRO on or off on the master also turns it on or off on each of the slaves, new slaves are added with LRO in the same state as the master, and LRO can't be toggled on the slaves. Also, this should largely remove the need for dev_disable_lro(), and most, if not all, of its call sites can be replaced by simply making sure NETIF_F_LRO isn't included in the relevant device's feature flags. Note that this patch is driven by bug reports from users saying it was confusing that bonds and slaves had different settings for the same features, and while it won't be 100% in sync if a lower device doesn't support a feature like LRO, I think this is a good step in the right direction. CC: "David S. Miller" CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 9672781c593d..0f5837a9b1ba 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -125,6 +125,11 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) +#define for_each_netdev_feature(mask_addr, feature) \ + int bit; \ + for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) \ + feature = __NETIF_F_BIT(bit); + /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ #define NETIF_F_NEVER_CHANGE (NETIF_F_VLAN_CHALLENGED | \ @@ -167,6 +172,12 @@ enum { */ #define NETIF_F_ALL_FOR_ALL (NETIF_F_NOCACHE_COPY | NETIF_F_FSO) +/* + * If upper/master device has these features disabled, they must be disabled + * on all lower/slave devices as well. + */ +#define NETIF_F_UPPER_DISABLES NETIF_F_LRO + /* changeable features with no special hardware requirements */ #define NETIF_F_SOFT_FEATURES (NETIF_F_GSO | NETIF_F_GRO) -- cgit v1.2.3 From 2976b17989094e97567510be3ea91fc2f0c7aab3 Mon Sep 17 00:00:00 2001 From: Simon Guinot Date: Sat, 26 Sep 2015 23:02:34 +0200 Subject: leds: netxbig: add device tree binding This patch adds device tree support for the netxbig LEDs. This also introduces a additionnal DT binding for the GPIO extension bus (netxbig-gpio-ext) used to configure the LEDs. Since this bus could also be used to control other devices, then it seems more suitable to have it in a separate DT binding. Signed-off-by: Simon Guinot Acked-by: Linus Walleij Signed-off-by: Jacek Anaszewski --- include/linux/platform_data/leds-kirkwood-netxbig.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-kirkwood-netxbig.h b/include/linux/platform_data/leds-kirkwood-netxbig.h index d2be19a51acd..3c85a735c380 100644 --- a/include/linux/platform_data/leds-kirkwood-netxbig.h +++ b/include/linux/platform_data/leds-kirkwood-netxbig.h @@ -40,6 +40,7 @@ struct netxbig_led { int mode_addr; int *mode_val; int bright_addr; + int bright_max; }; struct netxbig_led_platform_data { -- cgit v1.2.3 From 1e935949111e77b2b1b6fa550e88ff0573c2f4c7 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Tue, 29 Sep 2015 01:27:24 -0700 Subject: watchdog: Always evaluate new timeout against min_timeout MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Up to now, a new timeout value is only evaluated against min_timeout if max_timeout is provided. This does not really make sense; a driver can have a minimum timeout even if it does not have a maximum timeout. Ensure that it is not smaller than min_timeout, even if max_timeout is not set. Signed-off-by: Guenter Roeck Acked-by: Uwe Kleine-König Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index d74a0e907b9e..e90e3ea5ebeb 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -119,8 +119,15 @@ static inline void watchdog_set_nowayout(struct watchdog_device *wdd, bool noway /* Use the following function to check if a timeout value is invalid */ static inline bool watchdog_timeout_invalid(struct watchdog_device *wdd, unsigned int t) { - return ((wdd->max_timeout != 0) && - (t < wdd->min_timeout || t > wdd->max_timeout)); + /* + * The timeout is invalid if + * - the requested value is smaller than the configured minimum timeout, + * or + * - a maximum timeout is configured, and the requested value is larger + * than the maximum timeout. + */ + return t < wdd->min_timeout || + (wdd->max_timeout && t > wdd->max_timeout); } /* Use the following functions to manipulate watchdog driver specific data */ -- cgit v1.2.3 From 5f94c943d5dd4b51f3248193e622dd5fcdbb8b11 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Stefan=20S=C3=B8rensen?= Date: Tue, 3 Nov 2015 09:34:07 +0100 Subject: ptp: Change ptp_class to a proper bitmask MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Change the definition of PTP_CLASS_L2 to not have any bits overlapping with the other defined protocol values, allowing the PTP_CLASS_* definitions to be for simple filtering on packet type. Signed-off-by: Stefan Sørensen Signed-off-by: David S. Miller --- include/linux/ptp_classify.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ptp_classify.h b/include/linux/ptp_classify.h index 159c987b1853..a079656b614c 100644 --- a/include/linux/ptp_classify.h +++ b/include/linux/ptp_classify.h @@ -32,9 +32,9 @@ #define PTP_CLASS_VMASK 0x0f /* max protocol version is 15 */ #define PTP_CLASS_IPV4 0x10 /* event in an IPV4 UDP packet */ #define PTP_CLASS_IPV6 0x20 /* event in an IPV6 UDP packet */ -#define PTP_CLASS_L2 0x30 /* event in a L2 packet */ -#define PTP_CLASS_PMASK 0x30 /* mask for the packet type field */ -#define PTP_CLASS_VLAN 0x40 /* event in a VLAN tagged packet */ +#define PTP_CLASS_L2 0x40 /* event in a L2 packet */ +#define PTP_CLASS_PMASK 0x70 /* mask for the packet type field */ +#define PTP_CLASS_VLAN 0x80 /* event in a VLAN tagged packet */ #define PTP_CLASS_V1_IPV4 (PTP_CLASS_V1 | PTP_CLASS_IPV4) #define PTP_CLASS_V1_IPV6 (PTP_CLASS_V1 | PTP_CLASS_IPV6) /* probably DNE */ @@ -42,6 +42,7 @@ #define PTP_CLASS_V2_IPV6 (PTP_CLASS_V2 | PTP_CLASS_IPV6) #define PTP_CLASS_V2_L2 (PTP_CLASS_V2 | PTP_CLASS_L2) #define PTP_CLASS_V2_VLAN (PTP_CLASS_V2 | PTP_CLASS_VLAN) +#define PTP_CLASS_L4 (PTP_CLASS_IPV4 | PTP_CLASS_IPV6) #define PTP_EV_PORT 319 #define PTP_GEN_BIT 0x08 /* indicates general message, if set in message type */ -- cgit v1.2.3 From 5ba3f7d61a3a9e6d94462b207d302931b53d8c61 Mon Sep 17 00:00:00 2001 From: Jarod Wilson Date: Tue, 3 Nov 2015 10:15:59 -0500 Subject: net/core: fix for_each_netdev_feature As pointed out by Nikolay and further explained by Geert, the initial for_each_netdev_feature macro was broken, as feature would get set outside of the block of code it was intended to run in, thus only ever working for the first feature bit in the mask. While less pretty this way, this is tested and confirmed functional with multiple feature bits set in NETIF_F_UPPER_DISABLES. [root@dell-per730-01 ~]# ethtool -K bond0 lro off ... [ 242.761394] bond0: Disabling feature 0x0000000000008000 on lower dev p5p2. [ 243.552178] bnx2x 0000:06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 244.353978] bond0: Disabling feature 0x0000000000008000 on lower dev p5p1. [ 245.147420] bnx2x 0000:06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 [root@dell-per730-01 ~]# ethtool -K bond0 gro off ... [ 251.925645] bond0: Disabling feature 0x0000000000004000 on lower dev p5p2. [ 252.713693] bnx2x 0000:06:00.1 p5p2: using MSI-X IRQs: sp 74 fp[0] 76 ... fp[7] 83 [ 253.499085] bond0: Disabling feature 0x0000000000004000 on lower dev p5p1. [ 254.290922] bnx2x 0000:06:00.0 p5p1: using MSI-X IRQs: sp 62 fp[0] 64 ... fp[7] 71 Fixes: fd867d51f ("net/core: generic support for disabling netdev features down stack") CC: "David S. Miller" CC: Eric Dumazet CC: Jay Vosburgh CC: Veaceslav Falico CC: Andy Gospodarek CC: Jiri Pirko CC: Nikolay Aleksandrov CC: Michal Kubecek CC: Alexander Duyck CC: Geert Uytterhoeven CC: netdev@vger.kernel.org Signed-off-by: Jarod Wilson Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 0f5837a9b1ba..f0d87347df19 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -125,10 +125,8 @@ enum { #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) #define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) -#define for_each_netdev_feature(mask_addr, feature) \ - int bit; \ - for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) \ - feature = __NETIF_F_BIT(bit); +#define for_each_netdev_feature(mask_addr, bit) \ + for_each_set_bit(bit, (unsigned long *)mask_addr, NETDEV_FEATURE_COUNT) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -- cgit v1.2.3 From b7ceb7d50048d0dd4830f106f0fb7f5424031598 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Mon, 2 Nov 2015 17:12:27 +0100 Subject: lightnvm: refactor phys addrs type to u64 MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For cases where CONFIG_LBDAF is not set. The struct ppa_addr exceeds its type on 32 bit architectures. ppa_addr requires a 64bit integer to hold the generic ppa format. We therefore refactor it to u64 and replaces the sector_t usages with u64 for physical addresses. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 122b176600fa..5ebd70d12f35 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -134,26 +134,26 @@ struct ppa_addr { union { /* Channel-based PPA format in nand 4x2x2x2x8x10 */ struct { - sector_t ch : 4; - sector_t sec : 2; /* 4 sectors per page */ - sector_t pl : 2; /* 4 planes per LUN */ - sector_t lun : 2; /* 4 LUNs per channel */ - sector_t pg : 8; /* 256 pages per block */ - sector_t blk : 10;/* 1024 blocks per plane */ - sector_t resved : 36; + u64 ch : 4; + u64 sec : 2; /* 4 sectors per page */ + u64 pl : 2; /* 4 planes per LUN */ + u64 lun : 2; /* 4 LUNs per channel */ + u64 pg : 8; /* 256 pages per block */ + u64 blk : 10;/* 1024 blocks per plane */ + u64 resved : 36; } chnl; /* Generic structure for all addresses */ struct { - sector_t sec : NVM_SEC_BITS; - sector_t pl : NVM_PL_BITS; - sector_t pg : NVM_PG_BITS; - sector_t blk : NVM_BLK_BITS; - sector_t lun : NVM_LUN_BITS; - sector_t ch : NVM_CH_BITS; + u64 sec : NVM_SEC_BITS; + u64 pl : NVM_PL_BITS; + u64 pg : NVM_PG_BITS; + u64 blk : NVM_BLK_BITS; + u64 lun : NVM_LUN_BITS; + u64 ch : NVM_CH_BITS; } g; - sector_t ppa; + u64 ppa; }; } __packed; -- cgit v1.2.3 From 8fbcf237439f841e7e9c4675790e08ea1c295bd3 Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Tue, 3 Nov 2015 18:25:34 +0100 Subject: nfs: Remove unused xdr page offsets in getacl/setacl arguments The arguments passed around for getacl and setacl xdr encoding, struct nfs_setaclargs and struct nfs_getaclargs, both contain an array of pages, an offset into the first page, and the length of the page data. The offset is unused as it is always zero; remove it. Signed-off-by: Andreas Gruenbacher Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 4728e7e5fc49..570d630f98ae 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -705,7 +705,6 @@ struct nfs_setaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; @@ -717,7 +716,6 @@ struct nfs_getaclargs { struct nfs4_sequence_args seq_args; struct nfs_fh * fh; size_t acl_len; - unsigned int acl_pgbase; struct page ** acl_pages; }; -- cgit v1.2.3 From bb99d8ccec7f83a2730a29d1ae7eee5ffa446a9e Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Fri, 30 Oct 2015 14:25:39 +0900 Subject: tracing: Allow arch-specific stack tracer A stack frame may be used in a different way depending on cpu architecture. Thus it is not always appropriate to slurp the stack contents, as current check_stack() does, in order to calcurate a stack index (height) at a given function call. At least not on arm64. In addition, there is a possibility that we will mistakenly detect a stale stack frame which has not been overwritten. This patch makes check_stack() a weak function so as to later implement arch-specific version. Link: http://lkml.kernel.org/r/1446182741-31019-5-git-send-email-takahiro.akashi@linaro.org Signed-off-by: AKASHI Takahiro Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index 6cd8c0ee4b6f..b4c92ab9e08b 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -263,7 +263,18 @@ static inline void ftrace_kill(void) { } #endif /* CONFIG_FUNCTION_TRACER */ #ifdef CONFIG_STACK_TRACER + +#define STACK_TRACE_ENTRIES 500 + +struct stack_trace; + +extern unsigned stack_trace_index[]; +extern struct stack_trace stack_trace_max; +extern unsigned long stack_trace_max_size; +extern arch_spinlock_t max_stack_lock; + extern int stack_tracer_enabled; +void stack_trace_print(void); int stack_trace_sysctl(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, -- cgit v1.2.3 From 80220fa72b917c64675f3ba4008d2c5a7b50b281 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Nov 2015 09:00:15 +0100 Subject: watchdog: include: fix some typos Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index e90e3ea5ebeb..5f18dd9ec224 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -25,7 +25,7 @@ struct watchdog_device; * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. * @set_timeout:The routine for setting the watchdog devices timeout value. - * @get_timeleft:The routine that get's the time that's left before a reset. + * @get_timeleft:The routine that gets the time left before a reset. * @ref: The ref operation for dyn. allocated watchdog_device structs * @unref: The unref operation for dyn. allocated watchdog_device structs * @ioctl: The routines that handles extra ioctl calls. @@ -33,7 +33,7 @@ struct watchdog_device; * The watchdog_ops structure contains a list of low-level operations * that control a watchdog device. It also contains the module that owns * these operations. The start and stop function are mandatory, all other - * functions are optonal. + * functions are optional. */ struct watchdog_ops { struct module *owner; -- cgit v1.2.3 From 760d280084f8805e5de73e3591912d5db9da9dbe Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Tue, 3 Nov 2015 09:00:16 +0100 Subject: watchdog: include: add units for timeout values in kerneldoc Signed-off-by: Wolfram Sang Reviewed-by: Guenter Roeck Signed-off-by: Wim Van Sebroeck --- include/linux/watchdog.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/watchdog.h b/include/linux/watchdog.h index 5f18dd9ec224..027b1f43f12d 100644 --- a/include/linux/watchdog.h +++ b/include/linux/watchdog.h @@ -24,8 +24,8 @@ struct watchdog_device; * @stop: The routine for stopping the watchdog device. * @ping: The routine that sends a keepalive ping to the watchdog device. * @status: The routine that shows the status of the watchdog device. - * @set_timeout:The routine for setting the watchdog devices timeout value. - * @get_timeleft:The routine that gets the time left before a reset. + * @set_timeout:The routine for setting the watchdog devices timeout value (in seconds). + * @get_timeleft:The routine that gets the time left before a reset (in seconds). * @ref: The ref operation for dyn. allocated watchdog_device structs * @unref: The unref operation for dyn. allocated watchdog_device structs * @ioctl: The routines that handles extra ioctl calls. @@ -59,9 +59,9 @@ struct watchdog_ops { * @info: Pointer to a watchdog_info structure. * @ops: Pointer to the list of watchdog operations. * @bootstatus: Status of the watchdog device at boot. - * @timeout: The watchdog devices timeout value. - * @min_timeout:The watchdog devices minimum timeout value. - * @max_timeout:The watchdog devices maximum timeout value. + * @timeout: The watchdog devices timeout value (in seconds). + * @min_timeout:The watchdog devices minimum timeout value (in seconds). + * @max_timeout:The watchdog devices maximum timeout value (in seconds). * @driver-data:Pointer to the drivers private data. * @lock: Lock for watchdog core internal use only. * @status: Field that contains the devices internal status bits. -- cgit v1.2.3 From d332736df0c277905de06311ae084e2c76580a3f Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 3 Nov 2015 14:50:15 -0500 Subject: tracing: Rename max_stack_lock to stack_trace_max_lock Now that max_stack_lock is a global variable, it requires a naming convention that is unlikely to collide. Rename it to the same naming convention that the other stack_trace variables have. Signed-off-by: Steven Rostedt --- include/linux/ftrace.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ftrace.h b/include/linux/ftrace.h index b4c92ab9e08b..eae6548efbf0 100644 --- a/include/linux/ftrace.h +++ b/include/linux/ftrace.h @@ -271,7 +271,7 @@ struct stack_trace; extern unsigned stack_trace_index[]; extern struct stack_trace stack_trace_max; extern unsigned long stack_trace_max_size; -extern arch_spinlock_t max_stack_lock; +extern arch_spinlock_t stack_trace_max_lock; extern int stack_tracer_enabled; void stack_trace_print(void); -- cgit v1.2.3 From 105ff3cbf225036b75a6a46c96d1ddce8e7bdc66 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Tue, 3 Nov 2015 17:22:17 -0800 Subject: atomic: remove all traces of READ_ONCE_CTRL() and atomic*_read_ctrl() This seems to be a mis-reading of how alpha memory ordering works, and is not backed up by the alpha architecture manual. The helper functions don't do anything special on any other architectures, and the arguments that support them being safe on other architectures also argue that they are safe on alpha. Basically, the "control dependency" is between a previous read and a subsequent write that is dependent on the value read. Even if the subsequent write is actually done speculatively, there is no way that such a speculative write could be made visible to other cpu's until it has been committed, which requires validating the speculation. Note that most weakely ordered architectures (very much including alpha) do not guarantee any ordering relationship between two loads that depend on each other on a control dependency: read A if (val == 1) read B because the conditional may be predicted, and the "read B" may be speculatively moved up to before reading the value A. So we require the user to insert a smp_rmb() between the two accesses to be correct: read A; if (A == 1) smp_rmb() read B Alpha is further special in that it can break that ordering even if the *address* of B depends on the read of A, because the cacheline that is read later may be stale unless you have a memory barrier in between the pointer read and the read of the value behind a pointer: read ptr read offset(ptr) whereas all other weakly ordered architectures guarantee that the data dependency (as opposed to just a control dependency) will order the two accesses. As a result, alpha needs a "smp_read_barrier_depends()" in between those two reads for them to be ordered. The coontrol dependency that "READ_ONCE_CTRL()" and "atomic_read_ctrl()" had was a control dependency to a subsequent *write*, however, and nobody can finalize such a subsequent write without having actually done the read. And were you to write such a value to a "stale" cacheline (the way the unordered reads came to be), that would seem to lose the write entirely. So the things that make alpha able to re-order reads even more aggressively than other weak architectures do not seem to be relevant for a subsequent write. Alpha memory ordering may be strange, but there's no real indication that it is *that* strange. Also, the alpha architecture reference manual very explicitly talks about the definition of "Dependence Constraints" in section 5.6.1.7, where a preceding read dominates a subsequent write. Such a dependence constraint admittedly does not impose a BEFORE (alpha architecture term for globally visible ordering), but it does guarantee that there can be no "causal loop". I don't see how you could avoid such a loop if another cpu could see the stored value and then impact the value of the first read. Put another way: the read and the write could not be seen as being out of order wrt other cpus. So I do not see how these "x_ctrl()" functions can currently be necessary. I may have to eat my words at some point, but in the absense of clear proof that alpha actually needs this, or indeed even an explanation of how alpha could _possibly_ need it, I do not believe these functions are called for. And if it turns out that alpha really _does_ need a barrier for this case, that barrier still should not be "smp_read_barrier_depends()". We'd have to make up some new speciality barrier just for alpha, along with the documentation for why it really is necessary. Cc: Peter Zijlstra Cc: Paul E McKenney Cc: Dmitry Vyukov Cc: Will Deacon Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/atomic.h | 18 ------------------ include/linux/compiler.h | 16 ---------------- 2 files changed, 34 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 27e580d232ca..301de78d65f7 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -4,15 +4,6 @@ #include #include -#ifndef atomic_read_ctrl -static inline int atomic_read_ctrl(const atomic_t *v) -{ - int val = atomic_read(v); - smp_read_barrier_depends(); /* Enforce control dependency. */ - return val; -} -#endif - /* * Relaxed variants of xchg, cmpxchg and some atomic operations. * @@ -561,15 +552,6 @@ static inline int atomic_dec_if_positive(atomic_t *v) #include #endif -#ifndef atomic64_read_ctrl -static inline long long atomic64_read_ctrl(const atomic64_t *v) -{ - long long val = atomic64_read(v); - smp_read_barrier_depends(); /* Enforce control dependency. */ - return val; -} -#endif - #ifndef atomic64_andnot static inline void atomic64_andnot(long long i, atomic64_t *v) { diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 3d7810341b57..fe817432190c 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -299,22 +299,6 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s __u.__val; \ }) -/** - * READ_ONCE_CTRL - Read a value heading a control dependency - * @x: The value to be read, heading the control dependency - * - * Control dependencies are tricky. See Documentation/memory-barriers.txt - * for important information on how to use them. Note that in many cases, - * use of smp_load_acquire() will be much simpler. Control dependencies - * should be avoided except on the hottest of hotpaths. - */ -#define READ_ONCE_CTRL(x) \ -({ \ - typeof(x) __val = READ_ONCE(x); \ - smp_read_barrier_depends(); /* Enforce control dependency. */ \ - __val; \ -}) - #endif /* __KERNEL__ */ #endif /* __ASSEMBLY__ */ -- cgit v1.2.3 From 36734810488e618d48cc14782f7111b3dfaffb83 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Wed, 4 Nov 2015 08:23:51 -0500 Subject: audit: audit_dummy_context can be boolean This patch makes audit_dummy_context return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai [PM: subject line tweak] Signed-off-by: Paul Moore --- include/linux/audit.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index b2abc996c25d..69844b680fcc 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -143,7 +143,7 @@ extern void __audit_inode_child(const struct inode *parent, extern void __audit_seccomp(unsigned long syscall, long signr, int code); extern void __audit_ptrace(struct task_struct *t); -static inline int audit_dummy_context(void) +static inline bool audit_dummy_context(void) { void *p = current->audit_context; return !p || *(int *)p; @@ -345,9 +345,9 @@ static inline void audit_syscall_entry(int major, unsigned long a0, { } static inline void audit_syscall_exit(void *pt_regs) { } -static inline int audit_dummy_context(void) +static inline bool audit_dummy_context(void) { - return 1; + return true; } static inline struct filename *audit_reusename(const __user char *name) { -- cgit v1.2.3 From 9fcf836b215ca5685030ecab3e35ecc14ee3bcfb Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Wed, 4 Nov 2015 08:23:51 -0500 Subject: audit: audit_string_contains_control can be boolean This patch makes audit_string_contains_control return bool to improve readability due to this particular function only using either one or zero as its return value. Signed-off-by: Yaowei Bai [PM: tweaked subject line] Signed-off-by: Paul Moore --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 69844b680fcc..20eba1eb0a3c 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -457,7 +457,7 @@ extern struct audit_buffer *audit_log_start(struct audit_context *ctx, gfp_t gfp extern __printf(2, 3) void audit_log_format(struct audit_buffer *ab, const char *fmt, ...); extern void audit_log_end(struct audit_buffer *ab); -extern int audit_string_contains_control(const char *string, +extern bool audit_string_contains_control(const char *string, size_t len); extern void audit_log_n_hex(struct audit_buffer *ab, const unsigned char *buf, -- cgit v1.2.3 From c75efa974e013640496620f26f0b532cb5cb17f9 Mon Sep 17 00:00:00 2001 From: Andrey Smetanin Date: Fri, 16 Oct 2015 10:07:50 +0300 Subject: drivers/hv: share Hyper-V SynIC constants with userspace Moved Hyper-V synic contants from guest Hyper-V drivers private header into x86 arch uapi Hyper-V header. Added Hyper-V synic msr's flags into x86 arch uapi Hyper-V header. Signed-off-by: Andrey Smetanin Reviewed-by: Roman Kagan Signed-off-by: Denis V. Lunev CC: Vitaly Kuznetsov CC: "K. Y. Srinivasan" CC: Gleb Natapov CC: Paolo Bonzini Signed-off-by: Paolo Bonzini --- include/linux/hyperv.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 54733d5b503e..8fdc17b84739 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -26,6 +26,7 @@ #define _HYPERV_H #include +#include #include #include -- cgit v1.2.3 From b97e6de9c96cefaa02a6a7464731ea504b45e150 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 19:16:47 +0100 Subject: KVM: x86: merge kvm_arch_set_irq with kvm_set_msi_inatomic MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit We do not want to do too much work in atomic context, in particular not walking all the VCPUs of the virtual machine. So we want to distinguish the architecture-specific injection function for irqfd from kvm_set_msi. Since it's still empty, reuse the newly added kvm_arch_set_irq and rename it to kvm_arch_set_irq_inatomic. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 87189a41d904..15c78f320678 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -830,10 +830,9 @@ int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); - -int kvm_arch_set_irq(struct kvm_kernel_irq_routing_entry *irq, struct kvm *kvm, - int irq_source_id, int level, bool line_status); - +int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, + struct kvm *kvm, int irq_source_id, + int level, bool line_status); bool kvm_irq_has_notifier(struct kvm *kvm, unsigned irqchip, unsigned pin); void kvm_notify_acked_gsi(struct kvm *kvm, int gsi); void kvm_notify_acked_irq(struct kvm *kvm, unsigned irqchip, unsigned pin); -- cgit v1.2.3 From 8a22f234a81ab4d1de5d948c3478608f08a9b844 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 18:52:02 +0100 Subject: KVM: x86: move kvm_set_irq_inatomic to legacy device assignment MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The function is not used outside device assignment, and kvm_arch_set_irq_inatomic has a different prototype. Move it here and make it static to avoid confusion. Reviewed-by: Radim Krčmář Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 15c78f320678..242a6d2b53ff 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -827,7 +827,6 @@ int kvm_irq_map_chip_pin(struct kvm *kvm, unsigned irqchip, unsigned pin); int kvm_set_irq(struct kvm *kvm, int irq_source_id, u32 irq, int level, bool line_status); -int kvm_set_irq_inatomic(struct kvm *kvm, int irq_source_id, u32 irq, int level); int kvm_set_msi(struct kvm_kernel_irq_routing_entry *irq_entry, struct kvm *kvm, int irq_source_id, int level, bool line_status); int kvm_arch_set_irq_inatomic(struct kvm_kernel_irq_routing_entry *e, -- cgit v1.2.3 From 033291eccbdb1b70ffc02641edae19ac825dc75d Mon Sep 17 00:00:00 2001 From: Alex Williamson Date: Thu, 15 Oct 2015 15:08:48 -0600 Subject: vfio: Include No-IOMMU mode There is really no way to safely give a user full access to a DMA capable device without an IOMMU to protect the host system. There is also no way to provide DMA translation, for use cases such as device assignment to virtual machines. However, there are still those users that want userspace drivers even under those conditions. The UIO driver exists for this use case, but does not provide the degree of device access and programming that VFIO has. In an effort to avoid code duplication, this introduces a No-IOMMU mode for VFIO. This mode requires building VFIO with CONFIG_VFIO_NOIOMMU and enabling the "enable_unsafe_noiommu_mode" option on the vfio driver. This should make it very clear that this mode is not safe. Additionally, CAP_SYS_RAWIO privileges are necessary to work with groups and containers using this mode. Groups making use of this support are named /dev/vfio/noiommu-$GROUP and can only make use of the special VFIO_NOIOMMU_IOMMU for the container. Use of this mode, specifically binding a device without a native IOMMU group to a VFIO bus driver will taint the kernel and should therefore not be considered supported. This patch includes no-iommu support for the vfio-pci bus driver only. Signed-off-by: Alex Williamson Acked-by: Michael S. Tsirkin --- include/linux/vfio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/vfio.h b/include/linux/vfio.h index ddb440975382..610a86a892b8 100644 --- a/include/linux/vfio.h +++ b/include/linux/vfio.h @@ -44,6 +44,9 @@ struct vfio_device_ops { void (*request)(void *device_data, unsigned int count); }; +extern struct iommu_group *vfio_iommu_group_get(struct device *dev); +extern void vfio_iommu_group_put(struct iommu_group *group, struct device *dev); + extern int vfio_add_group_dev(struct device *dev, const struct vfio_device_ops *ops, void *device_data); -- cgit v1.2.3 From d618382ba5f1a4905db63f4980bf7b0a5826de9d Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Wed, 4 Nov 2015 11:30:57 -0800 Subject: iommu-common: Fix error code used in iommu_tbl_range_{alloc,free}(). The value returned from iommu_tbl_range_alloc() (and the one passed in as a fourth argument to iommu_tbl_range_free) is not a DMA address, it is rather an index into the IOMMU page table. Therefore using DMA_ERROR_CODE is not appropriate. Use a more type matching error code define, IOMMU_ERROR_CODE, and update all users of this interface. Reported-by: Andre Przywara Signed-off-by: David S. Miller --- include/linux/iommu-common.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/iommu-common.h b/include/linux/iommu-common.h index bbced83b32ee..376a27c9cc6a 100644 --- a/include/linux/iommu-common.h +++ b/include/linux/iommu-common.h @@ -7,6 +7,7 @@ #define IOMMU_POOL_HASHBITS 4 #define IOMMU_NR_POOLS (1 << IOMMU_POOL_HASHBITS) +#define IOMMU_ERROR_CODE (~(unsigned long) 0) struct iommu_pool { unsigned long start; -- cgit v1.2.3 From e02328f47bd75fde9decf9657ec7d769b370f857 Mon Sep 17 00:00:00 2001 From: Lukas Wunner Date: Tue, 8 Sep 2015 14:17:47 +0200 Subject: vga_switcheroo: Drop client power state VGA_SWITCHEROO_INIT hda_intel.c:azx_probe() defers initialization of an audio controller on the discrete GPU if the GPU is powered off. The power state of the GPU is determined by calling vga_switcheroo_get_client_state(). vga_switcheroo_get_client_state() returns VGA_SWITCHEROO_INIT if vga_switcheroo is not enabled, i.e. if no second GPU or no handler has registered. This can go wrong in the following scenario: - Driver for the integrated GPU is not loaded. - Driver for the discrete GPU registers with vga_switcheroo, uses driver power control to power down the GPU, handler cuts power to the GPU. - Driver for the audio controller gets loaded after the GPU was powered down, calls vga_switcheroo_get_client_state() which returns VGA_SWITCHEROO_INIT instead of VGA_SWITCHEROO_OFF. - Consequence: azx_probe() tries to initialize the audio controller even though the GPU is powered down. The power state VGA_SWITCHEROO_INIT was introduced by c8e9cf7bb240 ("vga_switcheroo: Add a helper function to get the client state"). It is not apparent what its benefit might be. The idea seems to be to initialize the audio controller even if the power state is VGA_SWITCHEROO_OFF (were vga_switcheroo enabled), but as shown above this can fail. Drop VGA_SWITCHEROO_INIT to solve this. Acked-by: Takashi Iwai Signed-off-by: Lukas Wunner Signed-off-by: Dave Airlie --- include/linux/vga_switcheroo.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vga_switcheroo.h b/include/linux/vga_switcheroo.h index 786bc931dbd1..69e1d4a1f1b3 100644 --- a/include/linux/vga_switcheroo.h +++ b/include/linux/vga_switcheroo.h @@ -39,10 +39,6 @@ struct pci_dev; * enum vga_switcheroo_state - client power state * @VGA_SWITCHEROO_OFF: off * @VGA_SWITCHEROO_ON: on - * @VGA_SWITCHEROO_INIT: client has registered with vga_switcheroo but - * vga_switcheroo is not enabled, i.e. no second client or no handler - * has registered. Only used in vga_switcheroo_get_client_state() which - * in turn is only called from hda_intel.c * @VGA_SWITCHEROO_NOT_FOUND: client has not registered with vga_switcheroo. * Only used in vga_switcheroo_get_client_state() which in turn is only * called from hda_intel.c @@ -53,7 +49,6 @@ enum vga_switcheroo_state { VGA_SWITCHEROO_OFF, VGA_SWITCHEROO_ON, /* below are referred only from vga_switcheroo_get_client_state() */ - VGA_SWITCHEROO_INIT, VGA_SWITCHEROO_NOT_FOUND, }; -- cgit v1.2.3 From 8f25348b65cd073f77945f559ab1e5de83422cd1 Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 4 Nov 2015 14:59:06 +0100 Subject: net: add forgotten IFF_L3MDEV_SLAVE define Fixes: fee6d4c77 ("net: Add netif_is_l3_slave") Signed-off-by: Jiri Pirko Acked-by: David Ahern Signed-off-by: David S. Miller --- include/linux/netdevice.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 4ac653b7b8ac..2c00772bd136 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -1322,6 +1322,7 @@ enum netdev_priv_flags { #define IFF_L3MDEV_MASTER IFF_L3MDEV_MASTER #define IFF_NO_QUEUE IFF_NO_QUEUE #define IFF_OPENVSWITCH IFF_OPENVSWITCH +#define IFF_L3MDEV_SLAVE IFF_L3MDEV_SLAVE /** * struct net_device - The DEVICE structure. -- cgit v1.2.3 From 9154301a47b33bdc273d8254c407792524367558 Mon Sep 17 00:00:00 2001 From: Dmitry Torokhov Date: Tue, 29 Sep 2015 15:52:59 -0700 Subject: HID: hid-input: allow input_configured callback return errors MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit When configuring input device via input_configured callback we may encounter errors (for example input_mt_init_slots() may fail). Instead of continuing with half-initialized input device let's allow driver indicate failures. Signed-off-by: Jaikumar Ganesh Signed-off-by: Arve Hjønnevåg Reviewed-by: Benjamin Tissoires Reviewed-by: David Herrmann Acked-by: Nikolai Kondrashov Acked-by: Andrew Duggan Signed-off-by: Dmitry Torokhov Signed-off-by: Jiri Kosina --- include/linux/hid.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index f17980de2662..251a1d382e23 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -698,8 +698,8 @@ struct hid_driver { int (*input_mapped)(struct hid_device *hdev, struct hid_input *hidinput, struct hid_field *field, struct hid_usage *usage, unsigned long **bit, int *max); - void (*input_configured)(struct hid_device *hdev, - struct hid_input *hidinput); + int (*input_configured)(struct hid_device *hdev, + struct hid_input *hidinput); void (*feature_mapping)(struct hid_device *hdev, struct hid_field *field, struct hid_usage *usage); -- cgit v1.2.3 From 805c4bc05705fb2b71ec970960b456eee9900953 Mon Sep 17 00:00:00 2001 From: Eric Dumazet Date: Thu, 5 Nov 2015 11:07:13 -0800 Subject: tcp: fix req->saved_syn race For the reasons explained in commit ce1050089c96 ("tcp/dccp: fix ireq->pktopts race"), we need to make sure we do not access req->saved_syn unless we own the request sock. This fixes races for listeners using TCP_SAVE_SYN option. Fixes: e994b2f0fb92 ("tcp: do not lock listener to process SYN packets") Fixes: 079096f103fa ("tcp/dccp: install syn_recv requests into ehash table") Signed-off-by: Eric Dumazet Reported-by: Ying Cai Signed-off-by: David S. Miller --- include/linux/tcp.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tcp.h b/include/linux/tcp.h index c906f4534581..b386361ba3e8 100644 --- a/include/linux/tcp.h +++ b/include/linux/tcp.h @@ -397,6 +397,13 @@ static inline void fastopen_queue_tune(struct sock *sk, int backlog) queue->fastopenq.max_qlen = min_t(unsigned int, backlog, somaxconn); } +static inline void tcp_move_syn(struct tcp_sock *tp, + struct request_sock *req) +{ + tp->saved_syn = req->saved_syn; + req->saved_syn = NULL; +} + static inline void tcp_saved_syn_free(struct tcp_sock *tp) { kfree(tp->saved_syn); -- cgit v1.2.3 From 720abae3d68ae966044497dd9ee5ccf3b16e700c Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 5 Nov 2015 18:44:18 -0800 Subject: rcu: force alignment on struct callback_head/rcu_head Make struct callback_head aligned to size of pointer. On most architectures it happens naturally due ABI requirements, but some architectures (like CRIS) have weird ABI and we need to ask it explicitly. The alignment is required to guarantee that bits 0 and 1 of @next will be clear under normal conditions -- as long as we use call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. This guarantee is important for few reasons: - future call_rcu_lazy() will make use of lower bits in the pointer; - the structure shares storage spacer in struct page with @compound_head, which encode PageTail() in bit 0. The guarantee is needed to avoid false-positive PageTail(). False postive PageTail() caused crash on crisv32[1]. It happend due misaligned task_struct->rcu, which was byte-aligned. [1] http://lkml.kernel.org/r/55FAEA67.9000102@roeck-us.net Signed-off-by: Kirill A. Shutemov Reported-by: Guenter Roeck Tested-by: Guenter Roeck Acked-by: Paul E. McKenney Cc: Mikael Starvik Cc: Jesper Nilsson Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/types.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/types.h b/include/linux/types.h index c314989d9158..70d8500bddf1 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -205,11 +205,25 @@ struct ustat { * struct callback_head - callback structure for use with RCU and task_work * @next: next update requests in a list * @func: actual update function to call after the grace period. + * + * The struct is aligned to size of pointer. On most architectures it happens + * naturally due ABI requirements, but some architectures (like CRIS) have + * weird ABI and we need to ask it explicitly. + * + * The alignment is required to guarantee that bits 0 and 1 of @next will be + * clear under normal conditions -- as long as we use call_rcu(), + * call_rcu_bh(), call_rcu_sched(), or call_srcu() to queue callback. + * + * This guarantee is important for few reasons: + * - future call_rcu_lazy() will make use of lower bits in the pointer; + * - the structure shares storage spacer in struct page with @compound_head, + * which encode PageTail() in bit 0. The guarantee is needed to avoid + * false-positive PageTail(). */ struct callback_head { struct callback_head *next; void (*func)(struct callback_head *head); -}; +} __attribute__((aligned(sizeof(void *)))); #define rcu_head callback_head typedef void (*rcu_callback_t)(struct rcu_head *head); -- cgit v1.2.3 From 55537871ef666b4153fd1ef8782e4a13fee142cc Mon Sep 17 00:00:00 2001 From: Jiri Kosina Date: Thu, 5 Nov 2015 18:44:41 -0800 Subject: kernel/watchdog.c: perform all-CPU backtrace in case of hard lockup In many cases of hardlockup reports, it's actually not possible to know why it triggered, because the CPU that got stuck is usually waiting on a resource (with IRQs disabled) in posession of some other CPU is holding. IOW, we are often looking at the stacktrace of the victim and not the actual offender. Introduce sysctl / cmdline parameter that makes it possible to have hardlockup detector perform all-CPU backtrace. Signed-off-by: Jiri Kosina Reviewed-by: Aaron Tomlin Cc: Ulrich Obergfell Acked-by: Don Zickus Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/nmi.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nmi.h b/include/linux/nmi.h index 78488e099ce7..7ec5b86735f3 100644 --- a/include/linux/nmi.h +++ b/include/linux/nmi.h @@ -73,6 +73,7 @@ extern int watchdog_user_enabled; extern int watchdog_thresh; extern unsigned long *watchdog_cpumask_bits; extern int sysctl_softlockup_all_cpu_backtrace; +extern int sysctl_hardlockup_all_cpu_backtrace; struct ctl_table; extern int proc_watchdog(struct ctl_table *, int , void __user *, size_t *, loff_t *); -- cgit v1.2.3 From ac1f591249d95372f3a5ab3828d4af5dfbf5efd3 Mon Sep 17 00:00:00 2001 From: Don Zickus Date: Thu, 5 Nov 2015 18:44:44 -0800 Subject: kernel/watchdog.c: add sysctl knob hardlockup_panic The only way to enable a hardlockup to panic the machine is to set 'nmi_watchdog=panic' on the kernel command line. This makes it awkward for end users and folks who want to run automate tests (like myself). Mimic the softlockup_panic knob and create a /proc/sys/kernel/hardlockup_panic knob. Signed-off-by: Don Zickus Cc: Ulrich Obergfell Acked-by: Jiri Kosina Reviewed-by: Aaron Tomlin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index c115d617739d..5423b9c82fee 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -384,6 +384,7 @@ extern int proc_dowatchdog_thresh(struct ctl_table *table, int write, void __user *buffer, size_t *lenp, loff_t *ppos); extern unsigned int softlockup_panic; +extern unsigned int hardlockup_panic; void lockup_detector_init(void); #else static inline void touch_softlockup_watchdog(void) -- cgit v1.2.3 From fda901241fb89449244537db4fb27b06e491b74f Mon Sep 17 00:00:00 2001 From: Denis Kirjanov Date: Thu, 5 Nov 2015 18:44:59 -0800 Subject: slab: convert slab_is_available() to boolean A good candidate to return a boolean result. Signed-off-by: Denis Kirjanov Cc: Christoph Lameter Reviewed-by: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7e37d448ed91..7c82e3b307a3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -111,7 +111,7 @@ struct mem_cgroup; * struct kmem_cache related prototypes */ void __init kmem_cache_init(void); -int slab_is_available(void); +bool slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, -- cgit v1.2.3 From a744fd17b5233360681ce03e43804406745b680b Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Nov 2015 18:45:02 -0800 Subject: compiler.h: add support for function attribute assume_aligned gcc 4.9 added the function attribute assume_aligned, indicating to the caller that the returned pointer may be assumed to have a certain minimal alignment. This is useful if, for example, the return value is passed to memset(). Add a shorthand macro for that. Signed-off-by: Rasmus Villemoes Cc: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 17 +++++++++++++++++ include/linux/compiler.h | 8 ++++++++ 2 files changed, 25 insertions(+) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 8efb40e61d6e..02fa6176120d 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -210,6 +210,23 @@ #define __visible __attribute__((externally_visible)) #endif + +#if GCC_VERSION >= 40900 +/* + * __assume_aligned(n, k): Tell the optimizer that the returned + * pointer can be assumed to be k modulo n. The second argument is + * optional (default 0), so we use a variadic macro to make the + * shorthand. + * + * Beware: Do not apply this to functions which may return + * ERR_PTRs. Also, it is probably unwise to apply it to functions + * returning extra information in the low bits (but in that case the + * compiler should see some alignment anyway, when the return value is + * massaged by 'flags = ptr & 3; ptr &= ~3;'). + */ +#define __assume_aligned(a, ...) __attribute__((__assume_aligned__(a, ## __VA_ARGS__))) +#endif + /* * GCC 'asm goto' miscompiles certain code sequences: * diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 52a459ff75f4..4dac1036594f 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -417,6 +417,14 @@ static __always_inline void __write_once_size(volatile void *p, void *res, int s #define __visible #endif +/* + * Assume alignment of return value. + */ +#ifndef __assume_aligned +#define __assume_aligned(a, ...) +#endif + + /* Are two types/vars the same type (ignoring qualifiers)? */ #ifndef __same_type # define __same_type(a, b) __builtin_types_compatible_p(typeof(a), typeof(b)) -- cgit v1.2.3 From 8748dd5c98f7fe506ccb31a094833401ed120915 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Thu, 5 Nov 2015 18:45:05 -0800 Subject: include/linux/compiler-gcc.h: hide assume_aligned attribute from sparse The patch "slab.h: sprinkle __assume_aligned attributes" causes *tons* of whinges if you do 'make C=2' with sparse 0.5.0: CHECK drivers/media/usb/pwc/pwc-if.c include/linux/slab.h:307:43: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:308:58: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:337:73: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:375:74: error: attribute '__assume_aligned__': unknown attribute include/linux/slab.h:378:80: error: attribute '__assume_aligned__': unknown attribute sparse apparently pretends to be gcc >= 4.9, yet isn't prepared to handle all the function attributes supported by those gccs and complains loudly. So hide the definition of __assume_aligned from it (so that the generic one in compiler.h gets used). Signed-off-by: Rasmus Villemoes Reported-by: Valdis Kletnieks Tested-By: Valdis Kletnieks Cc: Christopher Li Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 02fa6176120d..0e3110a0b771 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -211,7 +211,7 @@ #endif -#if GCC_VERSION >= 40900 +#if GCC_VERSION >= 40900 && !defined(__CHECKER__) /* * __assume_aligned(n, k): Tell the optimizer that the returned * pointer can be assumed to be k modulo n. The second argument is -- cgit v1.2.3 From 0ab32b6f1b88444524e52429fab334ff96683a3f Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 Nov 2015 18:46:03 -0800 Subject: uaccess: reimplement probe_kernel_address() using probe_kernel_read() probe_kernel_address() is basically the same as the (later added) probe_kernel_read(). The return value on EFAULT is a bit different: probe_kernel_address() returns number-of-bytes-not-copied whereas probe_kernel_read() returns -EFAULT. All callers have been checked, none cared. probe_kernel_read() can be overridden by the architecture whereas probe_kernel_address() cannot. parisc, blackfin and um do this, to insert additional checking. Hence this patch possibly fixes obscure bugs, although there are only two probe_kernel_address() callsites outside arch/. My first attempt involved removing probe_kernel_address() entirely and converting all callsites to use probe_kernel_read() directly, but that got tiresome. This patch shrinks mm/slab_common.o by 218 bytes. For a single probe_kernel_address() callsite. Cc: Steven Miao Cc: Jeff Dike Cc: Richard Weinberger Cc: "James E.J. Bottomley" Cc: Helge Deller Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/uaccess.h | 40 ++++++++++------------------------------ 1 file changed, 10 insertions(+), 30 deletions(-) (limited to 'include/linux') diff --git a/include/linux/uaccess.h b/include/linux/uaccess.h index d6f2c2c5b043..558129af828a 100644 --- a/include/linux/uaccess.h +++ b/include/linux/uaccess.h @@ -75,36 +75,6 @@ static inline unsigned long __copy_from_user_nocache(void *to, #endif /* ARCH_HAS_NOCACHE_UACCESS */ -/** - * probe_kernel_address(): safely attempt to read from a location - * @addr: address to read from - its type is type typeof(retval)* - * @retval: read into this variable - * - * Safely read from address @addr into variable @revtal. If a kernel fault - * happens, handle that and return -EFAULT. - * We ensure that the __get_user() is executed in atomic context so that - * do_page_fault() doesn't attempt to take mmap_sem. This makes - * probe_kernel_address() suitable for use within regions where the caller - * already holds mmap_sem, or other locks which nest inside mmap_sem. - * This must be a macro because __get_user() needs to know the types of the - * args. - * - * We don't include enough header files to be able to do the set_fs(). We - * require that the probe_kernel_address() caller will do that. - */ -#define probe_kernel_address(addr, retval) \ - ({ \ - long ret; \ - mm_segment_t old_fs = get_fs(); \ - \ - set_fs(KERNEL_DS); \ - pagefault_disable(); \ - ret = __copy_from_user_inatomic(&(retval), (__force typeof(retval) __user *)(addr), sizeof(retval)); \ - pagefault_enable(); \ - set_fs(old_fs); \ - ret; \ - }) - /* * probe_kernel_read(): safely attempt to read from a location * @dst: pointer to the buffer that shall take the data @@ -131,4 +101,14 @@ extern long notrace __probe_kernel_write(void *dst, const void *src, size_t size extern long strncpy_from_unsafe(char *dst, const void *unsafe_addr, long count); +/** + * probe_kernel_address(): safely attempt to read from a location + * @addr: address to read from + * @retval: read into this variable + * + * Returns 0 on success, or -EFAULT. + */ +#define probe_kernel_address(addr, retval) \ + probe_kernel_read(&retval, addr, sizeof(retval)) + #endif /* __LINUX_UACCESS_H__ */ -- cgit v1.2.3 From 626ebc4100285be56fe3546f29b6afeb36b6871a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:09 -0800 Subject: memcg: flatten task_struct->memcg_oom task_struct->memcg_oom is a sub-struct containing fields which are used for async memcg oom handling. Most task_struct fields aren't packaged this way and it can lead to unnecessary alignment paddings. This patch flattens it. * task.memcg_oom.memcg -> task.memcg_in_oom * task.memcg_oom.gfp_mask -> task.memcg_oom_gfp_mask * task.memcg_oom.order -> task.memcg_oom_order * task.memcg_oom.may_oom -> task.memcg_may_oom In addition, task.memcg_may_oom is relocated to where other bitfields are which reduces the size of task_struct. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 10 +++++----- include/linux/sched.h | 13 ++++++------- 2 files changed, 11 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 3e3318ddfc0e..56174c7199ee 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -406,19 +406,19 @@ void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, static inline void mem_cgroup_oom_enable(void) { - WARN_ON(current->memcg_oom.may_oom); - current->memcg_oom.may_oom = 1; + WARN_ON(current->memcg_may_oom); + current->memcg_may_oom = 1; } static inline void mem_cgroup_oom_disable(void) { - WARN_ON(!current->memcg_oom.may_oom); - current->memcg_oom.may_oom = 0; + WARN_ON(!current->memcg_may_oom); + current->memcg_may_oom = 0; } static inline bool task_in_memcg_oom(struct task_struct *p) { - return p->memcg_oom.memcg; + return p->memcg_in_oom; } bool mem_cgroup_oom_synchronize(bool wait); diff --git a/include/linux/sched.h b/include/linux/sched.h index 5423b9c82fee..17bf8b845aa0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1473,7 +1473,9 @@ struct task_struct { unsigned sched_reset_on_fork:1; unsigned sched_contributes_to_load:1; unsigned sched_migrated:1; - +#ifdef CONFIG_MEMCG + unsigned memcg_may_oom:1; +#endif #ifdef CONFIG_MEMCG_KMEM unsigned memcg_kmem_skip_account:1; #endif @@ -1804,12 +1806,9 @@ struct task_struct { unsigned long trace_recursion; #endif /* CONFIG_TRACING */ #ifdef CONFIG_MEMCG - struct memcg_oom_info { - struct mem_cgroup *memcg; - gfp_t gfp_mask; - int order; - unsigned int may_oom:1; - } memcg_oom; + struct mem_cgroup *memcg_in_oom; + gfp_t memcg_oom_gfp_mask; + int memcg_oom_order; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; -- cgit v1.2.3 From b23afb93d317c65cef553b804f08dec8a7a0f7e1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:11 -0800 Subject: memcg: punt high overage reclaim to return-to-userland path Currently, try_charge() tries to reclaim memory synchronously when the high limit is breached; however, if the allocation doesn't have __GFP_WAIT, synchronous reclaim is skipped. If a process performs only speculative allocations, it can blow way past the high limit. This is actually easily reproducible by simply doing "find /". slab/slub allocator tries speculative allocations first, so as long as there's memory which can be consumed without blocking, it can keep allocating memory regardless of the high limit. This patch makes try_charge() always punt the over-high reclaim to the return-to-userland path. If try_charge() detects that high limit is breached, it adds the overage to current->memcg_nr_pages_over_high and schedules execution of mem_cgroup_handle_over_high() which performs synchronous reclaim from the return-to-userland path. As long as kernel doesn't have a run-away allocation spree, this should provide enough protection while making kmemcg behave more consistently. It also has the following benefits. - All over-high reclaims can use GFP_KERNEL regardless of the specific gfp mask in use, e.g. GFP_NOFS, when the limit was breached. - It copes with prio inversion. Previously, a low-prio task with small memory.high might perform over-high reclaim with a bunch of locks held. If a higher prio task needed any of these locks, it would have to wait until the low prio task finished reclaim and released the locks. By handing over-high reclaim to the task exit path this issue can be avoided. Signed-off-by: Tejun Heo Acked-by: Michal Hocko Reviewed-by: Vladimir Davydov Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++++++ include/linux/sched.h | 3 +++ include/linux/tracehook.h | 3 +++ 3 files changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 56174c7199ee..77bf42966200 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -401,6 +401,8 @@ static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) return inactive * inactive_ratio < active; } +void mem_cgroup_handle_over_high(void); + void mem_cgroup_print_oom_info(struct mem_cgroup *memcg, struct task_struct *p); @@ -620,6 +622,10 @@ static inline void mem_cgroup_end_page_stat(struct mem_cgroup *memcg) { } +static inline void mem_cgroup_handle_over_high(void) +{ +} + static inline void mem_cgroup_oom_enable(void) { } diff --git a/include/linux/sched.h b/include/linux/sched.h index 17bf8b845aa0..055f2ee3b0f0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1809,6 +1809,9 @@ struct task_struct { struct mem_cgroup *memcg_in_oom; gfp_t memcg_oom_gfp_mask; int memcg_oom_order; + + /* number of pages to reclaim on returning to userland */ + unsigned int memcg_nr_pages_over_high; #endif #ifdef CONFIG_UPROBES struct uprobe_task *utask; diff --git a/include/linux/tracehook.h b/include/linux/tracehook.h index 84d497297c5f..26c152122a42 100644 --- a/include/linux/tracehook.h +++ b/include/linux/tracehook.h @@ -50,6 +50,7 @@ #include #include #include +#include struct linux_binprm; /* @@ -188,6 +189,8 @@ static inline void tracehook_notify_resume(struct pt_regs *regs) smp_mb__after_atomic(); if (unlikely(current->task_works)) task_work_run(); + + mem_cgroup_handle_over_high(); } #endif /* */ -- cgit v1.2.3 From cbfb479809c1b8d871cb9a31832e065e900a24c1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:14 -0800 Subject: memcg: collect kmem bypass conditions into __memcg_kmem_bypass() memcg_kmem_newpage_charge() and memcg_kmem_get_cache() are testing the same series of conditions to decide whether to bypass kmem accounting. Collect the tests into __memcg_kmem_bypass(). This is pure refactoring. Signed-off-by: Tejun Heo Reviewed-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 46 ++++++++++++++++++++++------------------------ 1 file changed, 22 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 77bf42966200..d8174e8d6ab4 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -777,20 +777,7 @@ int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, unsigned long nr_pages); void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); -/** - * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. - * @gfp: the gfp allocation flags. - * @memcg: a pointer to the memcg this was charged against. - * @order: allocation order. - * - * returns true if the memcg where the current task belongs can hold this - * allocation. - * - * We return true automatically if this allocation is not to be accounted to - * any memcg. - */ -static inline bool -memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; @@ -812,6 +799,26 @@ memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) if (unlikely(fatal_signal_pending(current))) return true; + return false; +} + +/** + * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. + * @gfp: the gfp allocation flags. + * @memcg: a pointer to the memcg this was charged against. + * @order: allocation order. + * + * returns true if the memcg where the current task belongs can hold this + * allocation. + * + * We return true automatically if this allocation is not to be accounted to + * any memcg. + */ +static inline bool +memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +{ + if (__memcg_kmem_bypass(gfp)) + return true; return __memcg_kmem_newpage_charge(gfp, memcg, order); } @@ -854,17 +861,8 @@ memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) static __always_inline struct kmem_cache * memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { - if (!memcg_kmem_enabled()) - return cachep; - if (gfp & __GFP_NOACCOUNT) - return cachep; - if (gfp & __GFP_NOFAIL) + if (__memcg_kmem_bypass(gfp)) return cachep; - if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) - return cachep; - if (unlikely(fatal_signal_pending(current))) - return cachep; - return __memcg_kmem_get_cache(cachep); } -- cgit v1.2.3 From 7f822c24c2b32a9feafb33e3b9a9e23ef4278c2c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 5 Nov 2015 18:46:20 -0800 Subject: memcg: drop unnecessary cold-path tests from __memcg_kmem_bypass() __memcg_kmem_bypass() decides whether a kmem allocation should be bypassed to the root memcg. Some conditions that it tests are valid criteria regarding who should be held accountable; however, there are a couple unnecessary tests for cold paths - __GFP_FAIL and fatal_signal_pending(). The previous patch updated try_charge() to handle both __GFP_FAIL and dying tasks correctly and the only thing these two tests are doing is making accounting less accurate and sprinkling tests for cold path conditions in the hot paths. There's nothing meaningful gained by these extra tests. This patch removes the two unnecessary tests from __memcg_kmem_bypass(). Signed-off-by: Tejun Heo Reviewed-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index d8174e8d6ab4..641bb60dd7db 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -781,24 +781,10 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) return true; - if (gfp & __GFP_NOACCOUNT) return true; - /* - * __GFP_NOFAIL allocations will move on even if charging is not - * possible. Therefore we don't even try, and have this allocation - * unaccounted. We could in theory charge it forcibly, but we hope - * those allocations are rare, and won't be worth the trouble. - */ - if (gfp & __GFP_NOFAIL) - return true; if (in_interrupt() || (!current->mm) || (current->flags & PF_KTHREAD)) return true; - - /* If the test is dying, just let it go. */ - if (unlikely(fatal_signal_pending(current))) - return true; - return false; } -- cgit v1.2.3 From 35bd16a227534cb6ffc9b26a33061c2dcf91934b Mon Sep 17 00:00:00 2001 From: Alexander Kuleshov Date: Thu, 5 Nov 2015 18:47:00 -0800 Subject: mm/memblock: make memblock_remove_range() static memblock_remove_range() is only used in the mm/memblock.c, so we can make it static. Signed-off-by: Alexander Kuleshov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memblock.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memblock.h b/include/linux/memblock.h index c518eb589260..24daf8fc4d7c 100644 --- a/include/linux/memblock.h +++ b/include/linux/memblock.h @@ -89,10 +89,6 @@ int memblock_add_range(struct memblock_type *type, phys_addr_t base, phys_addr_t size, int nid, unsigned long flags); -int memblock_remove_range(struct memblock_type *type, - phys_addr_t base, - phys_addr_t size); - void __next_mem_range(u64 *idx, int nid, ulong flags, struct memblock_type *type_a, struct memblock_type *type_b, phys_addr_t *out_start, -- cgit v1.2.3 From b171e4093017d4d6e411f5e97823e5e4a21266a2 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 5 Nov 2015 18:47:06 -0800 Subject: mm/page_alloc: remove unused parameter in init_currently_empty_zone() Commit a2f3aa025766 ("[PATCH] Fix sparsemem on Cell") fixed an oops experienced on the Cell architecture when init-time functions, early_*(), are called at runtime by introducing an 'enum memmap_context' parameter to memmap_init_zone() and init_currently_empty_zone(). This parameter is intended to be used to tell whether the call of these two functions is being made on behalf of a hotplug event, or happening at boot-time. However, init_currently_empty_zone() does not use this parameter at all, so remove it. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d94347737292..2d7e660cdefe 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -823,8 +823,7 @@ enum memmap_context { MEMMAP_HOTPLUG, }; extern int init_currently_empty_zone(struct zone *zone, unsigned long start_pfn, - unsigned long size, - enum memmap_context context); + unsigned long size); extern void lruvec_init(struct lruvec *lruvec); -- cgit v1.2.3 From 600e19afc5f8a6c18ea49cee9511c5797db02391 Mon Sep 17 00:00:00 2001 From: Roman Gushchin Date: Thu, 5 Nov 2015 18:47:08 -0800 Subject: mm: use only per-device readahead limit Maximal readahead size is limited now by two values: 1) by global 2Mb constant (MAX_READAHEAD in max_sane_readahead()) 2) by configurable per-device value* (bdi->ra_pages) There are devices, which require custom readahead limit. For instance, for RAIDs it's calculated as number of devices multiplied by chunk size times 2. Readahead size can never be larger than bdi->ra_pages * 2 value (POSIX_FADV_SEQUNTIAL doubles readahead size). If so, why do we need two limits? I suggest to completely remove this max_sane_readahead() stuff and use per-device readahead limit everywhere. Also, using right readahead size for RAID disks can significantly increase i/o performance: before: dd if=/dev/md2 of=/dev/null bs=100M count=100 100+0 records in 100+0 records out 10485760000 bytes (10 GB) copied, 12.9741 s, 808 MB/s after: $ dd if=/dev/md2 of=/dev/null bs=100M count=100 100+0 records in 100+0 records out 10485760000 bytes (10 GB) copied, 8.91317 s, 1.2 GB/s (It's an 8-disks RAID5 storage). This patch doesn't change sys_readahead and madvise(MADV_WILLNEED) behavior introduced by 6d2be915e589b58 ("mm/readahead.c: fix readahead failure for memoryless NUMA nodes and limit readahead pages"). Signed-off-by: Roman Gushchin Cc: Raghavendra K T Cc: Jan Kara Cc: Wu Fengguang Cc: David Rientjes Cc: onstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 80001de019ba..fa08f3cf0f22 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2036,8 +2036,6 @@ void page_cache_async_readahead(struct address_space *mapping, pgoff_t offset, unsigned long size); -unsigned long max_sane_readahead(unsigned long nr); - /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -- cgit v1.2.3 From 5d317b2b6536592a9b51fe65faed43d65ca9158e Mon Sep 17 00:00:00 2001 From: Naoya Horiguchi Date: Thu, 5 Nov 2015 18:47:14 -0800 Subject: mm: hugetlb: proc: add HugetlbPages field to /proc/PID/status Currently there's no easy way to get per-process usage of hugetlb pages, which is inconvenient because userspace applications which use hugetlb typically want to control their processes on the basis of how much memory (including hugetlb) they use. So this patch simply provides easy access to the info via /proc/PID/status. Signed-off-by: Naoya Horiguchi Acked-by: Joern Engel Acked-by: David Rientjes Acked-by: Michal Hocko Cc: Mike Kravetz Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb.h | 19 +++++++++++++++++++ include/linux/mm_types.h | 3 +++ 2 files changed, 22 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hugetlb.h b/include/linux/hugetlb.h index 5e35379f58a5..685c262e0be8 100644 --- a/include/linux/hugetlb.h +++ b/include/linux/hugetlb.h @@ -483,6 +483,17 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, #define hugepages_supported() (HPAGE_SHIFT != 0) #endif +void hugetlb_report_usage(struct seq_file *m, struct mm_struct *mm); + +static inline void hugetlb_count_add(long l, struct mm_struct *mm) +{ + atomic_long_add(l, &mm->hugetlb_usage); +} + +static inline void hugetlb_count_sub(long l, struct mm_struct *mm) +{ + atomic_long_sub(l, &mm->hugetlb_usage); +} #else /* CONFIG_HUGETLB_PAGE */ struct hstate {}; #define alloc_huge_page(v, a, r) NULL @@ -519,6 +530,14 @@ static inline spinlock_t *huge_pte_lockptr(struct hstate *h, { return &mm->page_table_lock; } + +static inline void hugetlb_report_usage(struct seq_file *f, struct mm_struct *m) +{ +} + +static inline void hugetlb_count_sub(long l, struct mm_struct *mm) +{ +} #endif /* CONFIG_HUGETLB_PAGE */ static inline spinlock_t *huge_pte_lock(struct hstate *h, diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 3d6baa7d4534..0a85da25a822 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -486,6 +486,9 @@ struct mm_struct { /* address of the bounds directory */ void __user *bd_addr; #endif +#ifdef CONFIG_HUGETLB_PAGE + atomic_long_t hugetlb_usage; +#endif }; static inline void mm_init_cpumask(struct mm_struct *mm) -- cgit v1.2.3 From aa750fd71c242dba02ee2034e15fbd7d0cdb2461 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Thu, 5 Nov 2015 18:47:23 -0800 Subject: mm/filemap.c: make global sync not clear error status of individual inodes filemap_fdatawait() is a function to wait for on-going writeback to complete but also consume and clear error status of the mapping set during writeback. The latter functionality is critical for applications to detect writeback error with system calls like fsync(2)/fdatasync(2). However filemap_fdatawait() is also used by sync(2) or FIFREEZE ioctl, which don't check error status of individual mappings. As a result, fsync() may not be able to detect writeback error if events happen in the following order: Application System admin ---------------------------------------------------------- write data on page cache Run sync command writeback completes with error filemap_fdatawait() clears error fsync returns success (but the data is not on disk) This patch adds filemap_fdatawait_keep_errors() for call sites where writeback error is not handled so that they don't clear error status. Signed-off-by: Jun'ichi Nomura Acked-by: Andi Kleen Reviewed-by: Tejun Heo Cc: Fengguang Wu Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..9355f377fd46 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2422,6 +2422,7 @@ extern int write_inode_now(struct inode *, int); extern int filemap_fdatawrite(struct address_space *); extern int filemap_flush(struct address_space *); extern int filemap_fdatawait(struct address_space *); +extern void filemap_fdatawait_keep_errors(struct address_space *); extern int filemap_fdatawait_range(struct address_space *, loff_t lstart, loff_t lend); extern int filemap_write_and_wait(struct address_space *mapping); -- cgit v1.2.3 From 13308ca9ef9acb325b52bd8562639b5844f3cbf2 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Thu, 5 Nov 2015 18:47:40 -0800 Subject: mm/memcontrol: make mem_cgroup_inactive_anon_is_low() return bool Make mem_cgroup_inactive_anon_is_low return bool due to this particular function only using either one or zero as its return value. No functional change. Signed-off-by: Yaowei Bai Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 641bb60dd7db..4142f94822ea 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -382,7 +382,7 @@ unsigned long mem_cgroup_get_lru_size(struct lruvec *lruvec, enum lru_list lru) return mz->lru_size[lru]; } -static inline int mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) +static inline bool mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { unsigned long inactive_ratio; unsigned long inactive; @@ -585,10 +585,10 @@ static inline bool mem_cgroup_disabled(void) return true; } -static inline int +static inline bool mem_cgroup_inactive_anon_is_low(struct lruvec *lruvec) { - return 1; + return true; } static inline bool mem_cgroup_lruvec_online(struct lruvec *lruvec) -- cgit v1.2.3 From fa6c7b46aaa0cc00846703e8c0ec1e1636ff25ba Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 5 Nov 2015 18:47:56 -0800 Subject: mm, compaction: export tracepoints status strings to userspace Some compaction tracepoints convert the integer return values to strings using the compaction_status_string array. This works for in-kernel printing, but not userspace trace printing of raw captured trace such as via trace-cmd report. This patch converts the private array to appropriate tracepoint macros that result in proper userspace support. trace-cmd output before: transhuge-stres-4235 [000] 453.149280: mm_compaction_finished: node=0 zone=ffffffff81815d7a order=9 ret= after: transhuge-stres-4235 [000] 453.149280: mm_compaction_finished: node=0 zone=ffffffff81815d7a order=9 ret=partial Signed-off-by: Vlastimil Babka Reviewed-by: Steven Rostedt Cc: Joonsoo Kim Cc: Ingo Molnar Cc: Mel Gorman Cc: David Rientjes Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index aa8f61cf3a19..f14ba989092f 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -15,7 +15,7 @@ /* For more detailed tracepoint output */ #define COMPACT_NO_SUITABLE_PAGE 5 #define COMPACT_NOT_SUITABLE_ZONE 6 -/* When adding new state, please change compaction_status_string, too */ +/* When adding new states, please adjust include/trace/events/compaction.h */ /* Used to signal whether compaction detected need_sched() or lock contention */ /* No contention detected */ -- cgit v1.2.3 From 2d1e10412c2388ff9b6afc60536eaa195a419289 Mon Sep 17 00:00:00 2001 From: Vlastimil Babka Date: Thu, 5 Nov 2015 18:48:02 -0800 Subject: mm, compaction: distinguish contended status in tracepoints Compaction returns prematurely with COMPACT_PARTIAL when contended or has fatal signal pending. This is ok for the callers, but might be misleading in the traces, as the usual reason to return COMPACT_PARTIAL is that we think the allocation should succeed. After this patch we distinguish the premature ending condition in the mm_compaction_finished and mm_compaction_end tracepoints. The contended status covers the following reasons: - lock contention or need_resched() detected in async compaction - fatal signal pending - too many pages isolated in the zone (only for async compaction) Further distinguishing the exact reason seems unnecessary for now. Signed-off-by: Vlastimil Babka Cc: Joonsoo Kim Cc: Mel Gorman Cc: David Rientjes Cc: Steven Rostedt Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compaction.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/compaction.h b/include/linux/compaction.h index f14ba989092f..4cd4ddf64cc7 100644 --- a/include/linux/compaction.h +++ b/include/linux/compaction.h @@ -15,6 +15,7 @@ /* For more detailed tracepoint output */ #define COMPACT_NO_SUITABLE_PAGE 5 #define COMPACT_NOT_SUITABLE_ZONE 6 +#define COMPACT_CONTENDED 7 /* When adding new states, please adjust include/trace/events/compaction.h */ /* Used to signal whether compaction detected need_sched() or lock contention */ -- cgit v1.2.3 From da39da3a54fed88e29024f2f1f6cd7357cd03a44 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Thu, 5 Nov 2015 18:48:05 -0800 Subject: mm, oom: remove task_lock protecting comm printing The oom killer takes task_lock() in a couple of places solely to protect printing the task's comm. A process's comm, including current's comm, may change due to /proc/pid/comm or PR_SET_NAME. The comm will always be NULL-terminated, so the worst race scenario would only be during update. We can tolerate a comm being printed that is in the middle of an update to avoid taking the lock. Other locations in the kernel have already dropped task_lock() when printing comm, so this is consistent. Signed-off-by: David Rientjes Suggested-by: Oleg Nesterov Cc: Michal Hocko Cc: Vladimir Davydov Cc: Sergey Senozhatsky Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 1b357997cac5..5a1311942358 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -93,7 +93,7 @@ extern int current_cpuset_is_being_rebound(void); extern void rebuild_sched_domains(void); -extern void cpuset_print_task_mems_allowed(struct task_struct *p); +extern void cpuset_print_current_mems_allowed(void); /* * read_mems_allowed_begin is required when making decisions involving @@ -219,7 +219,7 @@ static inline void rebuild_sched_domains(void) partition_sched_domains(1, NULL, NULL); } -static inline void cpuset_print_task_mems_allowed(struct task_struct *p) +static inline void cpuset_print_current_mems_allowed(void) { } -- cgit v1.2.3 From 3ca65c19ddbb45f504edf92fe7126ecc94d56e36 Mon Sep 17 00:00:00 2001 From: Vineet Gupta Date: Thu, 5 Nov 2015 18:48:29 -0800 Subject: mm: optimize PageHighMem() check This came up when implementing HIHGMEM/PAE40 for ARC. The kmap() / kmap_atomic() generated code seemed needlessly bloated due to the way PageHighMem() macro is implemented. It derives the exact zone for page and then does pointer subtraction with first zone to infer the zone_type. The pointer arithmatic in turn generates the code bloat. PageHighMem(page) is_highmem(page_zone(page)) zone_off = (char *)zone - (char *)zone->zone_pgdat->node_zones Instead use is_highmem_idx() to work on zone_type available in page flags ----- Before ----- 80756348: mov_s r13,r0 8075634a: ld_s r2,[r13,0] 8075634c: lsr_s r2,r2,30 8075634e: mpy r2,r2,0x2a4 80756352: add_s r2,r2,0x80aef880 80756358: ld_s r3,[r2,28] 8075635a: sub_s r2,r2,r3 8075635c: breq r2,0x2a4,80756378 80756364: breq r2,0x548,80756378 ----- After ----- 80756330: mov_s r13,r0 80756332: ld_s r2,[r13,0] 80756334: lsr_s r2,r2,30 80756336: sub_s r2,r2,1 80756338: brlo r2,2,80756348 For x86 defconfig build (32 bit only) it saves around 900 bytes. For ARC defconfig with HIGHMEM, it saved around 2K bytes. ---->8------- ./scripts/bloat-o-meter x86/vmlinux-defconfig-pre x86/vmlinux-defconfig-post add/remove: 0/0 grow/shrink: 0/36 up/down: 0/-934 (-934) function old new delta saveable_page 162 154 -8 saveable_highmem_page 154 146 -8 skb_gro_reset_offset 147 131 -16 ... ... __change_page_attr_set_clr 1715 1678 -37 setup_data_read 434 394 -40 mon_bin_event 1967 1927 -40 swsusp_save 1148 1105 -43 _set_pages_array 549 493 -56 ---->8------- e.g. For ARC kmap() Signed-off-by: Vineet Gupta Acked-by: Michal Hocko Cc: Naoya Horiguchi Cc: Hugh Dickins Cc: "Kirill A. Shutemov" Cc: Michal Hocko Cc: Jennifer Herbert Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index 416509e26d6d..a525e5067484 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -256,7 +256,7 @@ PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) * Must use a macro here due to header dependency issues. page_zone() is not * available at this point. */ -#define PageHighMem(__p) is_highmem(page_zone(__p)) +#define PageHighMem(__p) is_highmem_idx(page_zonenum(__p)) #else PAGEFLAG_FALSE(HighMem) #endif -- cgit v1.2.3 From c2d42c16ad83006a706d83e51a7268db04af733a Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Thu, 5 Nov 2015 18:48:43 -0800 Subject: mm/vmstat.c: uninline node_page_state() With x86_64 (config http://ozlabs.org/~akpm/config-akpm2.txt) and old gcc (4.4.4), drivers/base/node.c:node_read_meminfo() is using 2344 bytes of stack. Uninlining node_page_state() reduces this to 440 bytes. The stack consumption issue is fixed by newer gcc (4.8.4) however with that compiler this patch reduces the node.o text size from 7314 bytes to 4578. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 24 +----------------------- 1 file changed, 1 insertion(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 82e7db7f7100..49dfe40b3673 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -161,30 +161,8 @@ static inline unsigned long zone_page_state_snapshot(struct zone *zone, } #ifdef CONFIG_NUMA -/* - * Determine the per node value of a stat item. This function - * is called frequently in a NUMA machine, so try to be as - * frugal as possible. - */ -static inline unsigned long node_page_state(int node, - enum zone_stat_item item) -{ - struct zone *zones = NODE_DATA(node)->node_zones; - - return -#ifdef CONFIG_ZONE_DMA - zone_page_state(&zones[ZONE_DMA], item) + -#endif -#ifdef CONFIG_ZONE_DMA32 - zone_page_state(&zones[ZONE_DMA32], item) + -#endif -#ifdef CONFIG_HIGHMEM - zone_page_state(&zones[ZONE_HIGHMEM], item) + -#endif - zone_page_state(&zones[ZONE_NORMAL], item) + - zone_page_state(&zones[ZONE_MOVABLE], item); -} +extern unsigned long node_page_state(int node, enum zone_stat_item item); extern void zone_statistics(struct zone *, struct zone *, gfp_t gfp); #else -- cgit v1.2.3 From f7ae3a95eab4e6766768cef664d2d429f53bd5f4 Mon Sep 17 00:00:00 2001 From: yalin wang Date: Thu, 5 Nov 2015 18:48:50 -0800 Subject: include/linux/vm_event_item.h: change HIGHMEM_ZONE macro definition Change HIGHMEM_ZONE to be the same as the DMA_ZONE macro. Signed-off-by: yalin wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 9246d32dc973..e623d392db0c 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -14,12 +14,12 @@ #endif #ifdef CONFIG_HIGHMEM -#define HIGHMEM_ZONE(xx) , xx##_HIGH +#define HIGHMEM_ZONE(xx) xx##_HIGH, #else #define HIGHMEM_ZONE(xx) #endif -#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL HIGHMEM_ZONE(xx) , xx##_MOVABLE +#define FOR_ALL_ZONES(xx) DMA_ZONE(xx) DMA32_ZONE(xx) xx##_NORMAL, HIGHMEM_ZONE(xx) xx##_MOVABLE enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, FOR_ALL_ZONES(PGALLOC), -- cgit v1.2.3 From d05e83a6f861ad02c2fcba75d4c4cfe49e3bc90f Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:48:59 -0800 Subject: memcg: simplify charging kmem pages Charging kmem pages proceeds in two steps. First, we try to charge the allocation size to the memcg the current task belongs to, then we allocate a page and "commit" the charge storing the pointer to the memcg in the page struct. Such a design looks overcomplicated, because there is not much sense in trying charging the allocation before actually allocating a page: we won't be able to consume much memory over the limit even if we charge after doing the actual allocation, besides we already charge user pages post factum, so being pedantic with kmem pages just looks pointless. So this patch simplifies the design by merging the "charge" and the "commit" steps into the same function, which takes the allocated page. Also, rename the charge and uncharge methods to memcg_kmem_charge and memcg_kmem_uncharge and make the charge method return error code instead of bool to conform to mem_cgroup_try_charge. Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 69 +++++++++++++--------------------------------- 1 file changed, 19 insertions(+), 50 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 4142f94822ea..c95246627f87 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -752,11 +752,8 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) * conditions, but because they are pretty simple, they are expected to be * fast. */ -bool __memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, - int order); -void __memcg_kmem_commit_charge(struct page *page, - struct mem_cgroup *memcg, int order); -void __memcg_kmem_uncharge_pages(struct page *page, int order); +int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); +void __memcg_kmem_uncharge(struct page *page, int order); /* * helper for acessing a memcg's index. It will be used as an index in the @@ -789,52 +786,30 @@ static inline bool __memcg_kmem_bypass(gfp_t gfp) } /** - * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. - * @gfp: the gfp allocation flags. - * @memcg: a pointer to the memcg this was charged against. - * @order: allocation order. + * memcg_kmem_charge: charge a kmem page + * @page: page to charge + * @gfp: reclaim mode + * @order: allocation order * - * returns true if the memcg where the current task belongs can hold this - * allocation. - * - * We return true automatically if this allocation is not to be accounted to - * any memcg. + * Returns 0 on success, an error code on failure. */ -static inline bool -memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) +static __always_inline int memcg_kmem_charge(struct page *page, + gfp_t gfp, int order) { if (__memcg_kmem_bypass(gfp)) - return true; - return __memcg_kmem_newpage_charge(gfp, memcg, order); + return 0; + return __memcg_kmem_charge(page, gfp, order); } /** - * memcg_kmem_uncharge_pages: uncharge pages from memcg - * @page: pointer to struct page being freed - * @order: allocation order. + * memcg_kmem_uncharge: uncharge a kmem page + * @page: page to uncharge + * @order: allocation order */ -static inline void -memcg_kmem_uncharge_pages(struct page *page, int order) +static __always_inline void memcg_kmem_uncharge(struct page *page, int order) { if (memcg_kmem_enabled()) - __memcg_kmem_uncharge_pages(page, order); -} - -/** - * memcg_kmem_commit_charge: embeds correct memcg in a page - * @page: pointer to struct page recently allocated - * @memcg: the memcg structure we charged against - * @order: allocation order. - * - * Needs to be called after memcg_kmem_newpage_charge, regardless of success or - * failure of the allocation. if @page is NULL, this function will revert the - * charges. Otherwise, it will commit @page to @memcg. - */ -static inline void -memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) -{ - if (memcg_kmem_enabled() && memcg) - __memcg_kmem_commit_charge(page, memcg, order); + __memcg_kmem_uncharge(page, order); } /** @@ -878,18 +853,12 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) return false; } -static inline bool -memcg_kmem_newpage_charge(gfp_t gfp, struct mem_cgroup **memcg, int order) -{ - return true; -} - -static inline void memcg_kmem_uncharge_pages(struct page *page, int order) +static inline int memcg_kmem_charge(struct page *page, gfp_t gfp, int order) { + return 0; } -static inline void -memcg_kmem_commit_charge(struct page *page, struct mem_cgroup *memcg, int order) +static inline void memcg_kmem_uncharge(struct page *page, int order) { } -- cgit v1.2.3 From f3ccb2c42297757d2e9b820ad37960462df7b7c1 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:49:01 -0800 Subject: memcg: unify slab and other kmem pages charging We have memcg_kmem_charge and memcg_kmem_uncharge methods for charging and uncharging kmem pages to memcg, but currently they are not used for charging slab pages (i.e. they are only used for charging pages allocated with alloc_kmem_pages). The only reason why the slab subsystem uses special helpers, memcg_charge_slab and memcg_uncharge_slab, is that it needs to charge to the memcg of kmem cache while memcg_charge_kmem charges to the memcg that the current task belongs to. To remove this diversity, this patch adds an extra argument to __memcg_kmem_charge that can be a pointer to a memcg or NULL. If it is not NULL, the function tries to charge to the memcg it points to, otherwise it charge to the current context. Next, it makes the slab subsystem use this function to charge slab pages. Since memcg_charge_kmem and memcg_uncharge_kmem helpers are now used only in __memcg_kmem_charge and __memcg_kmem_uncharge, they are inlined. Since __memcg_kmem_charge stores a pointer to the memcg in the page struct, we don't need memcg_uncharge_slab anymore and can use free_kmem_pages. Besides, one can now detect which memcg a slab page belongs to by reading /proc/kpagecgroup. Note, this patch switches slab to charge-after-alloc design. Since this design is already used for all other memcg charges, it should not make any difference. [hannes@cmpxchg.org: better to have an outer function than a magic parameter for the memcg lookup] Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Signed-off-by: Johannes Weiner Cc: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index c95246627f87..0ba4c86d3b40 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -752,6 +752,8 @@ static inline bool memcg_kmem_is_active(struct mem_cgroup *memcg) * conditions, but because they are pretty simple, they are expected to be * fast. */ +int __memcg_kmem_charge_memcg(struct page *page, gfp_t gfp, int order, + struct mem_cgroup *memcg); int __memcg_kmem_charge(struct page *page, gfp_t gfp, int order); void __memcg_kmem_uncharge(struct page *page, int order); @@ -770,10 +772,6 @@ void __memcg_kmem_put_cache(struct kmem_cache *cachep); struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr); -int memcg_charge_kmem(struct mem_cgroup *memcg, gfp_t gfp, - unsigned long nr_pages); -void memcg_uncharge_kmem(struct mem_cgroup *memcg, unsigned long nr_pages); - static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) -- cgit v1.2.3 From df4065516b0dbfa35ac0e9b8124d441221c0a285 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:49:04 -0800 Subject: memcg: simplify and inline __mem_cgroup_from_kmem Before the previous patch ("memcg: unify slab and other kmem pages charging"), __mem_cgroup_from_kmem had to handle two types of kmem - slab pages and pages allocated with alloc_kmem_pages - memcg in the page struct. Now we can unify it. Since after it, this function becomes tiny we can fold it into mem_cgroup_from_kmem. [hughd@google.com: move mem_cgroup_from_kmem into list_lru.c] Signed-off-by: Vladimir Davydov Acked-by: Michal Hocko Cc: Johannes Weiner Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 0ba4c86d3b40..998311e0c70c 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -770,8 +770,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) struct kmem_cache *__memcg_kmem_get_cache(struct kmem_cache *cachep); void __memcg_kmem_put_cache(struct kmem_cache *cachep); -struct mem_cgroup *__mem_cgroup_from_kmem(void *ptr); - static inline bool __memcg_kmem_bypass(gfp_t gfp) { if (!memcg_kmem_enabled()) @@ -830,13 +828,6 @@ static __always_inline void memcg_kmem_put_cache(struct kmem_cache *cachep) if (memcg_kmem_enabled()) __memcg_kmem_put_cache(cachep); } - -static __always_inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr) -{ - if (!memcg_kmem_enabled()) - return NULL; - return __mem_cgroup_from_kmem(ptr); -} #else #define for_each_memcg_cache_index(_idx) \ for (; NULL; ) @@ -882,11 +873,5 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) static inline void memcg_kmem_put_cache(struct kmem_cache *cachep) { } - -static inline struct mem_cgroup *mem_cgroup_from_kmem(void *ptr) -{ - return NULL; -} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ - -- cgit v1.2.3 From 706874e9096e9d468eed9c2b03c8374e806535f3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Thu, 5 Nov 2015 18:49:27 -0800 Subject: mm: do not inc NR_PAGETABLE if ptlock_init failed If ALLOC_SPLIT_PTLOCKS is defined, ptlock_init may fail, in which case we shouldn't increment NR_PAGETABLE. Since small allocations, such as ptlock, normally do not fail (currently they can fail if kmemcg is used though), this patch does not really fix anything and should be considered as a code cleanup. Signed-off-by: Vladimir Davydov Acked-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index fa08f3cf0f22..3c258f8eb9ae 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1606,8 +1606,10 @@ static inline void pgtable_init(void) static inline bool pgtable_page_ctor(struct page *page) { + if (!ptlock_init(page)) + return false; inc_zone_page_state(page, NR_PAGETABLE); - return ptlock_init(page); + return true; } static inline void pgtable_page_dtor(struct page *page) -- cgit v1.2.3 From 45637bab30d6e7651737f51aa99417baef4d114a Mon Sep 17 00:00:00 2001 From: Hugh Dickins Date: Thu, 5 Nov 2015 18:49:40 -0800 Subject: mm: rename mem_cgroup_migrate to mem_cgroup_replace_page After v4.3's commit 0610c25daa3e ("memcg: fix dirty page migration") mem_cgroup_migrate() doesn't have much to offer in page migration: convert migrate_misplaced_transhuge_page() to set_page_memcg() instead. Then rename mem_cgroup_migrate() to mem_cgroup_replace_page(), since its remaining callers are replace_page_cache_page() and shmem_replace_page(): both of whom passed lrucare true, so just eliminate that argument. Signed-off-by: Hugh Dickins Cc: Christoph Lameter Cc: "Kirill A. Shutemov" Cc: Rik van Riel Cc: Vlastimil Babka Cc: Davidlohr Bueso Cc: Oleg Nesterov Cc: Sasha Levin Cc: Dmitry Vyukov Cc: KOSAKI Motohiro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 998311e0c70c..c06c70b4404e 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -297,8 +297,7 @@ void mem_cgroup_cancel_charge(struct page *page, struct mem_cgroup *memcg); void mem_cgroup_uncharge(struct page *page); void mem_cgroup_uncharge_list(struct list_head *page_list); -void mem_cgroup_migrate(struct page *oldpage, struct page *newpage, - bool lrucare); +void mem_cgroup_replace_page(struct page *oldpage, struct page *newpage); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); struct lruvec *mem_cgroup_page_lruvec(struct page *, struct zone *); @@ -537,9 +536,7 @@ static inline void mem_cgroup_uncharge_list(struct list_head *page_list) { } -static inline void mem_cgroup_migrate(struct page *oldpage, - struct page *newpage, - bool lrucare) +static inline void mem_cgroup_replace_page(struct page *old, struct page *new) { } -- cgit v1.2.3 From 6071ca5201066f4b2a61cfb693dd186d6bc6e9f3 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 5 Nov 2015 18:50:26 -0800 Subject: mm: page_counter: let page_counter_try_charge() return bool page_counter_try_charge() currently returns 0 on success and -ENOMEM on failure, which is surprising behavior given the function name. Make it follow the expected pattern of try_stuff() functions that return a boolean true to indicate success, or false for failure. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Cc: Vladimir Davydov Signed-off-by: Linus Torvalds --- include/linux/page_counter.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page_counter.h b/include/linux/page_counter.h index 17fa4f8de3a6..7e62920a3a94 100644 --- a/include/linux/page_counter.h +++ b/include/linux/page_counter.h @@ -36,9 +36,9 @@ static inline unsigned long page_counter_read(struct page_counter *counter) void page_counter_cancel(struct page_counter *counter, unsigned long nr_pages); void page_counter_charge(struct page_counter *counter, unsigned long nr_pages); -int page_counter_try_charge(struct page_counter *counter, - unsigned long nr_pages, - struct page_counter **fail); +bool page_counter_try_charge(struct page_counter *counter, + unsigned long nr_pages, + struct page_counter **fail); void page_counter_uncharge(struct page_counter *counter, unsigned long nr_pages); int page_counter_limit(struct page_counter *counter, unsigned long limit); int page_counter_memparse(const char *buf, const char *max, -- cgit v1.2.3 From 5ba97bf9d8754bd984e81c1061fcda682681939e Mon Sep 17 00:00:00 2001 From: Tetsuo Handa Date: Thu, 5 Nov 2015 18:50:40 -0800 Subject: mm: remove refresh_cpu_vm_stats() definition for !SMP kernel refresh_cpu_vm_stats(int cpu) is no longer referenced by !SMP kernel since Linux 3.12. Signed-off-by: Tetsuo Handa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index 49dfe40b3673..5dbc8b0ee567 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -247,7 +247,6 @@ static inline void __dec_zone_page_state(struct page *page, #define set_pgdat_percpu_threshold(pgdat, callback) { } -static inline void refresh_cpu_vm_stats(int cpu) { } static inline void refresh_zone_stat_thresholds(void) { } static inline void cpu_vm_stats_fold(int cpu) { } -- cgit v1.2.3 From a8ca5d0ecbdde5cc3d7accacbd69968b0c98764e Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:33 -0800 Subject: mm: mlock: add new mlock system call With the refactored mlock code, introduce a new system call for mlock. The new call will allow the user to specify what lock states are being added. mlock2 is trivial at the moment, but a follow on patch will add a new mlock state making it useful. Signed-off-by: Eric B Munson Acked-by: Michal Hocko Acked-by: Vlastimil Babka Cc: Heiko Carstens Cc: Geert Uytterhoeven Cc: Catalin Marinas Cc: Stephen Rothwell Cc: Guenter Roeck Cc: Jonathan Corbet Cc: Kirill A. Shutemov Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index a460e2ef2843..a156b82dd14c 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -887,4 +887,6 @@ asmlinkage long sys_execveat(int dfd, const char __user *filename, asmlinkage long sys_membarrier(int cmd, int flags); +asmlinkage long sys_mlock2(unsigned long start, size_t len, int flags); + #endif -- cgit v1.2.3 From de60f5f10c58d4f34b68622442c0e04180367f3f Mon Sep 17 00:00:00 2001 From: Eric B Munson Date: Thu, 5 Nov 2015 18:51:36 -0800 Subject: mm: introduce VM_LOCKONFAULT The cost of faulting in all memory to be locked can be very high when working with large mappings. If only portions of the mapping will be used this can incur a high penalty for locking. For the example of a large file, this is the usage pattern for a large statical language model (probably applies to other statical or graphical models as well). For the security example, any application transacting in data that cannot be swapped out (credit card data, medical records, etc). This patch introduces the ability to request that pages are not pre-faulted, but are placed on the unevictable LRU when they are finally faulted in. The VM_LOCKONFAULT flag will be used together with VM_LOCKED and has no effect when set without VM_LOCKED. Setting the VM_LOCKONFAULT flag for a VMA will cause pages faulted into that VMA to be added to the unevictable LRU when they are faulted or if they are already present, but will not cause any missing pages to be faulted in. Exposing this new lock state means that we cannot overload the meaning of the FOLL_POPULATE flag any longer. Prior to this patch it was used to mean that the VMA for a fault was locked. This means we need the new FOLL_MLOCK flag to communicate the locked state of a VMA. FOLL_POPULATE will now only control if the VMA should be populated and in the case of VM_LOCKONFAULT, it will not be set. Signed-off-by: Eric B Munson Acked-by: Kirill A. Shutemov Acked-by: Vlastimil Babka Cc: Michal Hocko Cc: Jonathan Corbet Cc: Catalin Marinas Cc: Geert Uytterhoeven Cc: Guenter Roeck Cc: Heiko Carstens Cc: Michael Kerrisk Cc: Ralf Baechle Cc: Shuah Khan Cc: Stephen Rothwell Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 3c258f8eb9ae..906c46a05707 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -139,6 +139,7 @@ extern unsigned int kobjsize(const void *objp); #define VM_DONTCOPY 0x00020000 /* Do not copy this vma on fork */ #define VM_DONTEXPAND 0x00040000 /* Cannot expand with mremap() */ +#define VM_LOCKONFAULT 0x00080000 /* Lock the pages covered when they are faulted in */ #define VM_ACCOUNT 0x00100000 /* Is a VM accounted object */ #define VM_NORESERVE 0x00200000 /* should the VM suppress accounting */ #define VM_HUGETLB 0x00400000 /* Huge TLB Page VM */ @@ -202,6 +203,9 @@ extern unsigned int kobjsize(const void *objp); /* This mask defines which mm->def_flags a process can inherit its parent */ #define VM_INIT_DEF_MASK VM_NOHUGEPAGE +/* This mask is used to clear all the VMA flags used by mlock */ +#define VM_LOCKED_CLEAR_MASK (~(VM_LOCKED | VM_LOCKONFAULT)) + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. @@ -2137,6 +2141,7 @@ static inline struct page *follow_page(struct vm_area_struct *vma, #define FOLL_NUMA 0x200 /* force NUMA hinting page fault */ #define FOLL_MIGRATION 0x400 /* wait for page to replace migration entry */ #define FOLL_TRIED 0x800 /* a retry, previous pass started an IO */ +#define FOLL_MLOCK 0x1000 /* lock present pages */ typedef int (*pte_fn_t)(pte_t *pte, pgtable_t token, unsigned long addr, void *data); -- cgit v1.2.3 From 61b590b9ee4221173ad6990a1150c5c9db73564e Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Fri, 23 Oct 2015 12:43:18 +0200 Subject: netfilter: ingress: don't use nf_hook_list_active nf_hook_list_active() always returns true once at least one device has NF_INGRESS hook enabled. Thus, don't use this function. Instead, inverse the test and use the static key to elide list_empty test if no NF_INGRESS hooks are active. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index 187feabe557c..ba7ce8805fe3 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -5,10 +5,13 @@ #include #ifdef CONFIG_NETFILTER_INGRESS -static inline int nf_hook_ingress_active(struct sk_buff *skb) +static inline bool nf_hook_ingress_active(const struct sk_buff *skb) { - return nf_hook_list_active(&skb->dev->nf_hooks_ingress, - NFPROTO_NETDEV, NF_NETDEV_INGRESS); +#ifdef HAVE_JUMP_LABEL + if (!static_key_false(&nf_hooks_needed[NFPROTO_NETDEV][NF_NETDEV_INGRESS])) + return false; +#endif + return !list_empty(&skb->dev->nf_hooks_ingress); } static inline int nf_hook_ingress(struct sk_buff *skb) -- cgit v1.2.3 From b4865988eab598e56e6e628b9b32441acd142b28 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Fri, 6 Nov 2015 18:35:57 +0100 Subject: netfilter: ingress: fix wrong input interface on hook The input and output interfaces in nf_hook_state_init() are flipped. This fixes iif matching on nftables. Reported-by: Patrick McHardy Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter_ingress.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter_ingress.h b/include/linux/netfilter_ingress.h index ba7ce8805fe3..5fcd375ef175 100644 --- a/include/linux/netfilter_ingress.h +++ b/include/linux/netfilter_ingress.h @@ -19,8 +19,8 @@ static inline int nf_hook_ingress(struct sk_buff *skb) struct nf_hook_state state; nf_hook_state_init(&state, &skb->dev->nf_hooks_ingress, - NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, NULL, - skb->dev, NULL, dev_net(skb->dev), NULL); + NF_NETDEV_INGRESS, INT_MIN, NFPROTO_NETDEV, + skb->dev, NULL, NULL, dev_net(skb->dev), NULL); return nf_hook_slow(skb, &state); } -- cgit v1.2.3 From 1b9863c6aa56d92126ec0d5c42eae25df52b7ca1 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:47 -0700 Subject: device property: Introducing enum dev_dma_attr A device could have one of the following DMA attributes: * DMA not supported * DMA non-coherent * DMA coherent So, this patch introduces enum dev_dma_attribute. This will be used by new APIs introduced in later patches. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 463de52fe891..8eecf200bae5 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -27,6 +27,12 @@ enum dev_prop_type { DEV_PROP_MAX, }; +enum dev_dma_attr { + DEV_DMA_NOT_SUPPORTED, + DEV_DMA_NON_COHERENT, + DEV_DMA_COHERENT, +}; + bool device_property_present(struct device *dev, const char *propname); int device_property_read_u8_array(struct device *dev, const char *propname, u8 *val, size_t nval); -- cgit v1.2.3 From b84f196d963c3159329f72ca1913b08679004a43 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:48 -0700 Subject: ACPI: Adding DMA Attribute APIs for ACPI Device Adding acpi_get_dma_attr() to query DMA attributes of ACPI devices. It returns the enum dev_dma_attr, which communicates DMA information more clearly. This API replaces the acpi_check_dma(), which will be removed in subsequent patch. This patch also provides a convenient function, acpi_dma_supported(), to check DMA support of the specified ACPI device. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 496265b0f527..292af3b69ede 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -579,6 +579,16 @@ static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) return false; } +static inline bool acpi_dma_supported(struct acpi_device *adev) +{ + return false; +} + +static inline enum dev_dma_attr acpi_get_dma_attr(struct acpi_device *adev) +{ + return DEV_DMA_NOT_SUPPORTED; +} + #define ACPI_PTR(_ptr) (NULL) #endif /* !CONFIG_ACPI */ -- cgit v1.2.3 From e5e558644bbb23cad03c586703331b8bcd9e0e6c Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:49 -0700 Subject: device property: Adding DMA Attribute APIs for Generic Devices The function device_dma_is_coherent() does not sufficiently communicate device DMA attributes. Instead, this patch introduces device_get_dma_attr(), which returns enum dev_dma_attr. It replaces the acpi_check_dma(), which will be removed in subsequent patch. This also provides a convenient function, device_dma_supported(), to check DMA support of the specified device. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/property.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/property.h b/include/linux/property.h index 8eecf200bae5..7200490b7e6f 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -176,6 +176,10 @@ void device_add_property_set(struct device *dev, struct property_set *pset); bool device_dma_is_coherent(struct device *dev); +bool device_dma_supported(struct device *dev); + +enum dev_dma_attr device_get_dma_attr(struct device *dev); + int device_get_phy_mode(struct device *dev); void *device_get_mac_address(struct device *dev, char *addr, int alen); -- cgit v1.2.3 From ab3d527329f01dd63dc852041006d1a24895d116 Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:51 -0700 Subject: device property: ACPI: Remove unused DMA APIs These DMA APIs are replaced with the newer versions, which return the enum dev_dma_attr. So, we can safely remove them. Signed-off-by: Suravee Suthikulpanit Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/acpi.h | 5 ----- include/linux/property.h | 2 -- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi.h b/include/linux/acpi.h index 292af3b69ede..b5868300df75 100644 --- a/include/linux/acpi.h +++ b/include/linux/acpi.h @@ -574,11 +574,6 @@ static inline int acpi_device_modalias(struct device *dev, return -ENODEV; } -static inline bool acpi_check_dma(struct acpi_device *adev, bool *coherent) -{ - return false; -} - static inline bool acpi_dma_supported(struct acpi_device *adev) { return false; diff --git a/include/linux/property.h b/include/linux/property.h index 7200490b7e6f..0a3705a7c9f2 100644 --- a/include/linux/property.h +++ b/include/linux/property.h @@ -174,8 +174,6 @@ struct property_set { void device_add_property_set(struct device *dev, struct property_set *pset); -bool device_dma_is_coherent(struct device *dev); - bool device_dma_supported(struct device *dev); enum dev_dma_attr device_get_dma_attr(struct device *dev); -- cgit v1.2.3 From 50230713b63941f4b6b562eea0834f751aa0801e Mon Sep 17 00:00:00 2001 From: "Suthikulpanit, Suravee" Date: Wed, 28 Oct 2015 15:50:53 -0700 Subject: PCI: OF: Move of_pci_dma_configure() to pci_dma_configure() This patch move of_pci_dma_configure() to a more generic pci_dma_configure(), which can be extended by non-OF code (e.g. ACPI). This has no functional change. Signed-off-by: Suravee Suthikulpanit Acked-by: Rob Herring Acked-by: Bjorn Helgaas Reviewed-by: Hanjun Guo Signed-off-by: Rafael J. Wysocki --- include/linux/of_pci.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of_pci.h b/include/linux/of_pci.h index 29fd3fe1c035..ce0e5abeb454 100644 --- a/include/linux/of_pci.h +++ b/include/linux/of_pci.h @@ -16,7 +16,6 @@ int of_pci_get_devfn(struct device_node *np); int of_irq_parse_and_map_pci(const struct pci_dev *dev, u8 slot, u8 pin); int of_pci_parse_bus_range(struct device_node *node, struct resource *res); int of_get_pci_domain_nr(struct device_node *node); -void of_pci_dma_configure(struct pci_dev *pci_dev); #else static inline int of_irq_parse_pci(const struct pci_dev *pdev, struct of_phandle_args *out_irq) { @@ -51,8 +50,6 @@ of_get_pci_domain_nr(struct device_node *node) { return -1; } - -static inline void of_pci_dma_configure(struct pci_dev *pci_dev) { } #endif #if defined(CONFIG_OF_ADDRESS) -- cgit v1.2.3 From e2b19197ff9dc46f3e3888f273c4395f9e5a9856 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:09 -0800 Subject: mm, page_alloc: remove unnecessary parameter from zone_watermark_ok_safe Overall, the intent of this series is to remove the zonelist cache which was introduced to avoid high overhead in the page allocator. Once this is done, it is necessary to reduce the cost of watermark checks. The series starts with minor micro-optimisations. Next it notes that GFP flags that affect watermark checks are abused. __GFP_WAIT historically identified callers that could not sleep and could access reserves. This was later abused to identify callers that simply prefer to avoid sleeping and have other options. A patch distinguishes between atomic callers, high-priority callers and those that simply wish to avoid sleep. The zonelist cache has been around for a long time but it is of dubious merit with a lot of complexity and some issues that are explained. The most important issue is that a failed THP allocation can cause a zone to be treated as "full". This potentially causes unnecessary stalls, reclaim activity or remote fallbacks. The issues could be fixed but it's not worth it. The series places a small number of other micro-optimisations on top before examining GFP flags watermarks. High-order watermarks enforcement can cause high-order allocations to fail even though pages are free. The watermark checks both protect high-order atomic allocations and make kswapd aware of high-order pages but there is a much better way that can be handled using migrate types. This series uses page grouping by mobility to reserve pageblocks for high-order allocations with the size of the reservation depending on demand. kswapd awareness is maintained by examining the free lists. By patch 12 in this series, there are no high-order watermark checks while preserving the properties that motivated the introduction of the watermark checks. This patch (of 10): No user of zone_watermark_ok_safe() specifies alloc_flags. This patch removes the unnecessary parameter. Signed-off-by: Mel Gorman Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Reviewed-by: Christoph Lameter Acked-by: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 2d7e660cdefe..e326843c995a 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -817,7 +817,7 @@ void wakeup_kswapd(struct zone *zone, int order, enum zone_type classzone_idx); bool zone_watermark_ok(struct zone *z, unsigned int order, unsigned long mark, int classzone_idx, int alloc_flags); bool zone_watermark_ok_safe(struct zone *z, unsigned int order, - unsigned long mark, int classzone_idx, int alloc_flags); + unsigned long mark, int classzone_idx); enum memmap_context { MEMMAP_EARLY, MEMMAP_HOTPLUG, -- cgit v1.2.3 From 46e700abc44ce215acb4341d9702ce3972eda571 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:15 -0800 Subject: mm, page_alloc: remove unnecessary taking of a seqlock when cpusets are disabled There is a seqcounter that protects against spurious allocation failures when a task is changing the allowed nodes in a cpuset. There is no need to check the seqcounter until a cpuset exists. Signed-off-by: Mel Gorman Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/cpuset.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index 5a1311942358..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -104,6 +104,9 @@ extern void cpuset_print_current_mems_allowed(void); */ static inline unsigned int read_mems_allowed_begin(void) { + if (!cpusets_enabled()) + return 0; + return read_seqcount_begin(¤t->mems_allowed_seq); } @@ -115,6 +118,9 @@ static inline unsigned int read_mems_allowed_begin(void) */ static inline bool read_mems_allowed_retry(unsigned int seq) { + if (!cpusets_enabled()) + return false; + return read_seqcount_retry(¤t->mems_allowed_seq, seq); } -- cgit v1.2.3 From 016c13daa5c9e4827eca703e2f0621c131f2cca3 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:18 -0800 Subject: mm, page_alloc: use masks and shifts when converting GFP flags to migrate types This patch redefines which GFP bits are used for specifying mobility and the order of the migrate types. Once redefined it's possible to convert GFP flags to a migrate type with a simple mask and shift. The only downside is that readers of OOM kill messages and allocation failures may have been used to the existing values but scripts/gfp-translate will help. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 12 +++++++----- include/linux/mmzone.h | 2 +- 2 files changed, 8 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index f92cbd2f4450..440fca3e7e5d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -14,7 +14,7 @@ struct vm_area_struct; #define ___GFP_HIGHMEM 0x02u #define ___GFP_DMA32 0x04u #define ___GFP_MOVABLE 0x08u -#define ___GFP_WAIT 0x10u +#define ___GFP_RECLAIMABLE 0x10u #define ___GFP_HIGH 0x20u #define ___GFP_IO 0x40u #define ___GFP_FS 0x80u @@ -29,7 +29,7 @@ struct vm_area_struct; #define ___GFP_NOMEMALLOC 0x10000u #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u -#define ___GFP_RECLAIMABLE 0x80000u +#define ___GFP_WAIT 0x80000u #define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u #define ___GFP_NO_KSWAPD 0x400000u @@ -126,6 +126,7 @@ struct vm_area_struct; /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) +#define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ #define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ @@ -152,14 +153,15 @@ struct vm_area_struct; /* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { - WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); + BUILD_BUG_ON((1UL << GFP_MOVABLE_SHIFT) != ___GFP_MOVABLE); + BUILD_BUG_ON((___GFP_MOVABLE >> GFP_MOVABLE_SHIFT) != MIGRATE_MOVABLE); if (unlikely(page_group_by_mobility_disabled)) return MIGRATE_UNMOVABLE; /* Group based on mobility */ - return (((gfp_flags & __GFP_MOVABLE) != 0) << 1) | - ((gfp_flags & __GFP_RECLAIMABLE) != 0); + return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } #ifdef CONFIG_HIGHMEM diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index e326843c995a..38bed71758ab 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -37,8 +37,8 @@ enum { MIGRATE_UNMOVABLE, - MIGRATE_RECLAIMABLE, MIGRATE_MOVABLE, + MIGRATE_RECLAIMABLE, MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ MIGRATE_RESERVE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA -- cgit v1.2.3 From d0164adc89f6bb374d304ffcc375c6d2652fe67d Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:21 -0800 Subject: mm, page_alloc: distinguish between being unable to sleep, unwilling to sleep and avoiding waking kswapd __GFP_WAIT has been used to identify atomic context in callers that hold spinlocks or are in interrupts. They are expected to be high priority and have access one of two watermarks lower than "min" which can be referred to as the "atomic reserve". __GFP_HIGH users get access to the first lower watermark and can be called the "high priority reserve". Over time, callers had a requirement to not block when fallback options were available. Some have abused __GFP_WAIT leading to a situation where an optimisitic allocation with a fallback option can access atomic reserves. This patch uses __GFP_ATOMIC to identify callers that are truely atomic, cannot sleep and have no alternative. High priority users continue to use __GFP_HIGH. __GFP_DIRECT_RECLAIM identifies callers that can sleep and are willing to enter direct reclaim. __GFP_KSWAPD_RECLAIM to identify callers that want to wake kswapd for background reclaim. __GFP_WAIT is redefined as a caller that is willing to enter direct reclaim and wake kswapd for background reclaim. This patch then converts a number of sites o __GFP_ATOMIC is used by callers that are high priority and have memory pools for those requests. GFP_ATOMIC uses this flag. o Callers that have a limited mempool to guarantee forward progress clear __GFP_DIRECT_RECLAIM but keep __GFP_KSWAPD_RECLAIM. bio allocations fall into this category where kswapd will still be woken but atomic reserves are not used as there is a one-entry mempool to guarantee progress. o Callers that are checking if they are non-blocking should use the helper gfpflags_allow_blocking() where possible. This is because checking for __GFP_WAIT as was done historically now can trigger false positives. Some exceptions like dm-crypt.c exist where the code intent is clearer if __GFP_DIRECT_RECLAIM is used instead of the helper due to flag manipulations. o Callers that built their own GFP flags instead of starting with GFP_KERNEL and friends now also need to specify __GFP_KSWAPD_RECLAIM. The first key hazard to watch out for is callers that removed __GFP_WAIT and was depending on access to atomic reserves for inconspicuous reasons. In some cases it may be appropriate for them to use __GFP_HIGH. The second key hazard is callers that assembled their own combination of GFP flags instead of starting with something like GFP_KERNEL. They may now wish to specify __GFP_KSWAPD_RECLAIM. It's almost certainly harmless if it's missed in most cases as other activity will wake kswapd. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 46 +++++++++++++++++++++++++++++++++------------- include/linux/skbuff.h | 6 +++--- 2 files changed, 36 insertions(+), 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 440fca3e7e5d..b56e811b6f7c 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -29,12 +29,13 @@ struct vm_area_struct; #define ___GFP_NOMEMALLOC 0x10000u #define ___GFP_HARDWALL 0x20000u #define ___GFP_THISNODE 0x40000u -#define ___GFP_WAIT 0x80000u +#define ___GFP_ATOMIC 0x80000u #define ___GFP_NOACCOUNT 0x100000u #define ___GFP_NOTRACK 0x200000u -#define ___GFP_NO_KSWAPD 0x400000u +#define ___GFP_DIRECT_RECLAIM 0x400000u #define ___GFP_OTHER_NODE 0x800000u #define ___GFP_WRITE 0x1000000u +#define ___GFP_KSWAPD_RECLAIM 0x2000000u /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* @@ -71,7 +72,7 @@ struct vm_area_struct; * __GFP_MOVABLE: Flag that this page will be movable by the page migration * mechanism or reclaimed */ -#define __GFP_WAIT ((__force gfp_t)___GFP_WAIT) /* Can wait and reschedule? */ +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */ #define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ #define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ #define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ @@ -94,23 +95,37 @@ struct vm_area_struct; #define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ #define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ -#define __GFP_NO_KSWAPD ((__force gfp_t)___GFP_NO_KSWAPD) #define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ #define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ +/* + * A caller that is willing to wait may enter direct reclaim and will + * wake kswapd to reclaim pages in the background until the high + * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to + * avoid unnecessary delays when a fallback option is available but + * still allow kswapd to reclaim in the background. The kswapd flag + * can be cleared when the reclaiming of pages would cause unnecessary + * disruption. + */ +#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ +#define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ + /* * This may seem redundant, but it's a way of annotating false positives vs. * allocations that simply cannot be supported (e.g. page tables). */ #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) -#define __GFP_BITS_SHIFT 25 /* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) -/* This equals 0, but use constants in case they ever change */ -#define GFP_NOWAIT (GFP_ATOMIC & ~__GFP_HIGH) -/* GFP_ATOMIC means both !wait (__GFP_WAIT not set) and use emergency pool */ -#define GFP_ATOMIC (__GFP_HIGH) +/* + * GFP_ATOMIC callers can not sleep, need the allocation to succeed. + * A lower watermark is applied to allow access to "atomic reserves" + */ +#define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_WAIT) #define GFP_NOFS (__GFP_WAIT | __GFP_IO) #define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) @@ -119,10 +134,10 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) -#define GFP_IOFS (__GFP_IO | __GFP_FS) -#define GFP_TRANSHUGE (GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ - __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN | \ - __GFP_NO_KSWAPD) +#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM) +#define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ + __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ + ~__GFP_KSWAPD_RECLAIM) /* This mask makes up all the page movable related flags */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) @@ -164,6 +179,11 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } +static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) +{ + return gfp_flags & __GFP_DIRECT_RECLAIM; +} + #ifdef CONFIG_HIGHMEM #define OPT_ZONE_HIGHMEM ZONE_HIGHMEM #else diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 24f4dfd94c51..4355129fff91 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1224,7 +1224,7 @@ static inline int skb_cloned(const struct sk_buff *skb) static inline int skb_unclone(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) return pskb_expand_head(skb, 0, 0, pri); @@ -1308,7 +1308,7 @@ static inline int skb_shared(const struct sk_buff *skb) */ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_shared(skb)) { struct sk_buff *nskb = skb_clone(skb, pri); @@ -1344,7 +1344,7 @@ static inline struct sk_buff *skb_share_check(struct sk_buff *skb, gfp_t pri) static inline struct sk_buff *skb_unshare(struct sk_buff *skb, gfp_t pri) { - might_sleep_if(pri & __GFP_WAIT); + might_sleep_if(gfpflags_allow_blocking(pri)); if (skb_cloned(skb)) { struct sk_buff *nskb = skb_copy(skb, pri); -- cgit v1.2.3 From 40113370836e8e79befa585277296ed42781ef31 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:25 -0800 Subject: mm: page_alloc: remove GFP_IOFS GFP_IOFS was intended to be shorthand for clearing two flags, not a set of allocation flags. There is only one user of this flag combination now and there appears to be no reason why Lustre had to be protected from reclaim stalls. As none of the sites appear to be atomic, this patch simply deletes GFP_IOFS and converts Lustre to using GFP_KERNEL, GFP_NOFS or GFP_NOIO as appropriate. Signed-off-by: Mel Gorman Cc: Oleg Drokin Cc: Andreas Dilger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index b56e811b6f7c..86f9f7da86ea 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -134,7 +134,6 @@ struct vm_area_struct; #define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) -#define GFP_IOFS (__GFP_IO | __GFP_FS | __GFP_KSWAPD_RECLAIM) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ ~__GFP_KSWAPD_RECLAIM) -- cgit v1.2.3 From 71baba4b92dc1fa1bc461742c6ab1942ec6034e9 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:28 -0800 Subject: mm, page_alloc: rename __GFP_WAIT to __GFP_RECLAIM __GFP_WAIT was used to signal that the caller was in atomic context and could not sleep. Now it is possible to distinguish between true atomic context and callers that are not willing to sleep. The latter should clear __GFP_DIRECT_RECLAIM so kswapd will still wake. As clearing __GFP_WAIT behaves differently, there is a risk that people will clear the wrong flags. This patch renames __GFP_WAIT to __GFP_RECLAIM to clearly indicate what it does -- setting it allows all reclaim activity, clearing them prevents it. [akpm@linux-foundation.org: fix build] [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Mel Gorman Acked-by: Michal Hocko Acked-by: Vlastimil Babka Acked-by: Johannes Weiner Cc: Christoph Lameter Acked-by: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 86f9f7da86ea..369227202ac2 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -107,7 +107,7 @@ struct vm_area_struct; * can be cleared when the reclaiming of pages would cause unnecessary * disruption. */ -#define __GFP_WAIT ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ @@ -126,12 +126,12 @@ struct vm_area_struct; */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) -#define GFP_NOIO (__GFP_WAIT) -#define GFP_NOFS (__GFP_WAIT | __GFP_IO) -#define GFP_KERNEL (__GFP_WAIT | __GFP_IO | __GFP_FS) -#define GFP_TEMPORARY (__GFP_WAIT | __GFP_IO | __GFP_FS | \ +#define GFP_NOIO (__GFP_RECLAIM) +#define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) +#define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) -#define GFP_USER (__GFP_WAIT | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ @@ -143,12 +143,12 @@ struct vm_area_struct; #define GFP_MOVABLE_SHIFT 3 /* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_WAIT|__GFP_HIGH|__GFP_IO|__GFP_FS|\ +#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) /* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_WAIT|__GFP_IO|__GFP_FS)) +#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) /* Control allocation constraints */ #define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) -- cgit v1.2.3 From f77cf4e4cc9d40310a7224a1a67c733aeec78836 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:31 -0800 Subject: mm, page_alloc: delete the zonelist_cache The zonelist cache (zlc) was introduced to skip over zones that were recently known to be full. This avoided expensive operations such as the cpuset checks, watermark calculations and zone_reclaim. The situation today is different and the complexity of zlc is harder to justify. 1) The cpuset checks are no-ops unless a cpuset is active and in general are a lot cheaper. 2) zone_reclaim is now disabled by default and I suspect that was a large source of the cost that zlc wanted to avoid. When it is enabled, it's known to be a major source of stalling when nodes fill up and it's unwise to hit every other user with the overhead. 3) Watermark checks are expensive to calculate for high-order allocation requests. Later patches in this series will reduce the cost of the watermark checking. 4) The most important issue is that in the current implementation it is possible for a failed THP allocation to mark a zone full for order-0 allocations and cause a fallback to remote nodes. The last issue could be addressed with additional complexity but as the benefit of zlc is questionable, it is better to remove it. If stalls due to zone_reclaim are ever reported then an alternative would be to introduce deferring logic based on a timeout inside zone_reclaim itself and leave the page allocator fast paths alone. The impact on page-allocator microbenchmarks is negligible as they don't hit the paths where the zlc comes into play. Most page-reclaim related workloads showed no noticeable difference as a result of the removal. The impact was noticeable in a workload called "stutter". One part uses a lot of anonymous memory, a second measures mmap latency and a third copies a large file. In an ideal world the latency application would not notice the mmap latency. On a 2-node machine the results of this patch are stutter 4.3.0-rc1 4.3.0-rc1 baseline nozlc-v4 Min mmap 20.9243 ( 0.00%) 20.7716 ( 0.73%) 1st-qrtle mmap 22.0612 ( 0.00%) 22.0680 ( -0.03%) 2nd-qrtle mmap 22.3291 ( 0.00%) 22.3809 ( -0.23%) 3rd-qrtle mmap 25.2244 ( 0.00%) 25.2396 ( -0.06%) Max-90% mmap 48.0995 ( 0.00%) 28.3713 ( 41.02%) Max-93% mmap 52.5557 ( 0.00%) 36.0170 ( 31.47%) Max-95% mmap 55.8173 ( 0.00%) 47.3163 ( 15.23%) Max-99% mmap 67.3781 ( 0.00%) 70.1140 ( -4.06%) Max mmap 24447.6375 ( 0.00%) 12915.1356 ( 47.17%) Mean mmap 33.7883 ( 0.00%) 27.7944 ( 17.74%) Best99%Mean mmap 27.7825 ( 0.00%) 25.2767 ( 9.02%) Best95%Mean mmap 26.3912 ( 0.00%) 23.7994 ( 9.82%) Best90%Mean mmap 24.9886 ( 0.00%) 23.2251 ( 7.06%) Best50%Mean mmap 22.0157 ( 0.00%) 22.0261 ( -0.05%) Best10%Mean mmap 21.6705 ( 0.00%) 21.6083 ( 0.29%) Best5%Mean mmap 21.5581 ( 0.00%) 21.4611 ( 0.45%) Best1%Mean mmap 21.3079 ( 0.00%) 21.1631 ( 0.68%) Note that the maximum stall latency went from 24 seconds to 12 which is still bad but an improvement. The milage varies considerably 2-node machine on an earlier test went from 494 seconds to 47 seconds and a 4-node machine that tested an earlier version of this patch went from a worst case stall time of 6 seconds to 67ms. The nature of the benchmark is inherently unpredictable as it is hammering the system and the milage will vary between machines. There is a secondary impact with potentially more direct reclaim because zones are now being considered instead of being skipped by zlc. In this particular test run it did not occur so will not be described. However, in at least one test the following was observed 1. Direct reclaim rates were higher. This was likely due to direct reclaim being entered instead of the zlc disabling a zone and busy looping. Busy looping may have the effect of allowing kswapd to make more progress and in some cases may be better overall. If this is found then the correct action is to put direct reclaimers to sleep on a waitqueue and allow kswapd make forward progress. Busy looping on the zlc is even worse than when the allocator used to blindly call congestion_wait(). 2. There was higher swap activity as direct reclaim was active. 3. Direct reclaim efficiency was lower. This is related to 1 as more scanning activity also encountered more pages that could not be immediately reclaimed In that case, the direct page scan and reclaim rates are noticeable but it is not considered a problem for a few reasons 1. The test is primarily concerned with latency. The mmap attempts are also faulted which means there are THP allocation requests. The ZLC could cause zones to be disabled causing the process to busy loop instead of reclaiming. This looks like elevated direct reclaim activity but it's the correct action to take based on what processes requested. 2. The test hammers reclaim and compaction heavily. The number of successful THP faults is highly variable but affects the reclaim stats. It's not a realistic or reasonable measure of page reclaim activity. 3. No other page-reclaim intensive workload that was tested showed a problem. 4. If a workload is identified that benefitted from the busy looping then it should be fixed by having direct reclaimers sleep on a wait queue until woken by kswapd instead of busy looping. We had this class of problem before when congestion_waits() with a fixed timeout was a brain damaged decision but happened to benefit some workloads. If a workload is identified that relied on the zlc to busy loop then it should be fixed correctly and have a direct reclaimer sleep on a waitqueue until woken by kswapd. Signed-off-by: Mel Gorman Acked-by: David Rientjes Acked-by: Christoph Lameter Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 74 -------------------------------------------------- 1 file changed, 74 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 38bed71758ab..1e88aae329ff 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -589,75 +589,8 @@ static inline bool zone_is_empty(struct zone *zone) * [1] : No fallback (__GFP_THISNODE) */ #define MAX_ZONELISTS 2 - - -/* - * We cache key information from each zonelist for smaller cache - * footprint when scanning for free pages in get_page_from_freelist(). - * - * 1) The BITMAP fullzones tracks which zones in a zonelist have come - * up short of free memory since the last time (last_fullzone_zap) - * we zero'd fullzones. - * 2) The array z_to_n[] maps each zone in the zonelist to its node - * id, so that we can efficiently evaluate whether that node is - * set in the current tasks mems_allowed. - * - * Both fullzones and z_to_n[] are one-to-one with the zonelist, - * indexed by a zones offset in the zonelist zones[] array. - * - * The get_page_from_freelist() routine does two scans. During the - * first scan, we skip zones whose corresponding bit in 'fullzones' - * is set or whose corresponding node in current->mems_allowed (which - * comes from cpusets) is not set. During the second scan, we bypass - * this zonelist_cache, to ensure we look methodically at each zone. - * - * Once per second, we zero out (zap) fullzones, forcing us to - * reconsider nodes that might have regained more free memory. - * The field last_full_zap is the time we last zapped fullzones. - * - * This mechanism reduces the amount of time we waste repeatedly - * reexaming zones for free memory when they just came up low on - * memory momentarilly ago. - * - * The zonelist_cache struct members logically belong in struct - * zonelist. However, the mempolicy zonelists constructed for - * MPOL_BIND are intentionally variable length (and usually much - * shorter). A general purpose mechanism for handling structs with - * multiple variable length members is more mechanism than we want - * here. We resort to some special case hackery instead. - * - * The MPOL_BIND zonelists don't need this zonelist_cache (in good - * part because they are shorter), so we put the fixed length stuff - * at the front of the zonelist struct, ending in a variable length - * zones[], as is needed by MPOL_BIND. - * - * Then we put the optional zonelist cache on the end of the zonelist - * struct. This optional stuff is found by a 'zlcache_ptr' pointer in - * the fixed length portion at the front of the struct. This pointer - * both enables us to find the zonelist cache, and in the case of - * MPOL_BIND zonelists, (which will just set the zlcache_ptr to NULL) - * to know that the zonelist cache is not there. - * - * The end result is that struct zonelists come in two flavors: - * 1) The full, fixed length version, shown below, and - * 2) The custom zonelists for MPOL_BIND. - * The custom MPOL_BIND zonelists have a NULL zlcache_ptr and no zlcache. - * - * Even though there may be multiple CPU cores on a node modifying - * fullzones or last_full_zap in the same zonelist_cache at the same - * time, we don't lock it. This is just hint data - if it is wrong now - * and then, the allocator will still function, perhaps a bit slower. - */ - - -struct zonelist_cache { - unsigned short z_to_n[MAX_ZONES_PER_ZONELIST]; /* zone->nid */ - DECLARE_BITMAP(fullzones, MAX_ZONES_PER_ZONELIST); /* zone full? */ - unsigned long last_full_zap; /* when last zap'd (jiffies) */ -}; #else #define MAX_ZONELISTS 1 -struct zonelist_cache; #endif /* @@ -675,9 +608,6 @@ struct zoneref { * allocation, the other zones are fallback zones, in decreasing * priority. * - * If zlcache_ptr is not NULL, then it is just the address of zlcache, - * as explained above. If zlcache_ptr is NULL, there is no zlcache. - * * * To speed the reading of the zonelist, the zonerefs contain the zone index * of the entry being read. Helper functions to access information given * a struct zoneref are @@ -687,11 +617,7 @@ struct zoneref { * zonelist_node_idx() - Return the index of the node for an entry */ struct zonelist { - struct zonelist_cache *zlcache_ptr; // NULL or &zlcache struct zoneref _zonerefs[MAX_ZONES_PER_ZONELIST + 1]; -#ifdef CONFIG_NUMA - struct zonelist_cache zlcache; // optional ... -#endif }; #ifndef CONFIG_DISCONTIGMEM -- cgit v1.2.3 From 974a786e63c96a2401a78ddba926f34c128474f1 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:34 -0800 Subject: mm, page_alloc: remove MIGRATE_RESERVE MIGRATE_RESERVE preserves an old property of the buddy allocator that existed prior to fragmentation avoidance -- min_free_kbytes worth of pages tended to remain contiguous until the only alternative was to fail the allocation. At the time it was discovered that high-order atomic allocations relied on this property so MIGRATE_RESERVE was introduced. A later patch will introduce an alternative MIGRATE_HIGHATOMIC so this patch deletes MIGRATE_RESERVE and supporting code so it'll be easier to review. Note that this patch in isolation may look like a false regression if someone was bisecting high-order atomic allocation failures. Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Cc: Christoph Lameter Cc: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 1e88aae329ff..b86cfa3313cf 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,8 +39,6 @@ enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, - MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ - MIGRATE_RESERVE = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -63,6 +61,8 @@ enum { MIGRATE_TYPES }; +#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) + #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else @@ -429,12 +429,6 @@ struct zone { const char *name; - /* - * Number of MIGRATE_RESERVE page block. To maintain for just - * optimization. Protected by zone->lock. - */ - int nr_migrate_reserve_block; - #ifdef CONFIG_MEMORY_ISOLATION /* * Number of isolated pageblock. It is used to solve incorrect -- cgit v1.2.3 From 0aaa29a56e4fb0fc9e24edb649e2733a672ca099 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:37 -0800 Subject: mm, page_alloc: reserve pageblocks for high-order atomic allocations on demand High-order watermark checking exists for two reasons -- kswapd high-order awareness and protection for high-order atomic requests. Historically the kernel depended on MIGRATE_RESERVE to preserve min_free_kbytes as high-order free pages for as long as possible. This patch introduces MIGRATE_HIGHATOMIC that reserves pageblocks for high-order atomic allocations on demand and avoids using those blocks for order-0 allocations. This is more flexible and reliable than MIGRATE_RESERVE was. A MIGRATE_HIGHORDER pageblock is created when an atomic high-order allocation request steals a pageblock but limits the total number to 1% of the zone. Callers that speculatively abuse atomic allocations for long-lived high-order allocations to access the reserve will quickly fail. Note that SLUB is currently not such an abuser as it reclaims at least once. It is possible that the pageblock stolen has few suitable high-order pages and will need to steal again in the near future but there would need to be strong justification to search all pageblocks for an ideal candidate. The pageblocks are unreserved if an allocation fails after a direct reclaim attempt. The watermark checks account for the reserved pageblocks when the allocation request is not a high-order atomic allocation. The reserved pageblocks can not be used for order-0 allocations. This may allow temporary wastage until a failed reclaim reassigns the pageblock. This is deliberate as the intent of the reservation is to satisfy a limited number of atomic high-order short-lived requests if the system requires them. The stutter benchmark was used to evaluate this but while it was running there was a systemtap script that randomly allocated between 1 high-order page and 12.5% of memory's worth of order-3 pages using GFP_ATOMIC. This is much larger than the potential reserve and it does not attempt to be realistic. It is intended to stress random high-order allocations from an unknown source, show that there is a reduction in failures without introducing an anomaly where atomic allocations are more reliable than regular allocations. The amount of memory reserved varied throughout the workload as reserves were created and reclaimed under memory pressure. The allocation failures once the workload warmed up were as follows; 4.2-rc5-vanilla 70% 4.2-rc5-atomic-reserve 56% The failure rate was also measured while building multiple kernels. The failure rate was 14% but is 6% with this patch applied. Overall, this is a small reduction but the reserves are small relative to the number of allocation requests. In early versions of the patch, the failure rate reduced by a much larger amount but that required much larger reserves and perversely made atomic allocations seem more reliable than regular allocations. [yalin.wang2010@gmail.com: fix redundant check and a memory leak] Signed-off-by: Mel Gorman Acked-by: Vlastimil Babka Acked-by: Michal Hocko Acked-by: Johannes Weiner Cc: Christoph Lameter Cc: David Rientjes Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: yalin wang Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index b86cfa3313cf..d3bafe4ff32b 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -39,6 +39,8 @@ enum { MIGRATE_UNMOVABLE, MIGRATE_MOVABLE, MIGRATE_RECLAIMABLE, + MIGRATE_PCPTYPES, /* the number of types on the pcp lists */ + MIGRATE_HIGHATOMIC = MIGRATE_PCPTYPES, #ifdef CONFIG_CMA /* * MIGRATE_CMA migration type is designed to mimic the way @@ -61,8 +63,6 @@ enum { MIGRATE_TYPES }; -#define MIGRATE_PCPTYPES (MIGRATE_RECLAIMABLE+1) - #ifdef CONFIG_CMA # define is_migrate_cma(migratetype) unlikely((migratetype) == MIGRATE_CMA) #else @@ -334,6 +334,8 @@ struct zone { /* zone watermarks, access with *_wmark_pages(zone) macros */ unsigned long watermark[NR_WMARK]; + unsigned long nr_reserved_highatomic; + /* * We don't know if the memory that we're going to allocate will be freeable * or/and it will be released eventually, so to avoid totally wasting several -- cgit v1.2.3 From dd56b046426760aa0c852ad6e4b6b07891222d65 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Fri, 6 Nov 2015 16:28:43 -0800 Subject: mm: page_alloc: hide some GFP internals and document the bits and flag combinations Andrew stated the following We have quite a history of remote parts of the kernel using weird/wrong/inexplicable combinations of __GFP_ flags. I tend to think that this is because we didn't adequately explain the interface. And I don't think that gfp.h really improved much in this area as a result of this patchset. Could you go through it some time and decide if we've adequately documented all this stuff? This patches first moves some GFP flag combinations that are part of the MM internals to mm/internal.h. The rest of the patch documents the __GFP_FOO bits under various headings and then documents the flag combinations. It will not help callers that are brain damaged but the clarity might motivate some fixes and avoid future mistakes. Signed-off-by: Mel Gorman Cc: Johannes Weiner Cc: Rik van Riel Cc: Vlastimil Babka Cc: David Rientjes Cc: Joonsoo Kim Cc: Michal Hocko Cc: Vitaly Wool Cc: Rik van Riel Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 251 +++++++++++++++++++++++++++++++++++----------------- 1 file changed, 171 insertions(+), 80 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 369227202ac2..6523109e136d 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -39,9 +39,7 @@ struct vm_area_struct; /* If the above are modified, __GFP_BITS_SHIFT may need updating */ /* - * GFP bitmasks.. - * - * Zone modifiers (see linux/mmzone.h - low three bits) + * Physical address zone modifiers (see linux/mmzone.h - low four bits) * * Do not put any conditional on these. If necessary modify the definitions * without the underscores and use them consistently. The definitions here may @@ -51,120 +49,211 @@ struct vm_area_struct; #define __GFP_HIGHMEM ((__force gfp_t)___GFP_HIGHMEM) #define __GFP_DMA32 ((__force gfp_t)___GFP_DMA32) #define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* Page is movable */ +#define __GFP_MOVABLE ((__force gfp_t)___GFP_MOVABLE) /* ZONE_MOVABLE allowed */ #define GFP_ZONEMASK (__GFP_DMA|__GFP_HIGHMEM|__GFP_DMA32|__GFP_MOVABLE) + /* - * Action modifiers - doesn't change the zoning + * Page mobility and placement hints * - * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt - * _might_ fail. This depends upon the particular VM implementation. + * These flags provide hints about how mobile the page is. Pages with similar + * mobility are placed within the same pageblocks to minimise problems due + * to external fragmentation. * - * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller - * cannot handle allocation failures. New users should be evaluated carefully - * (and the flag should be used only when there is no reasonable failure policy) - * but it is definitely preferable to use the flag rather than opencode endless - * loop around allocator. + * __GFP_MOVABLE (also a zone modifier) indicates that the page can be + * moved by page migration during memory compaction or can be reclaimed. * - * __GFP_NORETRY: The VM implementation must not retry indefinitely and will - * return NULL when direct reclaim and memory compaction have failed to allow - * the allocation to succeed. The OOM killer is not called with the current - * implementation. + * __GFP_RECLAIMABLE is used for slab allocations that specify + * SLAB_RECLAIM_ACCOUNT and whose pages can be freed via shrinkers. + * + * __GFP_WRITE indicates the caller intends to dirty the page. Where possible, + * these pages will be spread between local zones to avoid all the dirty + * pages being in one zone (fair zone allocation policy). * - * __GFP_MOVABLE: Flag that this page will be movable by the page migration - * mechanism or reclaimed + * __GFP_HARDWALL enforces the cpuset memory allocation policy. + * + * __GFP_THISNODE forces the allocation to be satisified from the requested + * node with no fallbacks or placement policy enforcements. */ -#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) /* Caller cannot wait or reschedule */ -#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) /* Should access emergency pools? */ -#define __GFP_IO ((__force gfp_t)___GFP_IO) /* Can start physical IO? */ -#define __GFP_FS ((__force gfp_t)___GFP_FS) /* Can call down to low-level FS? */ -#define __GFP_COLD ((__force gfp_t)___GFP_COLD) /* Cache-cold page required */ -#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) /* Suppress page allocation failure warning */ -#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) /* See above */ -#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) /* See above */ -#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* See above */ -#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC)/* Allow access to emergency reserves */ -#define __GFP_COMP ((__force gfp_t)___GFP_COMP) /* Add compound page metadata */ -#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) /* Return zeroed page on success */ -#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) /* Don't use emergency reserves. - * This takes precedence over the - * __GFP_MEMALLOC flag if both are - * set - */ -#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) /* Enforce hardwall cpuset memory allocs */ -#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE)/* No fallback, no policies */ -#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) /* Page is reclaimable */ -#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) /* Don't account to kmemcg */ -#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) /* Don't track with kmemcheck */ - -#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) /* On behalf of other node */ -#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) /* Allocator intends to dirty page */ +#define __GFP_RECLAIMABLE ((__force gfp_t)___GFP_RECLAIMABLE) +#define __GFP_WRITE ((__force gfp_t)___GFP_WRITE) +#define __GFP_HARDWALL ((__force gfp_t)___GFP_HARDWALL) +#define __GFP_THISNODE ((__force gfp_t)___GFP_THISNODE) /* - * A caller that is willing to wait may enter direct reclaim and will - * wake kswapd to reclaim pages in the background until the high - * watermark is met. A caller may wish to clear __GFP_DIRECT_RECLAIM to - * avoid unnecessary delays when a fallback option is available but - * still allow kswapd to reclaim in the background. The kswapd flag - * can be cleared when the reclaiming of pages would cause unnecessary - * disruption. + * Watermark modifiers -- controls access to emergency reserves + * + * __GFP_HIGH indicates that the caller is high-priority and that granting + * the request is necessary before the system can make forward progress. + * For example, creating an IO context to clean pages. + * + * __GFP_ATOMIC indicates that the caller cannot reclaim or sleep and is + * high priority. Users are typically interrupt handlers. This may be + * used in conjunction with __GFP_HIGH + * + * __GFP_MEMALLOC allows access to all memory. This should only be used when + * the caller guarantees the allocation will allow more memory to be freed + * very shortly e.g. process exiting or swapping. Users either should + * be the MM or co-ordinating closely with the VM (e.g. swap over NFS). + * + * __GFP_NOMEMALLOC is used to explicitly forbid access to emergency reserves. + * This takes precedence over the __GFP_MEMALLOC flag if both are set. + * + * __GFP_NOACCOUNT ignores the accounting for kmemcg limit enforcement. */ -#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_ATOMIC ((__force gfp_t)___GFP_ATOMIC) +#define __GFP_HIGH ((__force gfp_t)___GFP_HIGH) +#define __GFP_MEMALLOC ((__force gfp_t)___GFP_MEMALLOC) +#define __GFP_NOMEMALLOC ((__force gfp_t)___GFP_NOMEMALLOC) +#define __GFP_NOACCOUNT ((__force gfp_t)___GFP_NOACCOUNT) + +/* + * Reclaim modifiers + * + * __GFP_IO can start physical IO. + * + * __GFP_FS can call down to the low-level FS. Clearing the flag avoids the + * allocator recursing into the filesystem which might already be holding + * locks. + * + * __GFP_DIRECT_RECLAIM indicates that the caller may enter direct reclaim. + * This flag can be cleared to avoid unnecessary delays when a fallback + * option is available. + * + * __GFP_KSWAPD_RECLAIM indicates that the caller wants to wake kswapd when + * the low watermark is reached and have it reclaim pages until the high + * watermark is reached. A caller may wish to clear this flag when fallback + * options are available and the reclaim is likely to disrupt the system. The + * canonical example is THP allocation where a fallback is cheap but + * reclaim/compaction may cause indirect stalls. + * + * __GFP_RECLAIM is shorthand to allow/forbid both direct and kswapd reclaim. + * + * __GFP_REPEAT: Try hard to allocate the memory, but the allocation attempt + * _might_ fail. This depends upon the particular VM implementation. + * + * __GFP_NOFAIL: The VM implementation _must_ retry infinitely: the caller + * cannot handle allocation failures. New users should be evaluated carefully + * (and the flag should be used only when there is no reasonable failure + * policy) but it is definitely preferable to use the flag rather than + * opencode endless loop around allocator. + * + * __GFP_NORETRY: The VM implementation must not retry indefinitely and will + * return NULL when direct reclaim and memory compaction have failed to allow + * the allocation to succeed. The OOM killer is not called with the current + * implementation. + */ +#define __GFP_IO ((__force gfp_t)___GFP_IO) +#define __GFP_FS ((__force gfp_t)___GFP_FS) #define __GFP_DIRECT_RECLAIM ((__force gfp_t)___GFP_DIRECT_RECLAIM) /* Caller can reclaim */ #define __GFP_KSWAPD_RECLAIM ((__force gfp_t)___GFP_KSWAPD_RECLAIM) /* kswapd can wake */ +#define __GFP_RECLAIM ((__force gfp_t)(___GFP_DIRECT_RECLAIM|___GFP_KSWAPD_RECLAIM)) +#define __GFP_REPEAT ((__force gfp_t)___GFP_REPEAT) +#define __GFP_NOFAIL ((__force gfp_t)___GFP_NOFAIL) +#define __GFP_NORETRY ((__force gfp_t)___GFP_NORETRY) /* - * This may seem redundant, but it's a way of annotating false positives vs. - * allocations that simply cannot be supported (e.g. page tables). + * Action modifiers + * + * __GFP_COLD indicates that the caller does not expect to be used in the near + * future. Where possible, a cache-cold page will be returned. + * + * __GFP_NOWARN suppresses allocation failure reports. + * + * __GFP_COMP address compound page metadata. + * + * __GFP_ZERO returns a zeroed page on success. + * + * __GFP_NOTRACK avoids tracking with kmemcheck. + * + * __GFP_NOTRACK_FALSE_POSITIVE is an alias of __GFP_NOTRACK. It's a means of + * distinguishing in the source between false positives and allocations that + * cannot be supported (e.g. page tables). + * + * __GFP_OTHER_NODE is for allocations that are on a remote node but that + * should not be accounted for as a remote allocation in vmstat. A + * typical user would be khugepaged collapsing a huge page on a remote + * node. */ +#define __GFP_COLD ((__force gfp_t)___GFP_COLD) +#define __GFP_NOWARN ((__force gfp_t)___GFP_NOWARN) +#define __GFP_COMP ((__force gfp_t)___GFP_COMP) +#define __GFP_ZERO ((__force gfp_t)___GFP_ZERO) +#define __GFP_NOTRACK ((__force gfp_t)___GFP_NOTRACK) #define __GFP_NOTRACK_FALSE_POSITIVE (__GFP_NOTRACK) +#define __GFP_OTHER_NODE ((__force gfp_t)___GFP_OTHER_NODE) -#define __GFP_BITS_SHIFT 26 /* Room for N __GFP_FOO bits */ +/* Room for N __GFP_FOO bits */ +#define __GFP_BITS_SHIFT 26 #define __GFP_BITS_MASK ((__force gfp_t)((1 << __GFP_BITS_SHIFT) - 1)) /* - * GFP_ATOMIC callers can not sleep, need the allocation to succeed. - * A lower watermark is applied to allow access to "atomic reserves" + * Useful GFP flag combinations that are commonly used. It is recommended + * that subsystems start with one of these combinations and then set/clear + * __GFP_FOO flags as necessary. + * + * GFP_ATOMIC users can not sleep and need the allocation to succeed. A lower + * watermark is applied to allow access to "atomic reserves" + * + * GFP_KERNEL is typical for kernel-internal allocations. The caller requires + * ZONE_NORMAL or a lower zone for direct access but can direct reclaim. + * + * GFP_NOWAIT is for kernel allocations that should not stall for direct + * reclaim, start physical IO or use any filesystem callback. + * + * GFP_NOIO will use direct reclaim to discard clean pages or slab pages + * that do not require the starting of any physical IO. + * + * GFP_NOFS will use direct reclaim but will not use any filesystem interfaces. + * + * GFP_USER is for userspace allocations that also need to be directly + * accessibly by the kernel or hardware. It is typically used by hardware + * for buffers that are mapped to userspace (e.g. graphics) that hardware + * still must DMA to. cpuset limits are enforced for these allocations. + * + * GFP_DMA exists for historical reasons and should be avoided where possible. + * The flags indicates that the caller requires that the lowest zone be + * used (ZONE_DMA or 16M on x86-64). Ideally, this would be removed but + * it would require careful auditing as some users really require it and + * others use the flag to avoid lowmem reserves in ZONE_DMA and treat the + * lowest zone as a type of emergency reserve. + * + * GFP_DMA32 is similar to GFP_DMA except that the caller requires a 32-bit + * address. + * + * GFP_HIGHUSER is for userspace allocations that may be mapped to userspace, + * do not need to be directly accessible by the kernel but that cannot + * move once in use. An example may be a hardware allocation that maps + * data directly into userspace but has no addressing limitations. + * + * GFP_HIGHUSER_MOVABLE is for userspace allocations that the kernel does not + * need direct access to but can use kmap() when access is required. They + * are expected to be movable via page reclaim or page migration. Typically, + * pages on the LRU would also be allocated with GFP_HIGHUSER_MOVABLE. + * + * GFP_TRANSHUGE is used for THP allocations. They are compound allocations + * that will fail quickly if memory is not available and will not wake + * kswapd on failure. */ #define GFP_ATOMIC (__GFP_HIGH|__GFP_ATOMIC|__GFP_KSWAPD_RECLAIM) +#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_NOWAIT (__GFP_KSWAPD_RECLAIM) #define GFP_NOIO (__GFP_RECLAIM) #define GFP_NOFS (__GFP_RECLAIM | __GFP_IO) -#define GFP_KERNEL (__GFP_RECLAIM | __GFP_IO | __GFP_FS) #define GFP_TEMPORARY (__GFP_RECLAIM | __GFP_IO | __GFP_FS | \ __GFP_RECLAIMABLE) #define GFP_USER (__GFP_RECLAIM | __GFP_IO | __GFP_FS | __GFP_HARDWALL) +#define GFP_DMA __GFP_DMA +#define GFP_DMA32 __GFP_DMA32 #define GFP_HIGHUSER (GFP_USER | __GFP_HIGHMEM) #define GFP_HIGHUSER_MOVABLE (GFP_HIGHUSER | __GFP_MOVABLE) #define GFP_TRANSHUGE ((GFP_HIGHUSER_MOVABLE | __GFP_COMP | \ __GFP_NOMEMALLOC | __GFP_NORETRY | __GFP_NOWARN) & \ ~__GFP_KSWAPD_RECLAIM) -/* This mask makes up all the page movable related flags */ +/* Convert GFP flags to their corresponding migrate type */ #define GFP_MOVABLE_MASK (__GFP_RECLAIMABLE|__GFP_MOVABLE) #define GFP_MOVABLE_SHIFT 3 -/* Control page allocator reclaim behavior */ -#define GFP_RECLAIM_MASK (__GFP_RECLAIM|__GFP_HIGH|__GFP_IO|__GFP_FS|\ - __GFP_NOWARN|__GFP_REPEAT|__GFP_NOFAIL|\ - __GFP_NORETRY|__GFP_MEMALLOC|__GFP_NOMEMALLOC) - -/* Control slab gfp mask during early boot */ -#define GFP_BOOT_MASK (__GFP_BITS_MASK & ~(__GFP_RECLAIM|__GFP_IO|__GFP_FS)) - -/* Control allocation constraints */ -#define GFP_CONSTRAINT_MASK (__GFP_HARDWALL|__GFP_THISNODE) - -/* Do not use these with a slab allocator */ -#define GFP_SLAB_BUG_MASK (__GFP_DMA32|__GFP_HIGHMEM|~__GFP_BITS_MASK) - -/* Flag - indicates that the buffer will be suitable for DMA. Ignored on some - platforms, used as appropriate on others */ - -#define GFP_DMA __GFP_DMA - -/* 4GB DMA on some platforms */ -#define GFP_DMA32 __GFP_DMA32 - -/* Convert GFP flags to their corresponding migrate type */ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) { VM_WARN_ON((gfp_flags & GFP_MOVABLE_MASK) == GFP_MOVABLE_MASK); @@ -177,6 +266,8 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) /* Group based on mobility */ return (gfp_flags & GFP_MOVABLE_MASK) >> GFP_MOVABLE_SHIFT; } +#undef GFP_MOVABLE_MASK +#undef GFP_MOVABLE_SHIFT static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { -- cgit v1.2.3 From 89903327607232de32f05100cf03f9390b858e0b Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Nov 2015 16:28:46 -0800 Subject: include/linux/mmzone.h: reflow comment Someone has an 86 column display. Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmzone.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index d3bafe4ff32b..e23a9e704536 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -337,12 +337,13 @@ struct zone { unsigned long nr_reserved_highatomic; /* - * We don't know if the memory that we're going to allocate will be freeable - * or/and it will be released eventually, so to avoid totally wasting several - * GB of ram we must reserve some of the lower zone memory (otherwise we risk - * to run OOM on the lower zones despite there's tons of freeable ram - * on the higher zones). This array is recalculated at runtime if the - * sysctl_lowmem_reserve_ratio sysctl changes. + * We don't know if the memory that we're going to allocate will be + * freeable or/and it will be released eventually, so to avoid totally + * wasting several GB of ram we must reserve some of the lower zone + * memory (otherwise we risk to run OOM on the lower zones despite + * there being tons of freeable ram on the higher zones). This array is + * recalculated at runtime if the sysctl_lowmem_reserve_ratio sysctl + * changes. */ long lowmem_reserve[MAX_NR_ZONES]; -- cgit v1.2.3 From c62d25556be6c965dc14288e796a576e8e39a7e9 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 6 Nov 2015 16:28:49 -0800 Subject: mm, fs: introduce mapping_gfp_constraint() There are many places which use mapping_gfp_mask to restrict a more generic gfp mask which would be used for allocations which are not directly related to the page cache but they are performed in the same context. Let's introduce a helper function which makes the restriction explicit and easier to track. This patch doesn't introduce any functional changes. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Michal Hocko Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index a6c78e00ea96..26eabf5ec718 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -69,6 +69,13 @@ static inline gfp_t mapping_gfp_mask(struct address_space * mapping) return (__force gfp_t)mapping->flags & __GFP_BITS_MASK; } +/* Restricts the given gfp_mask to what the mapping allows. */ +static inline gfp_t mapping_gfp_constraint(struct address_space *mapping, + gfp_t gfp_mask) +{ + return mapping_gfp_mask(mapping) & gfp_mask; +} + /* * This is non-atomic. Only to be used before the mapping is activated. * Probably needs a barrier... -- cgit v1.2.3 From 3d9c637f4ae74b45d95bb6cbd793fbffad0a709c Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 6 Nov 2015 16:29:12 -0800 Subject: module: export param_free_charp() Change the param_free_charp() function from static to exported. It is used by zswap in the next patch ("zswap: use charp for zswap param strings"). Signed-off-by: Dan Streetman Acked-by: Rusty Russell Cc: Seth Jennings Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/moduleparam.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index c12f2147c350..52666d90ca94 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -386,6 +386,7 @@ extern int param_get_ullong(char *buffer, const struct kernel_param *kp); extern const struct kernel_param_ops param_ops_charp; extern int param_set_charp(const char *val, const struct kernel_param *kp); extern int param_get_charp(char *buffer, const struct kernel_param *kp); +extern void param_free_charp(void *arg); #define param_check_charp(name, p) __param_check(name, p, char *) /* We used to allow int as well as bool. We're taking that away! */ -- cgit v1.2.3 From 69e18f4dbedfbf208452e9da9979c92da30d2442 Mon Sep 17 00:00:00 2001 From: Dan Streetman Date: Fri, 6 Nov 2015 16:29:18 -0800 Subject: zpool: remove redundant zpool->type string, const-ify zpool_get_type Make the return type of zpool_get_type const; the string belongs to the zpool driver and should not be modified. Remove the redundant type field in the struct zpool; it is private to zpool.c and isn't needed since ->driver->type can be used directly. Add comments indicating strings must be null-terminated. Signed-off-by: Dan Streetman Cc: Sergey Senozhatsky Cc: Seth Jennings Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 42f8ec992452..1f405bee3cd5 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -41,7 +41,7 @@ bool zpool_has_pool(char *type); struct zpool *zpool_create_pool(char *type, char *name, gfp_t gfp, const struct zpool_ops *ops); -char *zpool_get_type(struct zpool *pool); +const char *zpool_get_type(struct zpool *pool); void zpool_destroy_pool(struct zpool *pool); -- cgit v1.2.3 From 6f3526d6db7cbe8b53e42d6bf0cad2072afcf3fe Mon Sep 17 00:00:00 2001 From: Sergey SENOZHATSKY Date: Fri, 6 Nov 2015 16:29:21 -0800 Subject: mm: zsmalloc: constify struct zs_pool name Constify `struct zs_pool' ->name. [akpm@inux-foundation.org: constify zpool_create_pool()'s `type' arg also] Signed-off-by: Sergey Senozhatsky Acked-by: Dan Streetman Cc: Minchan Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zpool.h | 6 ++++-- include/linux/zsmalloc.h | 2 +- 2 files changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zpool.h b/include/linux/zpool.h index 1f405bee3cd5..2e97b7707dff 100644 --- a/include/linux/zpool.h +++ b/include/linux/zpool.h @@ -38,7 +38,7 @@ enum zpool_mapmode { bool zpool_has_pool(char *type); -struct zpool *zpool_create_pool(char *type, char *name, +struct zpool *zpool_create_pool(const char *type, const char *name, gfp_t gfp, const struct zpool_ops *ops); const char *zpool_get_type(struct zpool *pool); @@ -83,7 +83,9 @@ struct zpool_driver { atomic_t refcount; struct list_head list; - void *(*create)(char *name, gfp_t gfp, const struct zpool_ops *ops, + void *(*create)(const char *name, + gfp_t gfp, + const struct zpool_ops *ops, struct zpool *zpool); void (*destroy)(void *pool); diff --git a/include/linux/zsmalloc.h b/include/linux/zsmalloc.h index 6398dfae53f1..34eb16098a33 100644 --- a/include/linux/zsmalloc.h +++ b/include/linux/zsmalloc.h @@ -41,7 +41,7 @@ struct zs_pool_stats { struct zs_pool; -struct zs_pool *zs_create_pool(char *name, gfp_t flags); +struct zs_pool *zs_create_pool(const char *name, gfp_t flags); void zs_destroy_pool(struct zs_pool *pool); unsigned long zs_malloc(struct zs_pool *pool, size_t size); -- cgit v1.2.3 From 474e4eeaf26b6c3298ca3ae9d0a705b0853efb2a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:40 -0800 Subject: mm: drop page->slab_page Since 8456a648cf44 ("slab: use struct page for slab management") nobody uses slab_page field in struct page. Let's drop it. Signed-off-by: Kirill A. Shutemov Acked-by: Christoph Lameter Acked-by: David Rientjes Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: Joonsoo Kim Cc: Andi Kleen Cc: "Paul E. McKenney" Cc: Aneesh Kumar K.V Cc: Hugh Dickins Cc: Michal Hocko Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 0a85da25a822..c0ec46df6c13 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -131,7 +131,6 @@ struct page { #endif }; - struct slab *slab_page; /* slab fields */ struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ -- cgit v1.2.3 From f1e61557f0230d51a3df8d825f2c156e75563bff Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:50 -0800 Subject: mm: pack compound_dtor and compound_order into one word in struct page The patch halves space occupied by compound_dtor and compound_order in struct page. For compound_order, it's trivial long -> short conversion. For get_compound_page_dtor(), we now use hardcoded table for destructor lookup and store its index in the struct page instead of direct pointer to destructor. It shouldn't be a big trouble to maintain the table: we have only two destructor and NULL currently. This patch free up one word in tail pages for reuse. This is preparation for the next patch. Signed-off-by: Kirill A. Shutemov Reviewed-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 24 +++++++++++++++++++----- include/linux/mm_types.h | 6 ++---- 2 files changed, 21 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 906c46a05707..6581c21320cb 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -568,18 +568,32 @@ int split_free_page(struct page *page); /* * Compound pages have a destructor function. Provide a * prototype for that function and accessor functions. - * These are _only_ valid on the head of a PG_compound page. + * These are _only_ valid on the head of a compound page. */ +typedef void compound_page_dtor(struct page *); + +/* Keep the enum in sync with compound_page_dtors array in mm/page_alloc.c */ +enum compound_dtor_id { + NULL_COMPOUND_DTOR, + COMPOUND_PAGE_DTOR, +#ifdef CONFIG_HUGETLB_PAGE + HUGETLB_PAGE_DTOR, +#endif + NR_COMPOUND_DTORS, +}; +extern compound_page_dtor * const compound_page_dtors[]; static inline void set_compound_page_dtor(struct page *page, - compound_page_dtor *dtor) + enum compound_dtor_id compound_dtor) { - page[1].compound_dtor = dtor; + VM_BUG_ON_PAGE(compound_dtor >= NR_COMPOUND_DTORS, page); + page[1].compound_dtor = compound_dtor; } static inline compound_page_dtor *get_compound_page_dtor(struct page *page) { - return page[1].compound_dtor; + VM_BUG_ON_PAGE(page[1].compound_dtor >= NR_COMPOUND_DTORS, page); + return compound_page_dtors[page[1].compound_dtor]; } static inline int compound_order(struct page *page) @@ -589,7 +603,7 @@ static inline int compound_order(struct page *page) return page[1].compound_order; } -static inline void set_compound_order(struct page *page, unsigned long order) +static inline void set_compound_order(struct page *page, unsigned int order) { page[1].compound_order = order; } diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index c0ec46df6c13..e334ef79cb43 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -28,8 +28,6 @@ struct mem_cgroup; IS_ENABLED(CONFIG_ARCH_ENABLE_SPLIT_PMD_PTLOCK)) #define ALLOC_SPLIT_PTLOCKS (SPINLOCK_SIZE > BITS_PER_LONG/8) -typedef void compound_page_dtor(struct page *); - /* * Each physical page in the system has a struct page associated with * it to keep track of whatever it is we are using the page for at the @@ -136,8 +134,8 @@ struct page { */ /* First tail page of compound page */ struct { - compound_page_dtor *compound_dtor; - unsigned long compound_order; + unsigned short int compound_dtor; + unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS -- cgit v1.2.3 From 1d798ca3f16437c71ff63e36597ff07f9c12e4d6 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:54 -0800 Subject: mm: make compound_head() robust Hugh has pointed that compound_head() call can be unsafe in some context. There's one example: CPU0 CPU1 isolate_migratepages_block() page_count() compound_head() !!PageTail() == true put_page() tail->first_page = NULL head = tail->first_page alloc_pages(__GFP_COMP) prep_compound_page() tail->first_page = head __SetPageTail(p); !!PageTail() == true The race is pure theoretical. I don't it's possible to trigger it in practice. But who knows. We can fix the race by changing how encode PageTail() and compound_head() within struct page to be able to update them in one shot. The patch introduces page->compound_head into third double word block in front of compound_dtor and compound_order. Bit 0 encodes PageTail() and the rest bits are pointer to head page if bit zero is set. The patch moves page->pmd_huge_pte out of word, just in case if an architecture defines pgtable_t into something what can have the bit 0 set. hugetlb_cgroup uses page->lru.next in the second tail page to store pointer struct hugetlb_cgroup. The patch switch it to use page->private in the second tail page instead. The space is free since ->first_page is removed from the union. The patch also opens possibility to remove HUGETLB_CGROUP_MIN_ORDER limitation, since there's now space in first tail page to store struct hugetlb_cgroup pointer. But that's out of scope of the patch. That means page->compound_head shares storage space with: - page->lru.next; - page->next; - page->rcu_head.next; That's too long list to be absolutely sure, but looks like nobody uses bit 0 of the word. page->rcu_head.next guaranteed[1] to have bit 0 clean as long as we use call_rcu(), call_rcu_bh(), call_rcu_sched(), or call_srcu(). But future call_rcu_lazy() is not allowed as it makes use of the bit and we can get false positive PageTail(). [1] http://lkml.kernel.org/g/20150827163634.GD4029@linux.vnet.ibm.com Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: Hugh Dickins Cc: David Rientjes Cc: Vlastimil Babka Acked-by: Paul E. McKenney Cc: Aneesh Kumar K.V Cc: Andi Kleen Cc: Christoph Lameter Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/hugetlb_cgroup.h | 4 +-- include/linux/mm.h | 53 ++-------------------------- include/linux/mm_types.h | 22 +++++++++--- include/linux/page-flags.h | 80 ++++++++++-------------------------------- 4 files changed, 41 insertions(+), 118 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hugetlb_cgroup.h b/include/linux/hugetlb_cgroup.h index 7edd30515298..24154c26d469 100644 --- a/include/linux/hugetlb_cgroup.h +++ b/include/linux/hugetlb_cgroup.h @@ -32,7 +32,7 @@ static inline struct hugetlb_cgroup *hugetlb_cgroup_from_page(struct page *page) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return NULL; - return (struct hugetlb_cgroup *)page[2].lru.next; + return (struct hugetlb_cgroup *)page[2].private; } static inline @@ -42,7 +42,7 @@ int set_hugetlb_cgroup(struct page *page, struct hugetlb_cgroup *h_cg) if (compound_order(page) < HUGETLB_CGROUP_MIN_ORDER) return -1; - page[2].lru.next = (void *)h_cg; + page[2].private = (unsigned long)h_cg; return 0; } diff --git a/include/linux/mm.h b/include/linux/mm.h index 6581c21320cb..9671b6f23eda 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -430,46 +430,6 @@ static inline void compound_unlock_irqrestore(struct page *page, #endif } -static inline struct page *compound_head_by_tail(struct page *tail) -{ - struct page *head = tail->first_page; - - /* - * page->first_page may be a dangling pointer to an old - * compound page, so recheck that it is still a tail - * page before returning. - */ - smp_rmb(); - if (likely(PageTail(tail))) - return head; - return tail; -} - -/* - * Since either compound page could be dismantled asynchronously in THP - * or we access asynchronously arbitrary positioned struct page, there - * would be tail flag race. To handle this race, we should call - * smp_rmb() before checking tail flag. compound_head_by_tail() did it. - */ -static inline struct page *compound_head(struct page *page) -{ - if (unlikely(PageTail(page))) - return compound_head_by_tail(page); - return page; -} - -/* - * If we access compound page synchronously such as access to - * allocated page, there is no need to handle tail flag race, so we can - * check tail flag directly without any synchronization primitive. - */ -static inline struct page *compound_head_fast(struct page *page) -{ - if (unlikely(PageTail(page))) - return page->first_page; - return page; -} - /* * The atomic page->_mapcount, starts from -1: so that transitions * both from it and to it can be tracked, using atomic_inc_and_test @@ -518,7 +478,7 @@ static inline void get_huge_page_tail(struct page *page) VM_BUG_ON_PAGE(!PageTail(page), page); VM_BUG_ON_PAGE(page_mapcount(page) < 0, page); VM_BUG_ON_PAGE(atomic_read(&page->_count) != 0, page); - if (compound_tail_refcounted(page->first_page)) + if (compound_tail_refcounted(compound_head(page))) atomic_inc(&page->_mapcount); } @@ -541,13 +501,7 @@ static inline struct page *virt_to_head_page(const void *x) { struct page *page = virt_to_page(x); - /* - * We don't need to worry about synchronization of tail flag - * when we call virt_to_head_page() since it is only called for - * already allocated page and this page won't be freed until - * this virt_to_head_page() is finished. So use _fast variant. - */ - return compound_head_fast(page); + return compound_head(page); } /* @@ -1586,8 +1540,7 @@ static inline bool ptlock_init(struct page *page) * with 0. Make sure nobody took it in use in between. * * It can happen if arch try to use slab for page table allocation: - * slab code uses page->slab_cache and page->first_page (for tail - * pages), which share storage with page->ptl. + * slab code uses page->slab_cache, which share storage with page->ptl. */ VM_BUG_ON_PAGE(*(unsigned long *)&page->ptl, page); if (!ptlock_alloc(page)) diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index e334ef79cb43..bb91658c603f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -111,7 +111,13 @@ struct page { }; }; - /* Third double word block */ + /* + * Third double word block + * + * WARNING: bit 0 of the first word encode PageTail(). That means + * the rest users of the storage space MUST NOT use the bit to + * avoid collision and false-positive PageTail(). + */ union { struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! @@ -132,14 +138,23 @@ struct page { struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU */ - /* First tail page of compound page */ + /* Tail pages of compound page */ struct { + unsigned long compound_head; /* If bit zero is set */ + + /* First tail page only */ unsigned short int compound_dtor; unsigned short int compound_order; }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS - pgtable_t pmd_huge_pte; /* protected by page->ptl */ + struct { + unsigned long __pad; /* do not overlay pmd_huge_pte + * with compound_head to avoid + * possible bit 0 collision. + */ + pgtable_t pmd_huge_pte; /* protected by page->ptl */ + }; #endif }; @@ -160,7 +175,6 @@ struct page { #endif #endif struct kmem_cache *slab_cache; /* SL[AU]B: Pointer to slab */ - struct page *first_page; /* Compound tail pages */ }; #ifdef CONFIG_MEMCG diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index a525e5067484..bb53c7b86315 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -86,12 +86,7 @@ enum pageflags { PG_private, /* If pagecache, has fs-private data */ PG_private_2, /* If pagecache, has fs aux data */ PG_writeback, /* Page is under writeback */ -#ifdef CONFIG_PAGEFLAGS_EXTENDED PG_head, /* A head page */ - PG_tail, /* A tail page */ -#else - PG_compound, /* A compound page */ -#endif PG_swapcache, /* Swap page: swp_entry_t in private */ PG_mappedtodisk, /* Has blocks allocated on-disk */ PG_reclaim, /* To be reclaimed asap */ @@ -398,85 +393,46 @@ static inline void set_page_writeback_keepwrite(struct page *page) test_set_page_writeback_keepwrite(page); } -#ifdef CONFIG_PAGEFLAGS_EXTENDED -/* - * System with lots of page flags available. This allows separate - * flags for PageHead() and PageTail() checks of compound pages so that bit - * tests can be used in performance sensitive paths. PageCompound is - * generally not used in hot code paths except arch/powerpc/mm/init_64.c - * and arch/powerpc/kvm/book3s_64_vio_hv.c which use it to detect huge pages - * and avoid handling those in real mode. - */ __PAGEFLAG(Head, head) CLEARPAGEFLAG(Head, head) -__PAGEFLAG(Tail, tail) -static inline int PageCompound(struct page *page) -{ - return page->flags & ((1L << PG_head) | (1L << PG_tail)); - -} -#ifdef CONFIG_TRANSPARENT_HUGEPAGE -static inline void ClearPageCompound(struct page *page) +static inline int PageTail(struct page *page) { - BUG_ON(!PageHead(page)); - ClearPageHead(page); + return READ_ONCE(page->compound_head) & 1; } -#endif - -#define PG_head_mask ((1L << PG_head)) -#else -/* - * Reduce page flag use as much as possible by overlapping - * compound page flags with the flags used for page cache pages. Possible - * because PageCompound is always set for compound pages and not for - * pages on the LRU and/or pagecache. - */ -TESTPAGEFLAG(Compound, compound) -__SETPAGEFLAG(Head, compound) __CLEARPAGEFLAG(Head, compound) - -/* - * PG_reclaim is used in combination with PG_compound to mark the - * head and tail of a compound page. This saves one page flag - * but makes it impossible to use compound pages for the page cache. - * The PG_reclaim bit would have to be used for reclaim or readahead - * if compound pages enter the page cache. - * - * PG_compound & PG_reclaim => Tail page - * PG_compound & ~PG_reclaim => Head page - */ -#define PG_head_mask ((1L << PG_compound)) -#define PG_head_tail_mask ((1L << PG_compound) | (1L << PG_reclaim)) - -static inline int PageHead(struct page *page) +static inline void set_compound_head(struct page *page, struct page *head) { - return ((page->flags & PG_head_tail_mask) == PG_head_mask); + WRITE_ONCE(page->compound_head, (unsigned long)head + 1); } -static inline int PageTail(struct page *page) +static inline void clear_compound_head(struct page *page) { - return ((page->flags & PG_head_tail_mask) == PG_head_tail_mask); + WRITE_ONCE(page->compound_head, 0); } -static inline void __SetPageTail(struct page *page) +static inline struct page *compound_head(struct page *page) { - page->flags |= PG_head_tail_mask; + unsigned long head = READ_ONCE(page->compound_head); + + if (unlikely(head & 1)) + return (struct page *) (head - 1); + return page; } -static inline void __ClearPageTail(struct page *page) +static inline int PageCompound(struct page *page) { - page->flags &= ~PG_head_tail_mask; -} + return PageHead(page) || PageTail(page); +} #ifdef CONFIG_TRANSPARENT_HUGEPAGE static inline void ClearPageCompound(struct page *page) { - BUG_ON((page->flags & PG_head_tail_mask) != (1 << PG_compound)); - clear_bit(PG_compound, &page->flags); + BUG_ON(!PageHead(page)); + ClearPageHead(page); } #endif -#endif /* !PAGEFLAGS_EXTENDED */ +#define PG_head_mask ((1L << PG_head)) #ifdef CONFIG_HUGETLB_PAGE int PageHuge(struct page *page); -- cgit v1.2.3 From d00181b96eb86c914cb327d1de974a1b71366e1b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:29:57 -0800 Subject: mm: use 'unsigned int' for page order Let's try to be consistent about data type of page order. [sfr@canb.auug.org.au: fix build (type of pageblock_order)] [hughd@google.com: some configs end up with MAX_ORDER and pageblock_order having different types] Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Acked-by: Vlastimil Babka Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Joonsoo Kim Cc: Sergey Senozhatsky Signed-off-by: Stephen Rothwell Signed-off-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 5 +++-- include/linux/pageblock-flags.h | 2 +- 2 files changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9671b6f23eda..00bad7793788 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -550,7 +550,7 @@ static inline compound_page_dtor *get_compound_page_dtor(struct page *page) return compound_page_dtors[page[1].compound_dtor]; } -static inline int compound_order(struct page *page) +static inline unsigned int compound_order(struct page *page) { if (!PageHead(page)) return 0; @@ -1810,7 +1810,8 @@ extern void si_meminfo(struct sysinfo * val); extern void si_meminfo_node(struct sysinfo *val, int nid); extern __printf(3, 4) -void warn_alloc_failed(gfp_t gfp_mask, int order, const char *fmt, ...); +void warn_alloc_failed(gfp_t gfp_mask, unsigned int order, + const char *fmt, ...); extern void setup_per_cpu_pageset(void); diff --git a/include/linux/pageblock-flags.h b/include/linux/pageblock-flags.h index 2baeee12f48e..e942558b3585 100644 --- a/include/linux/pageblock-flags.h +++ b/include/linux/pageblock-flags.h @@ -44,7 +44,7 @@ enum pageblock_bits { #ifdef CONFIG_HUGETLB_PAGE_SIZE_VARIABLE /* Huge page sizes are variable */ -extern int pageblock_order; +extern unsigned int pageblock_order; #else /* CONFIG_HUGETLB_PAGE_SIZE_VARIABLE */ -- cgit v1.2.3 From 1965c8b7ac7dd147663faf77a66a693ac3ddcb85 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 6 Nov 2015 16:30:00 -0800 Subject: mm: use 'unsigned int' for compound_dtor/compound_order on 64BIT On 64 bit system we have enough space in struct page to encode compound_dtor and compound_order with unsigned int. On x86-64 it leads to slightly smaller code size due usesage of plain MOV instead of MOVZX (zero-extended move) or similar effect. allyesconfig: text data bss dec hex filename 159520446 48146736 72196096 279863278 10ae5fee vmlinux.pre 159520382 48146736 72196096 279863214 10ae5fae vmlinux.post On other architectures without native support of 16-bit data types the Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Reviewed-by: Andrea Arcangeli Cc: "Paul E. McKenney" Cc: Andi Kleen Cc: Aneesh Kumar K.V Cc: Christoph Lameter Cc: David Rientjes Cc: Hugh Dickins Cc: Joonsoo Kim Cc: Sergey Senozhatsky Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index bb91658c603f..f8d1492a114f 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -143,8 +143,19 @@ struct page { unsigned long compound_head; /* If bit zero is set */ /* First tail page only */ +#ifdef CONFIG_64BIT + /* + * On 64 bit system we have enough space in struct page + * to encode compound_dtor and compound_order with + * unsigned int. It can help compiler generate better or + * smaller code on some archtectures. + */ + unsigned int compound_dtor; + unsigned int compound_order; +#else unsigned short int compound_dtor; unsigned short int compound_order; +#endif }; #if defined(CONFIG_TRANSPARENT_HUGEPAGE) && USE_SPLIT_PMD_PTLOCKS -- cgit v1.2.3 From 9add850c211a39d5ab1a091d48795e21599a73d0 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Fri, 6 Nov 2015 16:30:09 -0800 Subject: include/linux/compiler-gcc.h: improve __visible documentation Cc: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/compiler-gcc.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 0e3110a0b771..22ab246feed3 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -205,7 +205,10 @@ #if GCC_VERSION >= 40600 /* - * Tell the optimizer that something else uses this function or variable. + * When used with Link Time Optimization, gcc can optimize away C functions or + * variables which are referenced only from assembly code. __visible tells the + * optimizer that something else uses this function or variable, thus preventing + * this. */ #define __visible __attribute__((externally_visible)) #endif -- cgit v1.2.3 From e2eb53aa96754b97d158eff884dde88abbad925e Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 6 Nov 2015 16:30:58 -0800 Subject: bitops.h: improve sign_extend32()'s documentation It is often overlooked that sign_extend32(), despite its name, is safe to use for 16 and 8 bit types as well. This should help prevent sign extension being done manually some other way. Signed-off-by: Martin Kepplinger Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: George Spelvin Cc: Rasmus Villemoes Cc: Maxime Coquelin Cc: Denys Vlasenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index e63553386ae7..5629923a8701 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -164,6 +164,8 @@ static inline __u8 ror8(__u8 word, unsigned int shift) * sign_extend32 - sign extend a 32-bit value using specified bit as sign-bit * @value: value to sign extend * @index: 0 based bit index (0<=index<32) to sign bit + * + * This is safe to use for 16- and 8-bit types as well. */ static inline __s32 sign_extend32(__u32 value, int index) { -- cgit v1.2.3 From 48e203e21b29cd4b2c58403fe8bca68e2e854895 Mon Sep 17 00:00:00 2001 From: Martin Kepplinger Date: Fri, 6 Nov 2015 16:31:02 -0800 Subject: bitops.h: add sign_extend64() Months back, this was discussed, see https://lkml.org/lkml/2015/1/18/289 The result was the 64-bit version being "likely fine", "valuable" and "correct". The discussion fell asleep but since there are possible users, let's add it. Signed-off-by: Martin Kepplinger Cc: Peter Zijlstra Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Cc: Thomas Gleixner Cc: "H. Peter Anvin" Cc: George Spelvin Cc: Rasmus Villemoes Cc: Maxime Coquelin Cc: Denys Vlasenko Cc: Yury Norov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/bitops.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bitops.h b/include/linux/bitops.h index 5629923a8701..2b8ed123ad36 100644 --- a/include/linux/bitops.h +++ b/include/linux/bitops.h @@ -173,6 +173,17 @@ static inline __s32 sign_extend32(__u32 value, int index) return (__s32)(value << shift) >> shift; } +/** + * sign_extend64 - sign extend a 64-bit value using specified bit as sign-bit + * @value: value to sign extend + * @index: 0 based bit index (0<=index<64) to sign bit + */ +static inline __s64 sign_extend64(__u64 value, int index) +{ + __u8 shift = 63 - index; + return (__s64)(value << shift) >> shift; +} + static inline unsigned fls_long(unsigned long l) { if (sizeof(l) == 4) -- cgit v1.2.3 From 0a9df786a6ae2f898114bdd242b64920dedf53bd Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 6 Nov 2015 16:31:20 -0800 Subject: lib/kasprintf.c: introduce kvasprintf_const This adds kvasprintf_const which tries to use kstrdup_const if possible: If the format string contains no % characters, or if the format string is exactly "%s", we delegate to kstrdup_const. Otherwise, we fall back to kvasprintf. Just as for kstrdup_const, the main motivation is to save memory by reusing .rodata when possible. The return value should be freed by kfree_const, just like for kstrdup_const. There is deliberately no kasprintf_const: In the vast majority of cases, the format string argument is a literal, so one can determine statically whether one could instead use kstrdup_const directly (which would also require one to change all corresponding kfree calls to kfree_const). Signed-off-by: Rasmus Villemoes Cc: Greg KH Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 5582410727cb..2c13f747ac2e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -413,6 +413,8 @@ extern __printf(2, 3) char *kasprintf(gfp_t gfp, const char *fmt, ...); extern __printf(2, 0) char *kvasprintf(gfp_t gfp, const char *fmt, va_list args); +extern __printf(2, 0) +const char *kvasprintf_const(gfp_t gfp, const char *fmt, va_list args); extern __scanf(2, 3) int sscanf(const char *, const char *, ...); -- cgit v1.2.3 From 8de1ee7ebfb4979c6444e81273e12e7a972c367d Mon Sep 17 00:00:00 2001 From: Cody P Schafer Date: Fri, 6 Nov 2015 16:31:28 -0800 Subject: rbtree: clarify documentation of rbtree_postorder_for_each_entry_safe() I noticed that commit a20135ffbc44 ("writeback: don't drain bdi_writeback_congested on bdi destruction") added a usage of rbtree_postorder_for_each_entry_safe() in mm/backing-dev.c which appears to try to rb_erase() elements from an rbtree while iterating over it using rbtree_postorder_for_each_entry_safe(). Doing this will cause random nodes to be missed by the iteration because rb_erase() may rebalance the tree, changing the ordering that we're trying to iterate over. The previous documentation for rbtree_postorder_for_each_entry_safe() wasn't clear that this wasn't allowed, it was taken from the docs for list_for_each_entry_safe(), where erasing isn't a problem due to list_del() not reordering. Explicitly warn developers about this potential pit-fall. Note that I haven't fixed the actual issue that (it appears) the commit referenced above introduced (not familiar enough with that code). In general (and in this case), the patterns to follow are: - switch to rb_first() + rb_erase(), don't use rbtree_postorder_for_each_entry_safe(). - keep the postorder iteration and don't rb_erase() at all. Instead just clear the fields of rb_node & cgwb_congested_tree as required by other users of those structures. [akpm@linux-foundation.org: tweak comments] Signed-off-by: Cody P Schafer Cc: John de la Garza Cc: Michel Lespinasse Cc: Peter Zijlstra Cc: Rusty Russell Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rbtree.h | 12 ++++++++++-- 1 file changed, 10 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/rbtree.h b/include/linux/rbtree.h index 830c4992088d..a5aa7ae671f4 100644 --- a/include/linux/rbtree.h +++ b/include/linux/rbtree.h @@ -101,13 +101,21 @@ static inline void rb_link_node_rcu(struct rb_node *node, struct rb_node *parent }) /** - * rbtree_postorder_for_each_entry_safe - iterate over rb_root in post order of - * given type safe against removal of rb_node entry + * rbtree_postorder_for_each_entry_safe - iterate in post-order over rb_root of + * given type allowing the backing memory of @pos to be invalidated * * @pos: the 'type *' to use as a loop cursor. * @n: another 'type *' to use as temporary storage * @root: 'rb_root *' of the rbtree. * @field: the name of the rb_node field within 'type'. + * + * rbtree_postorder_for_each_entry_safe() provides a similar guarantee as + * list_for_each_entry_safe() and allows the iteration to continue independent + * of changes to @pos by the body of the loop. + * + * Note, however, that it cannot handle other modifications that re-order the + * rbtree it is iterating over. This includes calling rb_erase() on @pos, as + * rb_erase() may rebalance the tree, causing us to miss some nodes. */ #define rbtree_postorder_for_each_entry_safe(pos, n, root, field) \ for (pos = rb_entry_safe(rb_first_postorder(root), typeof(*pos), field); \ -- cgit v1.2.3 From 2e01fabe67ccaff1d59bda01e60a61f5fb0aa7b6 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:19 -0800 Subject: signals: kill block_all_signals() and unblock_all_signals() It is hardly possible to enumerate all problems with block_all_signals() and unblock_all_signals(). Just for example, 1. block_all_signals(SIGSTOP/etc) simply can't help if the caller is multithreaded. Another thread can dequeue the signal and force the group stop. 2. Even is the caller is single-threaded, it will "stop" anyway. It will not sleep, but it will spin in kernel space until SIGCONT or SIGKILL. And a lot more. In short, this interface doesn't work at all, at least the last 10+ years. Daniel said: Yeah the only times I played around with the DRM_LOCK stuff was when old drivers accidentally deadlocked - my impression is that the entire DRM_LOCK thing was never really tested properly ;-) Hence I'm all for purging where this leaks out of the drm subsystem. Signed-off-by: Oleg Nesterov Acked-by: Daniel Vetter Acked-by: Dave Airlie Cc: Richard Weinberger Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index eeb5066a44fb..923ec1a9b2b4 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1570,9 +1570,7 @@ struct task_struct { unsigned long sas_ss_sp; size_t sas_ss_size; - int (*notifier)(void *priv); - void *notifier_data; - sigset_t *notifier_mask; + struct callback_head *task_works; struct audit_context *audit_context; @@ -2476,9 +2474,6 @@ static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, s return ret; } -extern void block_all_signals(int (*notifier)(void *priv), void *priv, - sigset_t *mask); -extern void unblock_all_signals(void); extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); -- cgit v1.2.3 From be0e6f290f78b84a3b21b8c8c46819c4514fe632 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:22 -0800 Subject: signal: turn dequeue_signal_lock() into kernel_dequeue_signal() 1. Rename dequeue_signal_lock() to kernel_dequeue_signal(). This matches another "for kthreads only" kernel_sigaction() helper. 2. Remove the "tsk" and "mask" arguments, they are always current and current->blocked. And it is simply wrong if tsk != current. 3. We could also remove the 3rd "siginfo_t *info" arg but it looks potentially useful. However we can simplify the callers if we change kernel_dequeue_signal() to accept info => NULL. 4. Remove _irqsave, it is never called from atomic context. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo Cc: David Woodhouse Cc: Felipe Balbi Cc: Markus Pargmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 923ec1a9b2b4..3d54924b4b86 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2462,14 +2462,15 @@ extern void ignore_signals(struct task_struct *); extern void flush_signal_handlers(struct task_struct *, int force_default); extern int dequeue_signal(struct task_struct *tsk, sigset_t *mask, siginfo_t *info); -static inline int dequeue_signal_lock(struct task_struct *tsk, sigset_t *mask, siginfo_t *info) +static inline int kernel_dequeue_signal(siginfo_t *info) { - unsigned long flags; + struct task_struct *tsk = current; + siginfo_t __info; int ret; - spin_lock_irqsave(&tsk->sighand->siglock, flags); - ret = dequeue_signal(tsk, mask, info); - spin_unlock_irqrestore(&tsk->sighand->siglock, flags); + spin_lock_irq(&tsk->sighand->siglock); + ret = dequeue_signal(tsk, &tsk->blocked, info ?: &__info); + spin_unlock_irq(&tsk->sighand->siglock); return ret; } -- cgit v1.2.3 From 9a13049e83f346cb1cbd60c64e520a73c396af16 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Fri, 6 Nov 2015 16:32:25 -0800 Subject: signal: introduce kernel_signal_stop() to fix jffs2_garbage_collect_thread() jffs2_garbage_collect_thread() can race with SIGCONT and sleep in TASK_STOPPED state after it was already sent. Add the new helper, kernel_signal_stop(), which does this correctly. Signed-off-by: Oleg Nesterov Reviewed-by: Tejun Heo Cc: David Woodhouse Cc: Felipe Balbi Cc: Markus Pargmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 3d54924b4b86..4069febaa34a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2475,6 +2475,16 @@ static inline int kernel_dequeue_signal(siginfo_t *info) return ret; } +static inline void kernel_signal_stop(void) +{ + spin_lock_irq(¤t->sighand->siglock); + if (current->jobctl & JOBCTL_STOP_DEQUEUED) + __set_current_state(TASK_STOPPED); + spin_unlock_irq(¤t->sighand->siglock); + + schedule(); +} + extern void release_task(struct task_struct * p); extern int send_sig_info(int, struct siginfo *, struct task_struct *); extern int force_sigsegv(int, struct task_struct *); -- cgit v1.2.3 From 002edb6f6f2a79bea50de11260ddc9572e6db731 Mon Sep 17 00:00:00 2001 From: Robin Murphy Date: Fri, 6 Nov 2015 16:32:51 -0800 Subject: dma-mapping: tidy up dma_parms default handling Many DMA controllers and other devices set max_segment_size to indicate their scatter-gather capability, but have no interest in segment_boundary_mask. However, the existence of a dma_parms structure precludes the use of any default value, leaving them as zeros (assuming a properly kzalloc'ed structure). If a well-behaved IOMMU (or SWIOTLB) then tries to respect this by ensuring a mapped segment does not cross a zero-byte boundary, hilarity ensues. Since zero is a nonsensical value for either parameter, treat it as an indicator for "default", as might be expected. In the process, clean up a bit by replacing the bare constants with slightly more meaningful macros and removing the superfluous "else" statements. [akpm@linux-foundation.org: dma-mapping.h needs sizes.h for SZ_64K] Signed-off-by: Robin Murphy Reviewed-by: Sumit Semwal Acked-by: Marek Szyprowski Cc: Arnd Bergmann Cc: Sakari Ailus Cc: Russell King Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/dma-mapping.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dma-mapping.h b/include/linux/dma-mapping.h index ac07ff090919..2e551e2d2d03 100644 --- a/include/linux/dma-mapping.h +++ b/include/linux/dma-mapping.h @@ -1,6 +1,7 @@ #ifndef _LINUX_DMA_MAPPING_H #define _LINUX_DMA_MAPPING_H +#include #include #include #include @@ -145,7 +146,9 @@ static inline void arch_teardown_dma_ops(struct device *dev) { } static inline unsigned int dma_get_max_seg_size(struct device *dev) { - return dev->dma_parms ? dev->dma_parms->max_segment_size : 65536; + if (dev->dma_parms && dev->dma_parms->max_segment_size) + return dev->dma_parms->max_segment_size; + return SZ_64K; } static inline unsigned int dma_set_max_seg_size(struct device *dev, @@ -154,14 +157,15 @@ static inline unsigned int dma_set_max_seg_size(struct device *dev, if (dev->dma_parms) { dev->dma_parms->max_segment_size = size; return 0; - } else - return -EIO; + } + return -EIO; } static inline unsigned long dma_get_seg_boundary(struct device *dev) { - return dev->dma_parms ? - dev->dma_parms->segment_boundary_mask : 0xffffffff; + if (dev->dma_parms && dev->dma_parms->segment_boundary_mask) + return dev->dma_parms->segment_boundary_mask; + return DMA_BIT_MASK(32); } static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) @@ -169,8 +173,8 @@ static inline int dma_set_seg_boundary(struct device *dev, unsigned long mask) if (dev->dma_parms) { dev->dma_parms->segment_boundary_mask = mask; return 0; - } else - return -EIO; + } + return -EIO; } #ifndef dma_max_pfn -- cgit v1.2.3 From cb7ae262e230064ba282094b7e1f60a092448b72 Mon Sep 17 00:00:00 2001 From: Anish Bhatt Date: Fri, 6 Nov 2015 16:33:01 -0800 Subject: include/linux/zutil.h: fix usage example of zlib_adler32() alder32 was renamed to zlib_adler32 since before 2.6.11. Signed-off-by: Anish Bhatt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/zutil.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/zutil.h b/include/linux/zutil.h index 6adfa9a6ffe9..663689521759 100644 --- a/include/linux/zutil.h +++ b/include/linux/zutil.h @@ -68,10 +68,10 @@ typedef uLong (*check_func) (uLong check, const Byte *buf, An Adler-32 checksum is almost as reliable as a CRC32 but can be computed much faster. Usage example: - uLong adler = adler32(0L, NULL, 0); + uLong adler = zlib_adler32(0L, NULL, 0); while (read_buffer(buffer, length) != EOF) { - adler = adler32(adler, buffer, length); + adler = zlib_adler32(adler, buffer, length); } if (adler != original_adler) error(); */ -- cgit v1.2.3 From 95ad1f4a9358dff1dcf84bf5c9cc84caa9215f7f Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 7 Nov 2015 11:21:47 +0100 Subject: netfilter: ipset: Fix extension alignment The data extensions in ipset lacked the proper memory alignment and thus could lead to kernel crash on several architectures. Therefore the structures have been reorganized and alignment attributes added where needed. The patch was tested on armv7h by Gerhard Wiesinger and on x86_64, sparc64 by Jozsef Kadlecsik. Reported-by: Gerhard Wiesinger Tested-by: Gerhard Wiesinger Tested-by: Jozsef Kadlecsik Signed-off-by: Jozsef Kadlecsik --- include/linux/netfilter/ipset/ip_set.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 48bb01edcf30..0e1f433cc4b7 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -421,7 +421,7 @@ extern void ip_set_free(void *members); extern int ip_set_get_ipaddr4(struct nlattr *nla, __be32 *ipaddr); extern int ip_set_get_ipaddr6(struct nlattr *nla, union nf_inet_addr *ipaddr); extern size_t ip_set_elem_len(struct ip_set *set, struct nlattr *tb[], - size_t len); + size_t len, size_t align); extern int ip_set_get_extensions(struct ip_set *set, struct nlattr *tb[], struct ip_set_ext *ext); -- cgit v1.2.3 From dece16353ef47d8d33f5302bc158072a9d65e26f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:41:16 -0700 Subject: block: change ->make_request_fn() and users to return a queue cookie No functional changes in this patch, but it prepares us for returning a more useful cookie related to the IO that was queued up. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- include/linux/blk_types.h | 24 ++++++++++++++++++++++++ include/linux/blkdev.h | 4 ++-- include/linux/fs.h | 2 +- include/linux/lightnvm.h | 2 +- 4 files changed, 28 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e8130138f29d..641e5a3ed58c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -244,4 +244,28 @@ enum rq_flag_bits { #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) #define REQ_NO_TIMEOUT (1ULL << __REQ_NO_TIMEOUT) +typedef unsigned int blk_qc_t; +#define BLK_QC_T_NONE -1U +#define BLK_QC_T_SHIFT 16 + +static inline bool blk_qc_t_valid(blk_qc_t cookie) +{ + return cookie != BLK_QC_T_NONE; +} + +static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num) +{ + return tag | (queue_num << BLK_QC_T_SHIFT); +} + +static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) +{ + return cookie >> BLK_QC_T_SHIFT; +} + +static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) +{ + return cookie & 0xffff; +} + #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d045ca8487af..5ee0f5243025 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -209,7 +209,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef void (request_fn_proc) (struct request_queue *q); -typedef void (make_request_fn) (struct request_queue *q, struct bio *bio); +typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); @@ -761,7 +761,7 @@ static inline void rq_flush_dcache_pages(struct request *rq) extern int blk_register_queue(struct gendisk *disk); extern void blk_unregister_queue(struct gendisk *disk); -extern void generic_make_request(struct bio *bio); +extern blk_qc_t generic_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); diff --git a/include/linux/fs.h b/include/linux/fs.h index 72d8a844c692..bcca36e4bc1e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2625,7 +2625,7 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern void submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(int, struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5ebd70d12f35..69c9057e1ab8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -426,7 +426,7 @@ static inline struct ppa_addr block_to_ppa(struct nvm_dev *dev, return ppa; } -typedef void (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); +typedef blk_qc_t (nvm_tgt_make_rq_fn)(struct request_queue *, struct bio *); typedef sector_t (nvm_tgt_capacity_fn)(void *); typedef int (nvm_tgt_end_io_fn)(struct nvm_rq *, int); typedef void *(nvm_tgt_init_fn)(struct nvm_dev *, struct gendisk *, int, int); -- cgit v1.2.3 From 05229beeddf7e75e2e616ddaad4b70e7fca9528d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 5 Nov 2015 10:44:55 -0700 Subject: block: add block polling support Add basic support for polling for specific IO to complete. This uses the cookie that blk-mq passes back, which enables the block layer to pass this cookie to the driver to spin for a specific request. This will be combined with request latency tracking, so we can make qualified decisions about when to poll and when not to. For now, for benchmark purposes, we add a sysfs file that controls whether polling is enabled or not. Signed-off-by: Jens Axboe Acked-by: Christoph Hellwig Acked-by: Keith Busch --- include/linux/blk-mq.h | 10 ++++++++++ include/linux/blkdev.h | 3 +++ 2 files changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 83cc9d4e5455..daf17d70aeca 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -59,6 +59,9 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + + unsigned long poll_invoked; + unsigned long poll_success; }; struct blk_mq_tag_set { @@ -97,6 +100,8 @@ typedef void (exit_request_fn)(void *, struct request *, unsigned int, typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); + struct blk_mq_ops { /* @@ -114,6 +119,11 @@ struct blk_mq_ops { */ timeout_fn *timeout; + /* + * Called to poll for completion of a specific tag. + */ + poll_fn *poll; + softirq_done_fn *complete; /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5ee0f5243025..3fe27f8d91f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -487,6 +487,7 @@ struct request_queue { #define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_INIT_DONE 20 /* queue is initialized */ #define QUEUE_FLAG_NO_SG_MERGE 21 /* don't attempt to merge SG segments*/ +#define QUEUE_FLAG_POLL 22 /* IO polling enabled if set */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -814,6 +815,8 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +bool blk_poll(struct request_queue *q, blk_qc_t cookie); + static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { return bdev->bd_disk->queue; /* this is never NULL */ -- cgit v1.2.3 From 5037835c1f3eabf4f22163fc0278dd87165f8957 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Mon, 5 Oct 2015 16:33:36 -0600 Subject: coredump: add DAX filtering for ELF coredumps Add two new flags to the existing coredump mechanism for ELF files to allow us to explicitly filter DAX mappings. This is desirable because DAX mappings, like hugetlb mappings, have the potential to be very large. Update the coredump_filter documentation in Documentation/filesystems/proc.txt so that it addresses the new DAX coredump flags. Also update the documented default value of coredump_filter to be consistent with the core(5) man page. The documentation being updated talks about bit 4, Dump ELF headers, which is enabled if CONFIG_CORE_DUMP_DEFAULT_ELF_HEADERS is turned on in the kernel config. This kernel config option defaults to "y" if both ELF binaries and coredump are enabled. Signed-off-by: Ross Zwisler Acked-by: Jeff Moyer Signed-off-by: Dan Williams --- include/linux/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index b7b9501b41af..3c02d92ed23b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -483,9 +483,11 @@ static inline int get_dumpable(struct mm_struct *mm) #define MMF_DUMP_ELF_HEADERS 6 #define MMF_DUMP_HUGETLB_PRIVATE 7 #define MMF_DUMP_HUGETLB_SHARED 8 +#define MMF_DUMP_DAX_PRIVATE 9 +#define MMF_DUMP_DAX_SHARED 10 #define MMF_DUMP_FILTER_SHIFT MMF_DUMPABLE_BITS -#define MMF_DUMP_FILTER_BITS 7 +#define MMF_DUMP_FILTER_BITS 9 #define MMF_DUMP_FILTER_MASK \ (((1 << MMF_DUMP_FILTER_BITS) - 1) << MMF_DUMP_FILTER_SHIFT) #define MMF_DUMP_FILTER_DEFAULT \ -- cgit v1.2.3 From c8299cb605b27dd5a49f7a69e48fd23e5a206298 Mon Sep 17 00:00:00 2001 From: Michal Nazarewicz Date: Mon, 9 Nov 2015 14:58:10 -0800 Subject: kernel.h: make abs() work with 64-bit types For 64-bit arguments, the abs macro casts it to an int which leads to lost precision and may cause incorrect results. To deal with 64-bit types abs64 macro has been introduced but still there are places where abs macro is used incorrectly. To deal with the problem, expand abs macro such that it operates on s64 type when dealing with 64-bit types while still returning long when dealing with smaller types. This fixes one known bug (per John): The internal clocksteering done for fine-grained error correction uses a : logarithmic approximation, so any time adjtimex() adjusts the clock : steering, timekeeping_freqadjust() quickly approximates the correct clock : frequency over a series of ticks. : : Unfortunately, the logic in timekeeping_freqadjust(), introduced in commit : dc491596f639438 (Rework frequency adjustments to work better w/ nohz), : used the abs() function with a s64 error value to calculate the size of : the approximated adjustment to be made. : : Per include/linux/kernel.h: "abs() should not be used for 64-bit types : (s64, u64, long long) - use abs64()". : : Thus on 32-bit platforms, this resulted in the clocksteering to take a : quite dampended random walk trying to converge on the proper frequency, : which caused the adjustments to be made much slower then intended (most : easily observed when large adjustments are made). Signed-off-by: Michal Nazarewicz Reported-by: John Stultz Tested-by: John Stultz Cc: Ingo Molnar Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 45 ++++++++++++++++++++++++--------------------- 1 file changed, 24 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 2c13f747ac2e..05ce782d53ab 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -200,28 +200,31 @@ extern int _cond_resched(void); #define might_sleep_if(cond) do { if (cond) might_sleep(); } while (0) -/* - * abs() handles unsigned and signed longs, ints, shorts and chars. For all - * input types abs() returns a signed long. - * abs() should not be used for 64-bit types (s64, u64, long long) - use abs64() - * for those. +/** + * abs - return absolute value of an argument + * @x: the value. If it is unsigned type, it is converted to signed type first + * (s64, long or int depending on its size). + * + * Return: an absolute value of x. If x is 64-bit, macro's return type is s64, + * otherwise it is signed long. */ -#define abs(x) ({ \ - long ret; \ - if (sizeof(x) == sizeof(long)) { \ - long __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } else { \ - int __x = (x); \ - ret = (__x < 0) ? -__x : __x; \ - } \ - ret; \ - }) - -#define abs64(x) ({ \ - s64 __x = (x); \ - (__x < 0) ? -__x : __x; \ - }) +#define abs(x) __builtin_choose_expr(sizeof(x) == sizeof(s64), ({ \ + s64 __x = (x); \ + (__x < 0) ? -__x : __x; \ + }), ({ \ + long ret; \ + if (sizeof(x) == sizeof(long)) { \ + long __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } else { \ + int __x = (x); \ + ret = (__x < 0) ? -__x : __x; \ + } \ + ret; \ + })) + +/* Deprecated, use abs instead. */ +#define abs64(x) abs((s64)(x)) /** * reciprocal_scale - "scale" a value into range [0, ep_ro) -- cgit v1.2.3 From 79211c8ed19c055ca105502c8733800d442a0ae6 Mon Sep 17 00:00:00 2001 From: Andrew Morton Date: Mon, 9 Nov 2015 14:58:13 -0800 Subject: remove abs64() Switch everything to the new and more capable implementation of abs(). Mainly to give the new abs() a bit of a workout. Cc: Michal Nazarewicz Cc: John Stultz Cc: Ingo Molnar Cc: Steven Rostedt Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 05ce782d53ab..350dfb08aee3 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -223,9 +223,6 @@ extern int _cond_resched(void); ret; \ })) -/* Deprecated, use abs instead. */ -#define abs64(x) abs((s64)(x)) - /** * reciprocal_scale - "scale" a value into range [0, ep_ro) * @val: value -- cgit v1.2.3 From 77c5b5da02f0a30d61144a546c4ef3657e3b817d Mon Sep 17 00:00:00 2001 From: Nicolas Pitre Date: Mon, 9 Nov 2015 14:58:23 -0800 Subject: kmap_atomic_to_page() has no users, remove it Removal started in commit 5bbeed12bdc3 ("sparc32: drop unused kmap_atomic_to_page"). Let's do it across the whole tree. Signed-off-by: Nicolas Pitre Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/highmem.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/highmem.h b/include/linux/highmem.h index 6aefcd0031a6..bb3f3297062a 100644 --- a/include/linux/highmem.h +++ b/include/linux/highmem.h @@ -78,7 +78,6 @@ static inline void __kunmap_atomic(void *addr) } #define kmap_atomic_pfn(pfn) kmap_atomic(pfn_to_page(pfn)) -#define kmap_atomic_to_page(ptr) virt_to_page(ptr) #define kmap_flush_unused() do {} while(0) #endif -- cgit v1.2.3 From 7bc4f1d281bc1f807fd0c9aaa2f2d333b6508790 Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 9 Nov 2015 14:58:26 -0800 Subject: include/linux/kdev_t.h: remove unused huge_valid_dev() There's no user of huge_valid_dev() any more, so remove it. No functional change. Signed-off-by: Yaowei Bai Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index c838abe3ee0a..a546d206c7f3 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -54,11 +54,6 @@ static inline dev_t new_decode_dev(u32 dev) return MKDEV(major, minor); } -static inline int huge_valid_dev(dev_t dev) -{ - return 1; -} - static inline u64 huge_encode_dev(dev_t dev) { return new_encode_dev(dev); -- cgit v1.2.3 From 8b9758b9c6f65f55c94370636c04e976edc93e1a Mon Sep 17 00:00:00 2001 From: Yaowei Bai Date: Mon, 9 Nov 2015 14:58:28 -0800 Subject: include/linux/kdev_t.h: old/new_valid_dev() can return bool Make old/new_valid_dev return bool due to these two particular functions only using either one or zero as their return value. No functional change. Signed-off-by: Yaowei Bai Cc: Al Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/kdev_t.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kdev_t.h b/include/linux/kdev_t.h index a546d206c7f3..052c7b32cc91 100644 --- a/include/linux/kdev_t.h +++ b/include/linux/kdev_t.h @@ -20,7 +20,7 @@ }) /* acceptable for old filesystems */ -static inline int old_valid_dev(dev_t dev) +static inline bool old_valid_dev(dev_t dev) { return MAJOR(dev) < 256 && MINOR(dev) < 256; } @@ -35,7 +35,7 @@ static inline dev_t old_decode_dev(u16 val) return MKDEV((val >> 8) & 255, val & 255); } -static inline int new_valid_dev(dev_t dev) +static inline bool new_valid_dev(dev_t dev) { return 1; } -- cgit v1.2.3 From 35181e86df97e4223f4a28fb33e2bcf3b73de141 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 20 Oct 2015 15:39:03 +0800 Subject: KVM: x86: Add a common TSC scaling function VMX and SVM calculate the TSC scaling ratio in a similar logic, so this patch generalizes it to a common TSC scaling function. Signed-off-by: Haozhong Zhang [Inline the multiplication and shift steps into mul_u64_u64_shr. Remove BUG_ON. - Paolo] Signed-off-by: Paolo Bonzini --- include/linux/kvm_host.h | 1 + include/linux/math64.h | 51 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 52 insertions(+) (limited to 'include/linux') diff --git a/include/linux/kvm_host.h b/include/linux/kvm_host.h index 242a6d2b53ff..5706a2108f0a 100644 --- a/include/linux/kvm_host.h +++ b/include/linux/kvm_host.h @@ -1183,4 +1183,5 @@ void kvm_arch_irq_bypass_start(struct irq_bypass_consumer *); int kvm_arch_update_irqfd_routing(struct kvm *kvm, unsigned int host_irq, uint32_t guest_irq, bool set); #endif /* CONFIG_HAVE_KVM_IRQ_BYPASS */ + #endif diff --git a/include/linux/math64.h b/include/linux/math64.h index c45c089bfdac..44282ec7b682 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -142,6 +142,13 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 mul, unsigned int shift) +{ + return (u64)(((unsigned __int128)a * mul) >> shift); +} +#endif /* mul_u64_u64_shr */ + #else #ifndef mul_u64_u32_shr @@ -161,6 +168,50 @@ static inline u64 mul_u64_u32_shr(u64 a, u32 mul, unsigned int shift) } #endif /* mul_u64_u32_shr */ +#ifndef mul_u64_u64_shr +static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } rl, rm, rn, rh, a0, b0; + u64 c; + + a0.ll = a; + b0.ll = b; + + rl.ll = (u64)a0.l.low * b0.l.low; + rm.ll = (u64)a0.l.low * b0.l.high; + rn.ll = (u64)a0.l.high * b0.l.low; + rh.ll = (u64)a0.l.high * b0.l.high; + + /* + * Each of these lines computes a 64-bit intermediate result into "c", + * starting at bits 32-95. The low 32-bits go into the result of the + * multiplication, the high 32-bits are carried into the next step. + */ + rl.l.high = c = (u64)rl.l.high + rm.l.low + rn.l.low; + rh.l.low = c = (c >> 32) + rm.l.high + rn.l.high + rh.l.low; + rh.l.high = (c >> 32) + rh.l.high; + + /* + * The 128-bit result of the multiplication is in rl.ll and rh.ll, + * shift it right and throw away the high part of the result. + */ + if (shift == 0) + return rl.ll; + if (shift < 64) + return (rl.ll >> shift) | (rh.ll << (64 - shift)); + return rh.ll >> (shift & 63); +} +#endif /* mul_u64_u64_shr */ + #endif #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From 381d585c80e34988269bd7901ad910981e900be1 Mon Sep 17 00:00:00 2001 From: Haozhong Zhang Date: Tue, 20 Oct 2015 15:39:04 +0800 Subject: KVM: x86: Replace call-back set_tsc_khz() with a common function Both VMX and SVM propagate virtual_tsc_khz in the same way, so this patch removes the call-back set_tsc_khz() and replaces it with a common function. Signed-off-by: Haozhong Zhang Signed-off-by: Paolo Bonzini --- include/linux/math64.h | 29 +++++++++++++++++++++++++++++ 1 file changed, 29 insertions(+) (limited to 'include/linux') diff --git a/include/linux/math64.h b/include/linux/math64.h index 44282ec7b682..6e8b5b270ffe 100644 --- a/include/linux/math64.h +++ b/include/linux/math64.h @@ -214,4 +214,33 @@ static inline u64 mul_u64_u64_shr(u64 a, u64 b, unsigned int shift) #endif +#ifndef mul_u64_u32_div +static inline u64 mul_u64_u32_div(u64 a, u32 mul, u32 divisor) +{ + union { + u64 ll; + struct { +#ifdef __BIG_ENDIAN + u32 high, low; +#else + u32 low, high; +#endif + } l; + } u, rl, rh; + + u.ll = a; + rl.ll = (u64)u.l.low * mul; + rh.ll = (u64)u.l.high * mul + rl.l.high; + + /* Bits 32-63 of the result will be in rh.l.low. */ + rl.l.high = do_div(rh.ll, divisor); + + /* Bits 0-31 of the result will be in rl.l.low. */ + do_div(rl.ll, divisor); + + rl.l.high = rh.l.low; + return rl.ll; +} +#endif /* mul_u64_u32_div */ + #endif /* _LINUX_MATH64_H */ -- cgit v1.2.3 From f70cd6b07e629f367bb9b1ac9d0e3e669eb325c0 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:55 +0100 Subject: context_tracking: remove duplicate enabled check All calls to context_tracking_enter and context_tracking_exit are already checking context_tracking_is_enabled, except the context_tracking_user_enter and context_tracking_user_exit functions left in for the benefit of assembly calls. Pull the check up to those functions, by making them simple wrappers around the user_enter and user_exit inline functions. Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Acked-by: Andy Lutomirski Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 008fc67d0d96..6ef136ff0897 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -18,13 +18,13 @@ extern void context_tracking_user_exit(void); static inline void user_enter(void) { if (context_tracking_is_enabled()) - context_tracking_user_enter(); + context_tracking_enter(CONTEXT_USER); } static inline void user_exit(void) { if (context_tracking_is_enabled()) - context_tracking_user_exit(); + context_tracking_exit(CONTEXT_USER); } static inline enum ctx_state exception_enter(void) -- cgit v1.2.3 From d0e536d89395ecd8ab78fe999dc4d6f5d140ce46 Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Wed, 28 Oct 2015 02:39:56 +0100 Subject: context_tracking: avoid irq_save/irq_restore on guest entry and exit guest_enter and guest_exit must be called with interrupts disabled, since they take the vtime_seqlock with write_seq{lock,unlock}. Therefore, it is not necessary to check for exceptions, nor to save/restore the IRQ state, when context tracking functions are called by guest_enter and guest_exit. Split the body of context_tracking_entry and context_tracking_exit out to __-prefixed functions, and use them from KVM. Rik van Riel has measured this to speed up a tight vmentry/vmexit loop by about 2%. Cc: Andy Lutomirski Cc: Frederic Weisbecker Cc: Paul McKenney Reviewed-by: Rik van Riel Tested-by: Rik van Riel Signed-off-by: Paolo Bonzini --- include/linux/context_tracking.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/context_tracking.h b/include/linux/context_tracking.h index 6ef136ff0897..68b575afe5f5 100644 --- a/include/linux/context_tracking.h +++ b/include/linux/context_tracking.h @@ -10,6 +10,10 @@ #ifdef CONFIG_CONTEXT_TRACKING extern void context_tracking_cpu_set(int cpu); +/* Called with interrupts disabled. */ +extern void __context_tracking_enter(enum ctx_state state); +extern void __context_tracking_exit(enum ctx_state state); + extern void context_tracking_enter(enum ctx_state state); extern void context_tracking_exit(enum ctx_state state); extern void context_tracking_user_enter(void); @@ -88,13 +92,13 @@ static inline void guest_enter(void) current->flags |= PF_VCPU; if (context_tracking_is_enabled()) - context_tracking_enter(CONTEXT_GUEST); + __context_tracking_enter(CONTEXT_GUEST); } static inline void guest_exit(void) { if (context_tracking_is_enabled()) - context_tracking_exit(CONTEXT_GUEST); + __context_tracking_exit(CONTEXT_GUEST); if (vtime_accounting_enabled()) vtime_guest_exit(current); -- cgit v1.2.3 From d1cd21427747f15920cd726f5f67a07880e7dee4 Mon Sep 17 00:00:00 2001 From: Jonathan Richardson Date: Fri, 16 Oct 2015 17:40:58 -0700 Subject: pwm: Set enable state properly on failed call to enable The pwm_enable() function didn't clear the enabled bit if a call to the driver's ->enable() callback returned an error. The result was that the state of the PWM core was wrong. Clearing the bit when enable returns an error ensures the state is properly set. Tested-by: Jonathan Richardson Reviewed-by: Dmitry Torokhov Signed-off-by: Jonathan Richardson [thierry.reding@gmail.com: add missing kerneldoc for the lock] Signed-off-by: Thierry Reding --- include/linux/pwm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/pwm.h b/include/linux/pwm.h index d681f6875aef..cfc3ed46cad2 100644 --- a/include/linux/pwm.h +++ b/include/linux/pwm.h @@ -2,6 +2,7 @@ #define __LINUX_PWM_H #include +#include #include struct pwm_device; @@ -87,6 +88,7 @@ enum { * @pwm: global index of the PWM device * @chip: PWM chip providing this PWM device * @chip_data: chip-private data associated with the PWM device + * @lock: used to serialize accesses to the PWM device where necessary * @period: period of the PWM signal (in nanoseconds) * @duty_cycle: duty cycle of the PWM signal (in nanoseconds) * @polarity: polarity of the PWM signal @@ -98,6 +100,7 @@ struct pwm_device { unsigned int pwm; struct pwm_chip *chip; void *chip_data; + struct mutex lock; unsigned int period; unsigned int duty_cycle; -- cgit v1.2.3 From aabc92bbe3cfe4c545f8ccdaaeeea012a46f0abf Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 10 Nov 2015 14:31:18 +0100 Subject: net: add __netdev_alloc_pcpu_stats() to indicate gfp flags nf_tables may create percpu counters from the packet path through its dynamic set instantiation infrastructure, so we need a way to allocate this through GFP_ATOMIC. Signed-off-by: Pablo Neira Ayuso Acked-by: David S. Miller --- include/linux/netdevice.h | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 2c00772bd136..e9d0c8a75380 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2068,20 +2068,23 @@ struct pcpu_sw_netstats { struct u64_stats_sync syncp; }; -#define netdev_alloc_pcpu_stats(type) \ -({ \ - typeof(type) __percpu *pcpu_stats = alloc_percpu(type); \ - if (pcpu_stats) { \ - int __cpu; \ - for_each_possible_cpu(__cpu) { \ - typeof(type) *stat; \ - stat = per_cpu_ptr(pcpu_stats, __cpu); \ - u64_stats_init(&stat->syncp); \ - } \ - } \ - pcpu_stats; \ +#define __netdev_alloc_pcpu_stats(type, gfp) \ +({ \ + typeof(type) __percpu *pcpu_stats = alloc_percpu_gfp(type, gfp);\ + if (pcpu_stats) { \ + int __cpu; \ + for_each_possible_cpu(__cpu) { \ + typeof(type) *stat; \ + stat = per_cpu_ptr(pcpu_stats, __cpu); \ + u64_stats_init(&stat->syncp); \ + } \ + } \ + pcpu_stats; \ }) +#define netdev_alloc_pcpu_stats(type) \ + __netdev_alloc_pcpu_stats(type, GFP_KERNEL); + #include /* netdevice notifier chain. Please remember to update the rtnetlink -- cgit v1.2.3 From b1d06b60e90cd5016798b9984f8e420e753f4846 Mon Sep 17 00:00:00 2001 From: Guenter Roeck Date: Fri, 6 Nov 2015 19:28:22 -0800 Subject: of: Provide static inline function for of_translate_address if needed If OF_ADDRESS is not configured, builds can fail with errors such as drivers/net/ethernet/hisilicon/hns_mdio.c: In function 'hns_mdio_bus_name': drivers/net/ethernet/hisilicon/hns_mdio.c:411:3: error: implicit declaration of function 'of_translate_address' as currently seen when building sparc:allmodconfig. Introduce a static inline function if OF_ADDRESS is not configured to fix the build failure. Return OF_BAD_ADDR in this case. For this to work, the definition of OF_BAD_ADDR has to be moved outside CONFIG_OF conditional code. Fixes: 876133d3161d ("net: hisilicon: add OF dependency") Cc: Arnd Bergmann Signed-off-by: Guenter Roeck Reviewed-by: Arnd Bergmann Reviewed-by: Frank Rowand Signed-off-by: Rob Herring --- include/linux/of.h | 4 ++-- include/linux/of_address.h | 7 +++++++ 2 files changed, 9 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 2194b8ca41f9..dd10626a615f 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -126,6 +126,8 @@ extern raw_spinlock_t devtree_lock; #define OF_POPULATED 3 /* device already created for the node */ #define OF_POPULATED_BUS 4 /* of_platform_populate recursed to children of this node */ +#define OF_BAD_ADDR ((u64)-1) + #ifdef CONFIG_OF void of_core_init(void); @@ -229,8 +231,6 @@ static inline unsigned long of_read_ulong(const __be32 *cell, int size) #define OF_IS_DYNAMIC(x) test_bit(OF_DYNAMIC, &x->_flags) #define OF_MARK_DYNAMIC(x) set_bit(OF_DYNAMIC, &x->_flags) -#define OF_BAD_ADDR ((u64)-1) - static inline const char *of_node_full_name(const struct device_node *np) { return np ? np->full_name : ""; diff --git a/include/linux/of_address.h b/include/linux/of_address.h index d88e81be6368..507daad0bc8d 100644 --- a/include/linux/of_address.h +++ b/include/linux/of_address.h @@ -57,6 +57,13 @@ extern int of_dma_get_range(struct device_node *np, u64 *dma_addr, u64 *paddr, u64 *size); extern bool of_dma_is_coherent(struct device_node *np); #else /* CONFIG_OF_ADDRESS */ + +static inline u64 of_translate_address(struct device_node *np, + const __be32 *addr) +{ + return OF_BAD_ADDR; +} + static inline struct device_node *of_find_matching_node_by_address( struct device_node *from, const struct of_device_id *matches, -- cgit v1.2.3 From 5c50002963369c7c622b18ff751719eadbe225c5 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Tue, 13 Oct 2015 16:51:02 -0600 Subject: vfs: remove unused wrapper block_page_mkwrite() The function currently called "__block_page_mkwrite()" used to be called "block_page_mkwrite()" until a wrapper for this function was added by: commit 24da4fab5a61 ("vfs: Create __block_page_mkwrite() helper passing error values back") This wrapper, the current "block_page_mkwrite()", is currently unused. __block_page_mkwrite() is used directly by ext4, nilfs2 and xfs. Remove the unused wrapper, rename __block_page_mkwrite() back to block_page_mkwrite() and update the comment above block_page_mkwrite(). Signed-off-by: Ross Zwisler Reviewed-by: Jan Kara Cc: Jan Kara Cc: Christoph Hellwig Cc: Al Viro Signed-off-by: Al Viro --- include/linux/buffer_head.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index e6797ded700e..89d9aa9e79bf 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -227,8 +227,6 @@ int cont_write_begin(struct file *, struct address_space *, loff_t, get_block_t *, loff_t *); int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); -int __block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, - get_block_t get_block); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); /* Convert errno to return value from ->page_mkwrite() call */ -- cgit v1.2.3 From c8fffa643583e00eb9a783abbca251b11bc0d163 Mon Sep 17 00:00:00 2001 From: Ross Zwisler Date: Thu, 8 Oct 2015 17:07:20 -0600 Subject: vfs: remove stale comment in inode_operations The big warning comment that is currently at the end of struct inode_operations was added as part of this commit: 4aa7c6346be3 ("vfs: add i_op->dentry_open()") It was added to warn people not to use the newly added 'dentry_open' function pointer. This function pointer was removed as part of this commit: 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay") The comment was left behind and now refers to nothing, so remove it. Signed-off-by: Ross Zwisler Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9a1cb8c605e0..f3bfbd7d3fa9 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1665,8 +1665,6 @@ struct inode_operations { umode_t create_mode, int *opened); int (*tmpfile) (struct inode *, struct dentry *, umode_t); int (*set_acl)(struct inode *, struct posix_acl *, int); - - /* WARNING: probably going away soon, do not use! */ } ____cacheline_aligned; ssize_t rw_copy_check_uvector(int type, const struct iovec __user * uvector, -- cgit v1.2.3 From c0a9f72c156baf1e88c33c6ba4450647af1b8804 Mon Sep 17 00:00:00 2001 From: Alex Smith Date: Mon, 12 Oct 2015 10:40:43 +0100 Subject: irqchip: irq-mips-gic: Provide function to map GIC user section The GIC provides a "user-mode visible" section containing a mirror of the counter registers which can be mapped into user memory. This will be used by the VDSO time function implementations, so provide a function to map it in. When the GIC is not enabled in Kconfig a dummy inline version of this function is provided, along with "#define gic_present 0", so that we don't have to litter the VDSO code with ifdefs. [markos.chandras@imgtec.com: - Move mapping code to arch/mips/kernel/vdso.c and use a resource type to get the GIC usermode information - Avoid renaming function arguments and use __gic_base_addr to hold the base GIC address prior to ioremap.] [ralf@linux-mips.org: Fix up gic_get_usm_range() to compile and make inline again.] Signed-off-by: Alex Smith Signed-off-by: Markos Chandras Reviewed-by: Marc Zyngier Cc: Thomas Gleixner Cc: Jason Cooper Cc: Marc Zyngier Cc: Alex Smith Cc: Markos Chandras Cc: linux-kernel@vger.kernel.org Cc: linux-mips@linux-mips.org Patchwork: http://patchwork.linux-mips.org/patch/11281/ Signed-off-by: Ralf Baechle --- include/linux/irqchip/mips-gic.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/irqchip/mips-gic.h b/include/linux/irqchip/mips-gic.h index 4e6861605050..ce824db48d64 100644 --- a/include/linux/irqchip/mips-gic.h +++ b/include/linux/irqchip/mips-gic.h @@ -9,6 +9,7 @@ #define __LINUX_IRQCHIP_MIPS_GIC_H #include +#include #define GIC_MAX_INTRS 256 @@ -245,6 +246,8 @@ #define GIC_SHARED_TO_HWIRQ(x) (GIC_SHARED_HWIRQ_BASE + (x)) #define GIC_HWIRQ_TO_SHARED(x) ((x) - GIC_SHARED_HWIRQ_BASE) +#ifdef CONFIG_MIPS_GIC + extern unsigned int gic_present; extern void gic_init(unsigned long gic_base_addr, @@ -264,4 +267,18 @@ extern unsigned int plat_ipi_resched_int_xlate(unsigned int); extern int gic_get_c0_compare_int(void); extern int gic_get_c0_perfcount_int(void); extern int gic_get_c0_fdc_int(void); +extern int gic_get_usm_range(struct resource *gic_usm_res); + +#else /* CONFIG_MIPS_GIC */ + +#define gic_present 0 + +static inline int gic_get_usm_range(struct resource *gic_usm_res) +{ + /* Shouldn't be called. */ + return -1; +} + +#endif /* CONFIG_MIPS_GIC */ + #endif /* __LINUX_IRQCHIP_MIPS_GIC_H */ -- cgit v1.2.3 From e3a7a3bf362e2a8acc301e5eaec2631e740a8a95 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 11 Nov 2015 09:37:34 -0700 Subject: block: don't hardcode blk_qc_t -> tag mask Use the shift/mask we use elsewhere. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 641e5a3ed58c..0fb65843ec1e 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -265,7 +265,7 @@ static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) static inline unsigned int blk_qc_t_to_tag(blk_qc_t cookie) { - return cookie & 0xffff; + return cookie & ((1u << BLK_QC_T_SHIFT) - 1); } #endif /* __LINUX_BLK_TYPES_H */ -- cgit v1.2.3 From e409de992e3ea3674393465f07cc71c948edd87a Mon Sep 17 00:00:00 2001 From: Andreas Gruenbacher Date: Sun, 4 Oct 2015 19:18:52 +0200 Subject: 9p: xattr simplifications Now that the xattr handler is passed to the xattr handler operations, we can use the same get and set operations for the user, trusted, and security xattr namespaces. In those namespaces, we can access the full attribute name by "reattaching" the name prefix the vfs has skipped for us. Add a xattr_full_name helper to make this obvious in the code. For the "system.posix_acl_access" and "system.posix_acl_default" attributes, handler->prefix is the full attribute name; the suffix is the empty string. Signed-off-by: Andreas Gruenbacher Cc: Eric Van Hensbergen Cc: Ron Minnich Cc: Latchesar Ionkov Cc: v9fs-developer@lists.sourceforge.net Signed-off-by: Al Viro --- include/linux/xattr.h | 18 +++++++++++------- 1 file changed, 11 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/xattr.h b/include/linux/xattr.h index 91b0a68d38dc..89474b9d260c 100644 --- a/include/linux/xattr.h +++ b/include/linux/xattr.h @@ -21,15 +21,19 @@ struct dentry; struct xattr_handler { const char *prefix; - int flags; /* fs private flags passed back to the handlers */ - size_t (*list)(struct dentry *dentry, char *list, size_t list_size, - const char *name, size_t name_len, int handler_flags); - int (*get)(struct dentry *dentry, const char *name, void *buffer, - size_t size, int handler_flags); - int (*set)(struct dentry *dentry, const char *name, const void *buffer, - size_t size, int flags, int handler_flags); + int flags; /* fs private flags */ + size_t (*list)(const struct xattr_handler *, struct dentry *dentry, + char *list, size_t list_size, const char *name, + size_t name_len); + int (*get)(const struct xattr_handler *, struct dentry *dentry, + const char *name, void *buffer, size_t size); + int (*set)(const struct xattr_handler *, struct dentry *dentry, + const char *name, const void *buffer, size_t size, + int flags); }; +const char *xattr_full_name(const struct xattr_handler *, const char *); + struct xattr { const char *name; void *value; -- cgit v1.2.3 From 66189961e986e53ae39822898fc2ce88f44c61bb Mon Sep 17 00:00:00 2001 From: Tariq Toukan Date: Thu, 12 Nov 2015 19:35:26 +0200 Subject: net/mlx5e: Added self loopback prevention Prevent outgoing multicast frames from looping back to the RX queue. By introducing new HW capability self_lb_en_modifiable, which indicates the support to modify self_lb_en bit in modify_tir command. When this capability is set we can prevent TIRs from sending back loopback multicast traffic to their own RQs, by "refreshing TIRs" with modify_tir command, on every time new channels (SQs/RQs) are created at device open. This is needed since TIRs are static and only allocated once on driver load, and the loopback decision is under their responsibility. Fixes issues of the kind: "IPv6: eth2: IPv6 duplicate address fe80::e61d:2dff:fe5c:f2e9 detected!" The issue is seen since the IPv6 solicitations multicast messages are loopedback and the network stack thinks they are coming from another host. Fixes: 5c50368f3831 ("net/mlx5e: Light-weight netdev open/stop") Signed-off-by: Tariq Toukan Signed-off-by: Saeed Mahameed Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx5/mlx5_ifc.h | 24 ++++++++++++++---------- 1 file changed, 14 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/mlx5_ifc.h b/include/linux/mlx5/mlx5_ifc.h index dd2097455a2e..1565324eb620 100644 --- a/include/linux/mlx5/mlx5_ifc.h +++ b/include/linux/mlx5/mlx5_ifc.h @@ -453,26 +453,28 @@ struct mlx5_ifc_per_protocol_networking_offload_caps_bits { u8 lro_cap[0x1]; u8 lro_psh_flag[0x1]; u8 lro_time_stamp[0x1]; - u8 reserved_0[0x6]; + u8 reserved_0[0x3]; + u8 self_lb_en_modifiable[0x1]; + u8 reserved_1[0x2]; u8 max_lso_cap[0x5]; - u8 reserved_1[0x4]; + u8 reserved_2[0x4]; u8 rss_ind_tbl_cap[0x4]; - u8 reserved_2[0x3]; + u8 reserved_3[0x3]; u8 tunnel_lso_const_out_ip_id[0x1]; - u8 reserved_3[0x2]; + u8 reserved_4[0x2]; u8 tunnel_statless_gre[0x1]; u8 tunnel_stateless_vxlan[0x1]; - u8 reserved_4[0x20]; + u8 reserved_5[0x20]; - u8 reserved_5[0x10]; + u8 reserved_6[0x10]; u8 lro_min_mss_size[0x10]; - u8 reserved_6[0x120]; + u8 reserved_7[0x120]; u8 lro_timer_supported_periods[4][0x20]; - u8 reserved_7[0x600]; + u8 reserved_8[0x600]; }; struct mlx5_ifc_roce_cap_bits { @@ -4051,9 +4053,11 @@ struct mlx5_ifc_modify_tis_in_bits { }; struct mlx5_ifc_modify_tir_bitmask_bits { - u8 reserved[0x20]; + u8 reserved_0[0x20]; - u8 reserved1[0x1f]; + u8 reserved_1[0x1b]; + u8 self_lb_en[0x1]; + u8 reserved_2[0x3]; u8 lro[0x1]; }; -- cgit v1.2.3 From 500404ebcbd074ca11aa0c3fd9a268aa4054fd8b Mon Sep 17 00:00:00 2001 From: Peter Ujfalusi Date: Tue, 3 Nov 2015 12:28:10 +0200 Subject: dmaengine: of_dma: Correct return code for of_dma_request_slave_channel in case !CONFIG_OF of_dma_request_slave_channel should return either pointer for valid dma_chan or ERR_PTR() error code, NULL is not expected to be returned. Signed-off-by: Peter Ujfalusi Acked-by: Arnd Bergmann Signed-off-by: Vinod Koul --- include/linux/of_dma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_dma.h b/include/linux/of_dma.h index 36112cdd665a..b90d8ec57c1f 100644 --- a/include/linux/of_dma.h +++ b/include/linux/of_dma.h @@ -80,7 +80,7 @@ static inline int of_dma_router_register(struct device_node *np, static inline struct dma_chan *of_dma_request_slave_channel(struct device_node *np, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *of_dma_simple_xlate(struct of_phandle_args *dma_spec, -- cgit v1.2.3 From 28f9ee22bcdd84726dbf6267d0b58f254166b900 Mon Sep 17 00:00:00 2001 From: Vlad Yasevich Date: Mon, 16 Nov 2015 15:43:45 -0500 Subject: vlan: Do not put vlan headers back on bridge and macvlan ports When a vlan is configured with REORDER_HEADER set to 0, the vlan header is put back into the packet and makes it appear that the vlan header is still there even after it's been processed. This posses a problem for bridge and macvlan ports. The packets passed to those device may be forwarded and at the time of the forward, vlan headers end up being unexpectedly present. With the patch, we make sure that we do not put the vlan header back (when REORDER_HEADER is 0) if a bridge or macvlan has been configured on top of the vlan device. Signed-off-by: Vladislav Yasevich Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cc221b967687..67bfac1abfc1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -3857,6 +3857,11 @@ static inline bool netif_is_bridge_master(const struct net_device *dev) return dev->priv_flags & IFF_EBRIDGE; } +static inline bool netif_is_bridge_port(const struct net_device *dev) +{ + return dev->priv_flags & IFF_BRIDGE_PORT; +} + static inline bool netif_is_ovs_master(const struct net_device *dev) { return dev->priv_flags & IFF_OPENVSWITCH; -- cgit v1.2.3 From 819ec8e1f349f73bdf65bf33a364538e59007a9a Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Mon, 16 Nov 2015 23:34:41 +0100 Subject: phy: marvell: Add support for 88E1540 PHY The 88E1540 can be found embedded in the Marvell 88E6352 switch. It is compatible with the 88E1510, so add support for it, using the 88E1510 specific functions. Signed-off-by: Andrew Lunn Signed-off-by: David S. Miller --- include/linux/marvell_phy.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/marvell_phy.h b/include/linux/marvell_phy.h index e6982ac3200d..a57f0dfb6db7 100644 --- a/include/linux/marvell_phy.h +++ b/include/linux/marvell_phy.h @@ -16,6 +16,7 @@ #define MARVELL_PHY_ID_88E1318S 0x01410e90 #define MARVELL_PHY_ID_88E1116R 0x01410e40 #define MARVELL_PHY_ID_88E1510 0x01410dd0 +#define MARVELL_PHY_ID_88E1540 0x01410eb0 #define MARVELL_PHY_ID_88E3016 0x01410e60 /* struct phy_device dev_flags definitions */ -- cgit v1.2.3 From 2e6edc95382cc36423aff18a237173ad62d5ab52 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Thu, 19 Nov 2015 13:29:28 -0800 Subject: block: protect rw_page against device teardown Fix use after free crashes like the following: general protection fault: 0000 [#1] SMP Call Trace: [] ? pmem_do_bvec.isra.12+0xa6/0xf0 [nd_pmem] [] pmem_rw_page+0x42/0x80 [nd_pmem] [] bdev_read_page+0x50/0x60 [] do_mpage_readpage+0x510/0x770 [] ? I_BDEV+0x20/0x20 [] ? lru_cache_add+0x1c/0x50 [] mpage_readpages+0x107/0x170 [] ? I_BDEV+0x20/0x20 [] ? I_BDEV+0x20/0x20 [] blkdev_readpages+0x1d/0x20 [] __do_page_cache_readahead+0x28f/0x310 [] ? __do_page_cache_readahead+0x169/0x310 [] ? pagecache_get_page+0x2d/0x1d0 [] filemap_fault+0x396/0x530 [] __do_fault+0x4e/0xf0 [] handle_mm_fault+0x11bd/0x1b50 Cc: Cc: Jens Axboe Cc: Alexander Viro Reported-by: kbuild test robot Acked-by: Matthew Wilcox [willy: symmetry fixups] Signed-off-by: Dan Williams --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fe27f8d91f0..c0d2b7927c1f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,6 +794,8 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); +extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -- cgit v1.2.3 From 94a58c360a45c066ab5472cfd2bf2a4ba63aa532 Mon Sep 17 00:00:00 2001 From: Rasmus Villemoes Date: Fri, 20 Nov 2015 15:56:48 -0800 Subject: slab.h: sprinkle __assume_aligned attributes The various allocators return aligned memory. Telling the compiler that allows it to generate better code in many cases, for example when the return value is immediately passed to memset(). Some code does become larger, but at least we win twice as much as we lose: $ scripts/bloat-o-meter /tmp/vmlinux vmlinux add/remove: 0/0 grow/shrink: 13/52 up/down: 995/-2140 (-1145) An example of the different (and smaller) code can be seen in mm_alloc(). Before: : 48 8d 78 08 lea 0x8(%rax),%rdi : 48 89 c1 mov %rax,%rcx : 48 89 c2 mov %rax,%rdx : 48 c7 00 00 00 00 00 movq $0x0,(%rax) : 48 c7 80 48 03 00 00 movq $0x0,0x348(%rax) : 00 00 00 00 : 31 c0 xor %eax,%eax : 48 83 e7 f8 and $0xfffffffffffffff8,%rdi : 48 29 f9 sub %rdi,%rcx : 81 c1 50 03 00 00 add $0x350,%ecx : c1 e9 03 shr $0x3,%ecx : f3 48 ab rep stos %rax,%es:(%rdi) After: : 48 89 c2 mov %rax,%rdx : b9 6a 00 00 00 mov $0x6a,%ecx : 31 c0 xor %eax,%eax : 48 89 d7 mov %rdx,%rdi : f3 48 ab rep stos %rax,%es:(%rdi) So gcc's strategy is to do two possibly (but not really, of course) unaligned stores to the first and last word, then do an aligned rep stos covering the middle part with a little overlap. Maybe arches which do not allow unaligned stores gain even more. I don't know if gcc can actually make use of alignments greater than 8 for anything, so one could probably drop the __assume_xyz_alignment macros and just use __assume_aligned(8). The increases in code size are mostly caused by gcc deciding to opencode strlen() using the check-four-bytes-at-a-time trick when it knows the buffer is sufficiently aligned (one function grew by 200 bytes). Now it turns out that many of these strlen() calls showing up were in fact redundant, and they're gone from -next. Applying the two patches to next-20151001 bloat-o-meter instead says add/remove: 0/0 grow/shrink: 6/52 up/down: 244/-2140 (-1896) Signed-off-by: Rasmus Villemoes Acked-by: Christoph Lameter Cc: David Rientjes Cc: Pekka Enberg Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 43 ++++++++++++++++++++++++++----------------- 1 file changed, 26 insertions(+), 17 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 7c82e3b307a3..96940772bb92 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -157,6 +157,24 @@ size_t ksize(const void *); #define ARCH_KMALLOC_MINALIGN __alignof__(unsigned long long) #endif +/* + * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. + * Intended for arches that get misalignment faults even for 64 bit integer + * aligned buffers. + */ +#ifndef ARCH_SLAB_MINALIGN +#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) +#endif + +/* + * kmalloc and friends return ARCH_KMALLOC_MINALIGN aligned + * pointers. kmem_cache_alloc and friends return ARCH_SLAB_MINALIGN + * aligned pointers. + */ +#define __assume_kmalloc_alignment __assume_aligned(ARCH_KMALLOC_MINALIGN) +#define __assume_slab_alignment __assume_aligned(ARCH_SLAB_MINALIGN) +#define __assume_page_alignment __assume_aligned(PAGE_SIZE) + /* * Kmalloc array related definitions */ @@ -286,8 +304,8 @@ static __always_inline int kmalloc_index(size_t size) } #endif /* !CONFIG_SLOB */ -void *__kmalloc(size_t size, gfp_t flags); -void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags); +void *__kmalloc(size_t size, gfp_t flags) __assume_kmalloc_alignment; +void *kmem_cache_alloc(struct kmem_cache *, gfp_t flags) __assume_slab_alignment; void kmem_cache_free(struct kmem_cache *, void *); /* @@ -301,8 +319,8 @@ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA -void *__kmalloc_node(size_t size, gfp_t flags, int node); -void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node); +void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; +void *kmem_cache_alloc_node(struct kmem_cache *, gfp_t flags, int node) __assume_slab_alignment; #else static __always_inline void *__kmalloc_node(size_t size, gfp_t flags, int node) { @@ -316,12 +334,12 @@ static __always_inline void *kmem_cache_alloc_node(struct kmem_cache *s, gfp_t f #endif #ifdef CONFIG_TRACING -extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t); +extern void *kmem_cache_alloc_trace(struct kmem_cache *, gfp_t, size_t) __assume_slab_alignment; #ifdef CONFIG_NUMA extern void *kmem_cache_alloc_node_trace(struct kmem_cache *s, gfp_t gfpflags, - int node, size_t size); + int node, size_t size) __assume_slab_alignment; #else static __always_inline void * kmem_cache_alloc_node_trace(struct kmem_cache *s, @@ -354,10 +372,10 @@ kmem_cache_alloc_node_trace(struct kmem_cache *s, } #endif /* CONFIG_TRACING */ -extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #ifdef CONFIG_TRACING -extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order); +extern void *kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) __assume_page_alignment; #else static __always_inline void * kmalloc_order_trace(size_t size, gfp_t flags, unsigned int order) @@ -482,15 +500,6 @@ static __always_inline void *kmalloc_node(size_t size, gfp_t flags, int node) return __kmalloc_node(size, flags, node); } -/* - * Setting ARCH_SLAB_MINALIGN in arch headers allows a different alignment. - * Intended for arches that get misalignment faults even for 64 bit integer - * aligned buffers. - */ -#ifndef ARCH_SLAB_MINALIGN -#define ARCH_SLAB_MINALIGN __alignof__(unsigned long long) -#endif - struct memcg_cache_array { struct rcu_head rcu; struct kmem_cache *entries[0]; -- cgit v1.2.3 From 5cf6a51e6062afe7cc507f32f1e5f7e6497ae844 Mon Sep 17 00:00:00 2001 From: Daniel Baluta Date: Fri, 20 Nov 2015 15:56:53 -0800 Subject: configfs: allow dynamic group creation This patchset introduces IIO software triggers, offers a way of configuring them via configfs and adds the IIO hrtimer based interrupt source to be used with software triggers. The architecture is now split in 3 parts, to remove all IIO trigger specific parts from IIO configfs core: (1) IIO configfs - creates the root of the IIO configfs subsys. (2) IIO software triggers - software trigger implementation, dynamically creating /config/iio/triggers group. (3) IIO hrtimer trigger - is the first interrupt source for software triggers (with syfs to follow). Each trigger type can implement its own set of attributes. Lockdep seems to be happy with the locking in configfs patch. This patch (of 5): We don't want to hardcode default groups at subsystem creation time. We export: * configfs_register_group * configfs_unregister_group to allow drivers to programatically create/destroy groups later, after module init time. This is needed for IIO configfs support. (akpm: the other 4 patches to be merged via the IIO tree) Signed-off-by: Daniel Baluta Suggested-by: Lars-Peter Clausen Reviewed-by: Christoph Hellwig Acked-by: Joel Becker Cc: Hartmut Knaack Cc: Octavian Purdila Cc: Paul Bolle Cc: Adriana Reus Cc: Cristina Opriceana Cc: Peter Meerwald Cc: Alexander Viro Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/configfs.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index a8a335b7fce0..758a029011b1 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -197,6 +197,16 @@ static inline struct configfs_subsystem *to_configfs_subsystem(struct config_gro int configfs_register_subsystem(struct configfs_subsystem *subsys); void configfs_unregister_subsystem(struct configfs_subsystem *subsys); +int configfs_register_group(struct config_group *parent_group, + struct config_group *group); +void configfs_unregister_group(struct config_group *group); + +struct config_group * +configfs_register_default_group(struct config_group *parent_group, + const char *name, + struct config_item_type *item_type); +void configfs_unregister_default_group(struct config_group *group); + /* These functions can sleep and can alloc with GFP_KERNEL */ /* WARNING: These cannot be called underneath configfs callbacks!! */ int configfs_depend_item(struct configfs_subsystem *subsys, struct config_item *target); -- cgit v1.2.3 From 9d8a765211335cfdad464b90fb19f546af5706ae Mon Sep 17 00:00:00 2001 From: Richard Weinberger Date: Fri, 20 Nov 2015 15:57:21 -0800 Subject: kernel/signal.c: unexport sigsuspend() sigsuspend() is nowhere used except in signal.c itself, so we can mark it static do not pollute the global namespace. But this patch is more than a boring cleanup patch, it fixes a real issue on UserModeLinux. UML has a special console driver to display ttys using xterm, or other terminal emulators, on the host side. Vegard reported that sometimes UML is unable to spawn a xterm and he's facing the following warning: WARNING: CPU: 0 PID: 908 at include/linux/thread_info.h:128 sigsuspend+0xab/0xc0() It turned out that this warning makes absolutely no sense as the UML xterm code calls sigsuspend() on the host side, at least it tries. But as the kernel itself offers a sigsuspend() symbol the linker choose this one instead of the glibc wrapper. Interestingly this code used to work since ever but always blocked signals on the wrong side. Some recent kernel change made the WARN_ON() trigger and uncovered the bug. It is a wonderful example of how much works by chance on computers. :-) Fixes: 68f3f16d9ad0f1 ("new helper: sigsuspend()") Signed-off-by: Richard Weinberger Reported-by: Vegard Nossum Tested-by: Vegard Nossum Acked-by: Oleg Nesterov Cc: [3.5+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/signal.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/signal.h b/include/linux/signal.h index ab1e0392b5ac..92557bbce7e7 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -239,7 +239,6 @@ extern int sigprocmask(int, sigset_t *, sigset_t *); extern void set_current_blocked(sigset_t *); extern void __set_current_blocked(const sigset_t *); extern int show_unhandled_signals; -extern int sigsuspend(sigset_t *); struct sigaction { #ifndef __ARCH_HAS_IRIX_SIGACTION -- cgit v1.2.3 From 21fa8442799945beaca074cb5bcf7cfe24969d59 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Fri, 20 Nov 2015 15:57:32 -0800 Subject: mm: fix up sparse warning in gfpflags_allow_blocking sparse says: include/linux/gfp.h:274:26: warning: incorrect type in return expression (different base types) include/linux/gfp.h:274:26: expected bool include/linux/gfp.h:274:26: got restricted gfp_t ...add a forced cast to silence the warning. Signed-off-by: Jeff Layton Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 6523109e136d..8942af0813e3 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -271,7 +271,7 @@ static inline int gfpflags_to_migratetype(const gfp_t gfp_flags) static inline bool gfpflags_allow_blocking(const gfp_t gfp_flags) { - return gfp_flags & __GFP_DIRECT_RECLAIM; + return (bool __force)(gfp_flags & __GFP_DIRECT_RECLAIM); } #ifdef CONFIG_HIGHMEM -- cgit v1.2.3 From 6b2a3d628aa752f0ab825fc6d4d07b09e274d1c1 Mon Sep 17 00:00:00 2001 From: Peter Hurley Date: Sun, 8 Nov 2015 08:52:31 -0500 Subject: tty: audit: Fix audit source MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The data to audit/record is in the 'from' buffer (ie., the input read buffer). Fixes: 72586c6061ab ("n_tty: Fix auditing support for cannonical mode") Cc: stable # 4.1+ Cc: Miloslav Trmač Signed-off-by: Peter Hurley Acked-by: Laura Abbott Signed-off-by: Greg Kroah-Hartman --- include/linux/tty.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tty.h b/include/linux/tty.h index 5b04b0a5375b..5e31f1b99037 100644 --- a/include/linux/tty.h +++ b/include/linux/tty.h @@ -607,7 +607,7 @@ extern void n_tty_inherit_ops(struct tty_ldisc_ops *ops); /* tty_audit.c */ #ifdef CONFIG_AUDIT -extern void tty_audit_add_data(struct tty_struct *tty, unsigned char *data, +extern void tty_audit_add_data(struct tty_struct *tty, const void *data, size_t size, unsigned icanon); extern void tty_audit_exit(void); extern void tty_audit_fork(struct signal_struct *sig); @@ -615,8 +615,8 @@ extern void tty_audit_tiocsti(struct tty_struct *tty, char ch); extern void tty_audit_push(struct tty_struct *tty); extern int tty_audit_push_current(void); #else -static inline void tty_audit_add_data(struct tty_struct *tty, - unsigned char *data, size_t size, unsigned icanon) +static inline void tty_audit_add_data(struct tty_struct *tty, const void *data, + size_t size, unsigned icanon) { } static inline void tty_audit_tiocsti(struct tty_struct *tty, char ch) -- cgit v1.2.3 From 865762a8119e74b5f0e236d2d8eaaf8be9292a06 Mon Sep 17 00:00:00 2001 From: Jesper Dangaard Brouer Date: Fri, 20 Nov 2015 15:57:58 -0800 Subject: slab/slub: adjust kmem_cache_alloc_bulk API Adjust kmem_cache_alloc_bulk API before we have any real users. Adjust API to return type 'int' instead of previously type 'bool'. This is done to allow future extension of the bulk alloc API. A future extension could be to allow SLUB to stop at a page boundary, when specified by a flag, and then return the number of objects. The advantage of this approach, would make it easier to make bulk alloc run without local IRQs disabled. With an approach of cmpxchg "stealing" the entire c->freelist or page->freelist. To avoid overshooting we would stop processing at a slab-page boundary. Else we always end up returning some objects at the cost of another cmpxchg. To keep compatible with future users of this API linking against an older kernel when using the new flag, we need to return the number of allocated objects with this API change. Signed-off-by: Jesper Dangaard Brouer Cc: Vladimir Davydov Acked-by: Christoph Lameter Cc: Pekka Enberg Cc: David Rientjes Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slab.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 96940772bb92..2037a861e367 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -316,7 +316,7 @@ void kmem_cache_free(struct kmem_cache *, void *); * Note that interrupts must be enabled when calling these functions. */ void kmem_cache_free_bulk(struct kmem_cache *, size_t, void **); -bool kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); +int kmem_cache_alloc_bulk(struct kmem_cache *, gfp_t, size_t, void **); #ifdef CONFIG_NUMA void *__kmalloc_node(size_t size, gfp_t flags, int node) __assume_kmalloc_alignment; -- cgit v1.2.3