From f315e3fa1cf5b3317fc948708645fff889ce1e63 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Mon, 2 Dec 2013 17:49:41 +0900 Subject: slab: restrict the number of objects in a slab To prepare to implement byte sized index for managing the freelist of a slab, we should restrict the number of objects in a slab to be less or equal to 256, since byte only represent 256 different values. Setting the size of object to value equal or more than newly introduced SLAB_OBJ_MIN_SIZE ensures that the number of objects in a slab is less or equal to 256 for a slab with 1 page. If page size is rather larger than 4096, above assumption would be wrong. In this case, we would fall back on 2 bytes sized index. If minimum size of kmalloc is less than 16, we use it as minimum object size and give up this optimization. Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- include/linux/slab.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index 9260abdd67df..d015dec02bf3 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -201,6 +201,17 @@ struct kmem_cache { #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif + +/* + * This restriction comes from byte sized index implementation. + * Page size is normally 2^12 bytes and, in this case, if we want to use + * byte sized index which can represent 2^8 entries, the size of the object + * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. + * If minimum size of kmalloc is less than 16, we use it as minimum object + * size and give up to use byte sized index. + */ +#define SLAB_OBJ_MIN_SIZE (KMALLOC_SHIFT_LOW < 4 ? \ + (1 << KMALLOC_SHIFT_LOW) : 16) #endif #ifdef CONFIG_SLUB -- cgit v1.2.3 From 0f6a928d035b82c0b3aa387d510a73f3e6dbf8e3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Thu, 6 Feb 2014 13:25:40 +0200 Subject: acpi-dma: convert to return error code when asked for channel Currently acpi_dma_request_slave_chan_by_index() and acpi_dma_request_slave_chan_by_name() return only requested channel or NULL. This patch converts them to return appropriate error code instead of NULL in case of unsuccessfull request. Signed-off-by: Andy Shevchenko Acked-by: Mika Westerberg Signed-off-by: Vinod Koul --- include/linux/acpi_dma.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/acpi_dma.h b/include/linux/acpi_dma.h index fb0298082916..329436d38e66 100644 --- a/include/linux/acpi_dma.h +++ b/include/linux/acpi_dma.h @@ -16,6 +16,7 @@ #include #include +#include #include /** @@ -103,12 +104,12 @@ static inline void devm_acpi_dma_controller_free(struct device *dev) static inline struct dma_chan *acpi_dma_request_slave_chan_by_index( struct device *dev, size_t index) { - return NULL; + return ERR_PTR(-ENODEV); } static inline struct dma_chan *acpi_dma_request_slave_chan_by_name( struct device *dev, const char *name) { - return NULL; + return ERR_PTR(-ENODEV); } #define acpi_dma_simple_xlate NULL -- cgit v1.2.3 From 2ea24497a1b30dd03dd42b873fa5097913587f4d Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Mon, 10 Feb 2014 11:18:39 -0500 Subject: SUNRPC: RPC callbacks may be split across several TCP segments Since TCP is a stream protocol, our callback read code needs to take into account the fact that RPC callbacks are not always confined to a single TCP segment. This patch adds support for multiple TCP segments by ensuring that we only remove the rpc_rqst structure from the 'free backchannel requests' list once the data has been completely received. We rely on the fact that TCP data is ordered for the duration of the connection. Reported-by: shaobingqing Signed-off-by: Trond Myklebust --- include/linux/sunrpc/bc_xprt.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/bc_xprt.h b/include/linux/sunrpc/bc_xprt.h index 969c0a671dbf..2ca67b55e0fe 100644 --- a/include/linux/sunrpc/bc_xprt.h +++ b/include/linux/sunrpc/bc_xprt.h @@ -32,7 +32,8 @@ SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. #include #ifdef CONFIG_SUNRPC_BACKCHANNEL -struct rpc_rqst *xprt_alloc_bc_request(struct rpc_xprt *xprt); +struct rpc_rqst *xprt_lookup_bc_request(struct rpc_xprt *xprt, __be32 xid); +void xprt_complete_bc_request(struct rpc_rqst *req, uint32_t copied); void xprt_free_bc_request(struct rpc_rqst *req); int xprt_setup_backchannel(struct rpc_xprt *, unsigned int min_reqs); void xprt_destroy_backchannel(struct rpc_xprt *, unsigned int max_reqs); -- cgit v1.2.3 From 311324ad1713666a6e803aecf0d4e1a136a5b34a Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Fri, 7 Feb 2014 17:02:08 -0500 Subject: NFS: Be more aggressive in using readdirplus for 'ls -l' situations Try to detect 'ls -l' by having nfs_getattr() look at whether or not there is an opendir() file descriptor for the parent directory. If so, then assume that we want to force use of readdirplus in order to avoid the multiple GETATTR calls over the wire. Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 0ae5807480f4..f55a90bed0b4 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -92,6 +92,7 @@ struct nfs_open_context { }; struct nfs_open_dir_context { + struct list_head list; struct rpc_cred *cred; unsigned long attr_gencount; __u64 dir_cookie; -- cgit v1.2.3 From 2501c9179dff2add6aadd3898cd729e94e777d3a Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 30 Oct 2013 01:00:18 +0100 Subject: mmc: core: Use MMC_UNSAFE_RESUME as default behavior Invoking system suspend or shutdown without using the Kconfig option MMC_UNSAFE_RESUME, did trigger an ungraceful power cut of the card. To improve the situation, change the behavior to always make use of the available bus_ops callbacks that handles system suspend and shutdown properly. By changing the behavior MMC_UNSAFE_RESUME becomes redundant, so lets's remove it. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 99f5709ac343..2a139b2bdb9e 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -424,12 +424,9 @@ static inline int mmc_regulator_get_supply(struct mmc_host *mmc) int mmc_pm_notify(struct notifier_block *notify_block, unsigned long, void *); -/* Module parameter */ -extern bool mmc_assume_removable; - static inline int mmc_card_is_removable(struct mmc_host *host) { - return !(host->caps & MMC_CAP_NONREMOVABLE) && mmc_assume_removable; + return !(host->caps & MMC_CAP_NONREMOVABLE); } static inline int mmc_card_keep_power(struct mmc_host *host) -- cgit v1.2.3 From a2d1086de6cc3ae2378d9db8b92712911c9e5fef Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 14:37:26 +0100 Subject: mmc: card: Remove host cap MMC_CAP2_SANITIZE There is no need for keeping a host cap for MMC_CAP2_SANITIZE, instead we just make the feature default available. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 2a139b2bdb9e..40f832e5db40 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -281,7 +281,6 @@ struct mmc_host { #define MMC_CAP2_PACKED_CMD (MMC_CAP2_PACKED_RD | \ MMC_CAP2_PACKED_WR) #define MMC_CAP2_NO_PRESCAN_POWERUP (1 << 14) /* Don't power up before scan */ -#define MMC_CAP2_SANITIZE (1 << 15) /* Support Sanitize */ mmc_pm_flag_t pm_caps; /* supported pm features */ -- cgit v1.2.3 From 325e2f96b926ae852fa2aa5e64d1040ed9518ae1 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 14:43:51 +0100 Subject: mmc: core: Remove unused host cap MMC_CAP2_BROKEN_VOLTAGE Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 40f832e5db40..461c69f19425 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -272,7 +272,6 @@ struct mmc_host { #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ MMC_CAP2_HS200_1_2V_SDR) -#define MMC_CAP2_BROKEN_VOLTAGE (1 << 7) /* Use the broken voltage */ #define MMC_CAP2_HC_ERASE_SZ (1 << 9) /* High-capacity erase size */ #define MMC_CAP2_CD_ACTIVE_HIGH (1 << 10) /* Card-detect signal active high */ #define MMC_CAP2_RO_ACTIVE_HIGH (1 << 11) /* Write-protect signal active high */ -- cgit v1.2.3 From 469a00b017a9b2c630bff962ffd64ba626977830 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 14:46:00 +0100 Subject: mmc: core: Remove support for MMC_CAP2_NO_SLEEP_CMD There are no active users of this host capability. The primary reason for adding this cap was due to a bug in ux500 boot loader code, which is not a relevant issue any more. So, let's remove it. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 461c69f19425..4c0176a8e474 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -267,7 +267,6 @@ struct mmc_host { #define MMC_CAP2_CACHE_CTRL (1 << 1) /* Allow cache control */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ #define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ -#define MMC_CAP2_NO_SLEEP_CMD (1 << 4) /* Don't allow sleep command */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ #define MMC_CAP2_HS200_1_2V_SDR (1 << 6) /* can support */ #define MMC_CAP2_HS200 (MMC_CAP2_HS200_1_8V_SDR | \ -- cgit v1.2.3 From 10e5d9652499a8bc0a99ffc2a96a3030fee576cb Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Mon, 16 Dec 2013 16:23:22 +0100 Subject: mmc: core: Use mmc_flush_cache() during mmc suspend Earlier we disabled the cache during suspend, which meant a flush was internally at the eMMC performed as well. To simplify code we can make use of the mmc_flush_cache(), during mmc suspend, which makes the mmc_cache_ctrl() redundant so then we can remove it. Signed-off-by: Ulf Hansson Acked-by: Seungwon Jeon Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 4c0176a8e474..f69bd70b1046 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -385,8 +385,6 @@ int mmc_power_restore_host(struct mmc_host *host); void mmc_detect_change(struct mmc_host *, unsigned long delay); void mmc_request_done(struct mmc_host *, struct mmc_request *); -int mmc_cache_ctrl(struct mmc_host *, u8); - static inline void mmc_signal_sdio_irq(struct mmc_host *host) { host->ops->enable_sdio_irq(host, 0); -- cgit v1.2.3 From 7536d3f83aa42ba1a3b1c6b30c2b6d94a820cbb2 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 18 Dec 2013 11:59:17 +0100 Subject: mmc: core: Enable MMC_CAP2_CACHE_CTRL as default There are no reason to why the use of a non-volatile internal eMMC cache should be controlled by a host cap. Instead let's just enable it if the eMMC card supports it. Signed-off-by: Ulf Hansson Acked-by: Seungwon Jeon Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index f69bd70b1046..719db89ef134 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -264,7 +264,6 @@ struct mmc_host { u32 caps2; /* More host capabilities */ #define MMC_CAP2_BOOTPART_NOACC (1 << 0) /* Boot partition no access */ -#define MMC_CAP2_CACHE_CTRL (1 << 1) /* Allow cache control */ #define MMC_CAP2_FULL_PWR_CYCLE (1 << 2) /* Can do full power cycle */ #define MMC_CAP2_NO_MULTI_READ (1 << 3) /* Multiblock reads don't work */ #define MMC_CAP2_HS200_1_8V_SDR (1 << 5) /* can support */ -- cgit v1.2.3 From dd5720b3006210ecdf4e3c8889e6051f432c4ba3 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Wed, 12 Feb 2014 11:16:17 +0200 Subject: dma: dw: remove leftovers in the comment blocks There is couple of leftovers in the comment blocks. This patch modifies the comments accordingly. There is no functional change. Signed-off-by: Andy Shevchenko Signed-off-by: Vinod Koul --- include/linux/dw_dmac.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dw_dmac.h b/include/linux/dw_dmac.h index 481ab2345d6b..68b4024184de 100644 --- a/include/linux/dw_dmac.h +++ b/include/linux/dw_dmac.h @@ -1,6 +1,5 @@ /* - * Driver for the Synopsys DesignWare DMA Controller (aka DMACA on - * AVR32 systems.) + * Driver for the Synopsys DesignWare DMA Controller * * Copyright (C) 2007 Atmel Corporation * Copyright (C) 2010-2011 ST Microelectronics @@ -44,8 +43,6 @@ struct dw_dma_slave { * @nr_masters: Number of AHB masters supported by the controller * @data_width: Maximum data width supported by hardware per AHB master * (0 - 8bits, 1 - 16bits, ..., 5 - 256bits) - * @sd: slave specific data. Used for configuring channels - * @sd_count: count of slave data structures passed. */ struct dw_dma_platform_data { unsigned int nr_channels; -- cgit v1.2.3 From 9107ebbf9652c033eb5dd10a6ea34a132db3cde1 Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Fri, 21 Feb 2014 18:40:35 +0800 Subject: mmc: sdhci: add support for realtek rts5250 Add support for realtek rts5250 pci card reader. The card reader has some problems with DDR50 mode, so add a new quirks2 for broken ddr50. Signed-off-by: Micky Ching Signed-off-by: Chris Ball --- include/linux/mmc/sdhci.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci.h b/include/linux/mmc/sdhci.h index 362927c48f97..7be12b883485 100644 --- a/include/linux/mmc/sdhci.h +++ b/include/linux/mmc/sdhci.h @@ -100,6 +100,8 @@ struct sdhci_host { #define SDHCI_QUIRK2_BROKEN_HOST_CONTROL (1<<5) /* Controller does not support HS200 */ #define SDHCI_QUIRK2_BROKEN_HS200 (1<<6) +/* Controller does not support DDR50 */ +#define SDHCI_QUIRK2_BROKEN_DDR50 (1<<7) int irq; /* Device IRQ */ void __iomem *ioaddr; /* Mapped address */ -- cgit v1.2.3 From abcc6b2943145ae2e17a52632ccab50cd612914c Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Mon, 17 Feb 2014 16:45:47 +0800 Subject: mmc: rtsx: modify phase searching method for tuning The new phase searching method is more concise and easier to understand. Signed-off-by: Micky Ching Signed-off-by: Chris Ball --- include/linux/mfd/rtsx_pci.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index 0ce772105508..a3835976f7c6 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -144,7 +144,7 @@ #define HOST_TO_DEVICE 0 #define DEVICE_TO_HOST 1 -#define MAX_PHASE 31 +#define RTSX_PHASE_MAX 32 #define RX_TUNING_CNT 3 /* SG descriptor */ -- cgit v1.2.3 From c42deffd5b53c9e583d83c7964854ede2f12410d Mon Sep 17 00:00:00 2001 From: Micky Ching Date: Mon, 17 Feb 2014 16:45:48 +0800 Subject: mmc: rtsx: add support for pre_req and post_req Add support for non-blocking request, pre_req() runs dma_map_sg() and post_req() runs dma_unmap_sg(). This patch can increase card read/write speed, especially for high speed card and slow CPU(for some embedded platform). Users can get a great benefit from this patch. if CPU frequency is 800MHz, SDR104 or DDR50 card read/write speed may increase more than 15%. test results: intel i3(800MHz - 2.3GHz), SD card clock 208MHz performance mode(2.3GHz): Before: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.18191 s, 56.8 MB/s After: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.09276 s, 61.4 MB/s powersave mode(800MHz): Before: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.29569 s, 51.8 MB/s After: dd if=/dev/mmcblk0p1 of=/dev/null bs=64k count=1024 67108864 bytes (67 MB) copied, 1.11218 s, 60.3 MB/s Signed-off-by: Micky Ching Signed-off-by: Chris Ball --- include/linux/mfd/rtsx_common.h | 1 + include/linux/mfd/rtsx_pci.h | 6 ++++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_common.h b/include/linux/mfd/rtsx_common.h index 443176ee1ab0..7c36cc55d2c7 100644 --- a/include/linux/mfd/rtsx_common.h +++ b/include/linux/mfd/rtsx_common.h @@ -45,6 +45,7 @@ struct platform_device; struct rtsx_slot { struct platform_device *p_dev; void (*card_event)(struct platform_device *p_dev); + void (*done_transfer)(struct platform_device *p_dev); }; #endif diff --git a/include/linux/mfd/rtsx_pci.h b/include/linux/mfd/rtsx_pci.h index a3835976f7c6..8d6bbd609ad9 100644 --- a/include/linux/mfd/rtsx_pci.h +++ b/include/linux/mfd/rtsx_pci.h @@ -943,6 +943,12 @@ void rtsx_pci_send_cmd_no_wait(struct rtsx_pcr *pcr); int rtsx_pci_send_cmd(struct rtsx_pcr *pcr, int timeout); int rtsx_pci_transfer_data(struct rtsx_pcr *pcr, struct scatterlist *sglist, int num_sg, bool read, int timeout); +int rtsx_pci_dma_map_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_unmap_sg(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int num_sg, bool read); +int rtsx_pci_dma_transfer(struct rtsx_pcr *pcr, struct scatterlist *sglist, + int sg_count, bool read); int rtsx_pci_read_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_write_ppbuf(struct rtsx_pcr *pcr, u8 *buf, int buf_len); int rtsx_pci_card_pull_ctl_enable(struct rtsx_pcr *pcr, int card); -- cgit v1.2.3 From 68eb80e06bfa06035d0304686124974780308fae Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 18 Dec 2013 09:57:38 +0100 Subject: mmc: core: Rename max_discard_to to max_busy_timeout Rename host->max_discard_to to host->max_busy_timeout, to reflect that it tells the mmc core layer about the maximum supported busy detection timeout by the host. This timeout is at the moment only applicable to erase/trim/discard commands. By the renaming we provide the option of make use of it for other commands that cares about busy detection. In other words, those commands that wants an R1B response, like for example the mmc switch command. Do note that the max_busy_timeout is supposed to be specified only by hosts supporting MMC_CAP_WAIT_WHILE_BUSY. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/host.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 719db89ef134..cb61ea4d6945 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -300,7 +300,7 @@ struct mmc_host { unsigned int max_req_size; /* maximum number of bytes in one req */ unsigned int max_blk_size; /* maximum size of one mmc block */ unsigned int max_blk_count; /* maximum number of blocks in one req */ - unsigned int max_discard_to; /* max. discard timeout in ms */ + unsigned int max_busy_timeout; /* max busy timeout in ms */ /* private data */ spinlock_t lock; /* lock for claim and bus ops */ -- cgit v1.2.3 From 1d4d77444bf4212c44585146a2b353ca24c815f9 Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 8 Jan 2014 15:06:08 +0100 Subject: mmc: core: Rename cmd_timeout_ms to busy_timeout To better reflect that the cmd_timeout_ms is directly related to the busy detection timeout, let's rename it. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index 87079fc38011..b27699612888 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -95,7 +95,7 @@ struct mmc_command { * actively failing requests */ - unsigned int cmd_timeout_ms; /* in milliseconds */ + unsigned int busy_timeout; /* busy detect timeout in ms */ /* Set this flag only for blocking sanitize request */ bool sanitize_busy; -- cgit v1.2.3 From 4509f847751c9d2a724f37fe831393fbac34b80f Mon Sep 17 00:00:00 2001 From: Ulf Hansson Date: Wed, 8 Jan 2014 16:09:33 +0100 Subject: mmc: core: Add ignore_crc flag to __mmc_switch Instead of handle specific adaptations, releated to certain switch operations, inside __mmc_switch, push this to be handled by the caller instead. Signed-off-by: Ulf Hansson Signed-off-by: Chris Ball --- include/linux/mmc/core.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mmc/core.h b/include/linux/mmc/core.h index b27699612888..f206e29f94d7 100644 --- a/include/linux/mmc/core.h +++ b/include/linux/mmc/core.h @@ -152,7 +152,7 @@ extern int mmc_wait_for_app_cmd(struct mmc_host *, struct mmc_card *, struct mmc_command *, int); extern void mmc_start_bkops(struct mmc_card *card, bool from_exception); extern int __mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int, bool, - bool); + bool, bool); extern int mmc_switch(struct mmc_card *, u8, u8, u8, unsigned int); extern int mmc_send_ext_csd(struct mmc_card *card, u8 *ext_csd); -- cgit v1.2.3 From 2a5153aaae498f645573a13f60fed8226cc4e865 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 25 Feb 2014 15:18:26 +0530 Subject: mmc: msm: Cleanup mmc-msm_sdcc.h header Commit 1ef21f6343ff ("ARM: msm: move platform_data definitions") moved the file to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Chris Ball --- include/linux/platform_data/mmc-msm_sdcc.h | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-msm_sdcc.h b/include/linux/platform_data/mmc-msm_sdcc.h index ffcd9e3a6a7e..55aa873c9396 100644 --- a/include/linux/platform_data/mmc-msm_sdcc.h +++ b/include/linux/platform_data/mmc-msm_sdcc.h @@ -1,8 +1,5 @@ -/* - * arch/arm/include/asm/mach/mmc.h - */ -#ifndef ASMARM_MACH_MMC_H -#define ASMARM_MACH_MMC_H +#ifndef __MMC_MSM_SDCC_H +#define __MMC_MSM_SDCC_H #include #include -- cgit v1.2.3 From 550459eeb21b8f3553283d506fdcb92c9147c1eb Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Tue, 25 Feb 2014 15:18:27 +0530 Subject: mmc: mvsdio: Cleanup mmc-mvsdio.h header Commit c02cecb92ed4 ("ARM: orion: move platform_data definitions") moved the file to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Chris Ball --- include/linux/platform_data/mmc-mvsdio.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mmc-mvsdio.h b/include/linux/platform_data/mmc-mvsdio.h index 1190efedcb94..d02704cd3695 100644 --- a/include/linux/platform_data/mmc-mvsdio.h +++ b/include/linux/platform_data/mmc-mvsdio.h @@ -1,13 +1,11 @@ /* - * arch/arm/plat-orion/include/plat/mvsdio.h - * * This file is licensed under the terms of the GNU General Public * License version 2. This program is licensed "as is" without any * warranty of any kind, whether express or implied. */ -#ifndef __MACH_MVSDIO_H -#define __MACH_MVSDIO_H +#ifndef __MMC_MVSDIO_H +#define __MMC_MVSDIO_H #include -- cgit v1.2.3 From 3843154598a00408f4214a68bd536fdf27b1df10 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Thu, 27 Feb 2014 18:20:00 +0900 Subject: f2fs: introduce large directory support This patch introduces an i_dir_level field to support large directory. Previously, f2fs maintains multi-level hash tables to find a dentry quickly from a bunch of chiild dentries in a directory, and the hash tables consist of the following tree structure as below. In Documentation/filesystems/f2fs.txt, ---------------------- A : bucket B : block N : MAX_DIR_HASH_DEPTH ---------------------- level #0 | A(2B) | level #1 | A(2B) - A(2B) | level #2 | A(2B) - A(2B) - A(2B) - A(2B) . | . . . . level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) . | . . . . level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) But, if we can guess that a directory will handle a number of child files, we don't need to traverse the tree from level #0 to #N all the time. Since the lower level tables contain relatively small number of dentries, the miss ratio of the target dentry is likely to be high. In order to avoid that, we can configure the hash tables sparsely from level #0 like this. level #0 | A(2B) - A(2B) - A(2B) - A(2B) level #1 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) . | . . . . level #N/2 | A(2B) - A(2B) - A(2B) - A(2B) - A(2B) - ... - A(2B) . | . . . . level #N | A(4B) - A(4B) - A(4B) - A(4B) - A(4B) - ... - A(4B) With this structure, we can skip the ineffective tree searches in lower level hash tables. This patch adds just a facility for this by introducing i_dir_level in f2fs_inode. Signed-off-by: Jaegeuk Kim --- include/linux/f2fs_fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index da74d878dc4f..df53e1753a76 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -183,7 +183,7 @@ struct f2fs_inode { __le32 i_pino; /* parent inode number */ __le32 i_namelen; /* file name length */ __u8 i_name[F2FS_NAME_LEN]; /* file name for SPOR */ - __u8 i_reserved2; /* for backward compatibility */ + __u8 i_dir_level; /* dentry_level for large dir */ struct f2fs_extent i_ext; /* caching a largest extent */ -- cgit v1.2.3 From a59ce6584d566847980f9dcad5343cd9856145c8 Mon Sep 17 00:00:00 2001 From: Alexander Shiyan Date: Fri, 31 Jan 2014 22:36:28 -0800 Subject: leds: leds-mc13783: Add MC34708 LED support This patch adds support for two LEDs on MC34708 PMIC. Signed-off-by: Alexander Shiyan Signed-off-by: Bryan Wu --- include/linux/mfd/mc13xxx.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/mc13xxx.h b/include/linux/mfd/mc13xxx.h index ac39d910e70b..a326c850f046 100644 --- a/include/linux/mfd/mc13xxx.h +++ b/include/linux/mfd/mc13xxx.h @@ -104,6 +104,9 @@ enum { MC13892_LED_R, MC13892_LED_G, MC13892_LED_B, + /* MC34708 LED IDs */ + MC34708_LED_R, + MC34708_LED_G, }; struct mc13xxx_led_platform_data { @@ -163,6 +166,9 @@ struct mc13xxx_leds_platform_data { #define MC13892_LED_C2_CURRENT_G(x) (((x) & 0x7) << 21) /* MC13892 LED Control 3 */ #define MC13892_LED_C3_CURRENT_B(x) (((x) & 0x7) << 9) +/* MC34708 LED Control 0 */ +#define MC34708_LED_C0_CURRENT_R(x) (((x) & 0x3) << 9) +#define MC34708_LED_C0_CURRENT_G(x) (((x) & 0x3) << 21) u32 led_control[MAX_LED_CONTROL_REGS]; }; -- cgit v1.2.3 From fc87eb0b588394d8304d942bcd5c71d6bcc595c7 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Thu, 13 Feb 2014 22:37:09 -0800 Subject: leds: leds-s3c24xx: Trivial cleanup in header file Commit 436d42c61c3e ("ARM: samsung: move platform_data definitions") moved the files to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Bryan Wu --- include/linux/platform_data/leds-s3c24xx.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/leds-s3c24xx.h b/include/linux/platform_data/leds-s3c24xx.h index d8a7672519b6..441a6f290649 100644 --- a/include/linux/platform_data/leds-s3c24xx.h +++ b/include/linux/platform_data/leds-s3c24xx.h @@ -1,5 +1,4 @@ -/* arch/arm/mach-s3c2410/include/mach/leds-gpio.h - * +/* * Copyright (c) 2006 Simtec Electronics * http://armlinux.simtec.co.uk/ * Ben Dooks @@ -11,8 +10,8 @@ * published by the Free Software Foundation. */ -#ifndef __ASM_ARCH_LEDSGPIO_H -#define __ASM_ARCH_LEDSGPIO_H "leds-gpio.h" +#ifndef __LEDS_S3C24XX_H +#define __LEDS_S3C24XX_H #define S3C24XX_LEDF_ACTLOW (1<<0) /* LED is on when GPIO low */ #define S3C24XX_LEDF_TRISTATE (1<<1) /* tristate to turn off */ @@ -25,4 +24,4 @@ struct s3c24xx_led_platdata { char *def_trigger; }; -#endif /* __ASM_ARCH_LEDSGPIO_H */ +#endif /* __LEDS_S3C24XX_H */ -- cgit v1.2.3 From 42c1add97073b5a701b8aee0fdb69ef0345d4c50 Mon Sep 17 00:00:00 2001 From: Russell King Date: Fri, 28 Feb 2014 21:32:44 +0000 Subject: mmc: sdhci-spear: remove support for power gpio None of this code is currently used: there are no definitions of struct sdhci_plat_data in arch/arm, neither are there any DT properties which use card_power_gpio/power_active_high/power_always_enb. In any case, slot power control should be rigged up via vmmc and the regulator subsystem in the DT case. Signed-off-by: Russell King Signed-off-by: Chris Ball --- include/linux/mmc/sdhci-spear.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmc/sdhci-spear.h b/include/linux/mmc/sdhci-spear.h index e78c0e236e9d..8cc095a76cf8 100644 --- a/include/linux/mmc/sdhci-spear.h +++ b/include/linux/mmc/sdhci-spear.h @@ -18,17 +18,9 @@ /* * struct sdhci_plat_data: spear sdhci platform data structure * - * @card_power_gpio: gpio pin for enabling/disabling power to sdhci socket - * @power_active_high: if set, enable power to sdhci socket by setting - * card_power_gpio - * @power_always_enb: If set, then enable power on probe, otherwise enable only - * on card insertion and disable on card removal. * card_int_gpio: gpio pin used for card detection */ struct sdhci_plat_data { - int card_power_gpio; - int power_active_high; - int power_always_enb; int card_int_gpio; }; -- cgit v1.2.3 From 0c176170089c3a7f2a891f9860f5cdc5f481ff78 Mon Sep 17 00:00:00 2001 From: Wolfram Sang Date: Mon, 10 Feb 2014 11:03:56 +0100 Subject: i2c: add deprecation warning for class based instantiation Class based instantiation can cause noticeable delays when booting. This mechanism is used when it is not possible to describe slaves on I2C busses. As we do have other mechanisms, most embedded I2C will not need classes and for embedded it is explicitly not recommended to use them. Add a deprecation warning for drivers which want to disable class based instantiation in the near future to gain boot-up time, so users relying on this technique can switch to something better. They really should. Signed-off-by: Wolfram Sang --- include/linux/i2c.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/i2c.h b/include/linux/i2c.h index deddeb8c337c..b556e0ab946f 100644 --- a/include/linux/i2c.h +++ b/include/linux/i2c.h @@ -487,6 +487,7 @@ void i2c_unlock_adapter(struct i2c_adapter *); #define I2C_CLASS_HWMON (1<<0) /* lm_sensors, ... */ #define I2C_CLASS_DDC (1<<3) /* DDC bus on graphics adapters */ #define I2C_CLASS_SPD (1<<7) /* Memory modules */ +#define I2C_CLASS_DEPRECATED (1<<8) /* Warn users that adapter will stop using classes */ /* Internal numbers to terminate lists */ #define I2C_CLIENT_END 0xfffeU -- cgit v1.2.3 From 7cbccb55f04bef306bc2840185ec8f986bd0df3c Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Sun, 16 Feb 2014 14:21:22 +0100 Subject: dma: Remove comment about embedding dma_slave_config into custom structs The documentation for the dma_slave_config struct recommends that if a DMA controller has special configuration options, which can not be configured through the dma_slave_config struct, the driver should create its own custom config struct and embed the dma_slave_config struct in it and pass the custom config struct to dmaengine_slave_config(). This overloads the generic dmaengine_slave_config() API with custom semantics and any caller of the dmaengine_slave_config() that is not aware of these special semantics will cause undefined behavior. This means that it is impossible for generic code to make use of dmaengine_slave_config(). Such a restriction contradicts the very idea of having a generic API. E.g. consider the following case of a DMA controller that has an option to reverse the field polarity of the DMA transfer with the following implementation for setting the configuration: struct my_slave_config { struct dma_slave_config config; unsigned int field_polarity; }; static int my_dma_controller_slave_config(struct dma_chan *chan, struct dma_slave_config *config) { struct my_slave_config *my_cfg = container_of(config, struct my_slave_config, config); ... my_dma_set_field_polarity(chan, my_cfg->field_polarity); ... } Now a generic user of the dmaengine API might want to configure a DMA channel for this DMA controller that it obtained using the following code: struct dma_slave_config config; config.src_addr = ...; ... dmaengine_slave_config(chan, &config); The call to dmaengine_slave_config() will eventually call into my_dma_controller_slave_config() which will cast from dma_slave_config to my_slave_config and then tries to access the field_polarity member. Since the dma_slave_config struct that was passed in was never embedded into a my_slave_config struct this attempt will just read random stack garbage and use that to configure the DMA controller. This is bad. Instead, if a DMA controller really needs to have custom configuration options, the driver should create a custom API for it. This makes it very clear that there is a direct dependency of a user of such an API and the implementer. E.g.: int my_dma_set_field_polarity(struct dma_chan *chan, unsigned int field_polarity) { if (chan->device->dev->driver != &my_dma_controller_driver.driver) return -EINVAL; ... } EXPORT_SYMBOL_GPL(my_dma_set_field_polarity); Signed-off-by: Lars-Peter Clausen Signed-off-by: Vinod Koul --- include/linux/dmaengine.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/dmaengine.h b/include/linux/dmaengine.h index c5c92d59e531..8300fb87b84a 100644 --- a/include/linux/dmaengine.h +++ b/include/linux/dmaengine.h @@ -341,15 +341,11 @@ enum dma_slave_buswidth { * and this struct will then be passed in as an argument to the * DMA engine device_control() function. * - * The rationale for adding configuration information to this struct - * is as follows: if it is likely that most DMA slave controllers in - * the world will support the configuration option, then make it - * generic. If not: if it is fixed so that it be sent in static from - * the platform data, then prefer to do that. Else, if it is neither - * fixed at runtime, nor generic enough (such as bus mastership on - * some CPU family and whatnot) then create a custom slave config - * struct and pass that, then make this config a member of that - * struct, if applicable. + * The rationale for adding configuration information to this struct is as + * follows: if it is likely that more than one DMA slave controllers in + * the world will support the configuration option, then make it generic. + * If not: if it is fixed so that it be sent in static from the platform + * data, then prefer to do that. */ struct dma_slave_config { enum dma_transfer_direction direction; -- cgit v1.2.3 From a90902531a06a030a252a07fbff7f45a189a64fe Mon Sep 17 00:00:00 2001 From: William Roberts Date: Tue, 11 Feb 2014 10:11:59 -0800 Subject: mm: Create utility function for accessing a tasks commandline value introduce get_cmdline() for retreiving the value of a processes proc/self/cmdline value. Acked-by: David Rientjes Acked-by: Stephen Smalley Acked-by: Richard Guy Briggs Signed-off-by: William Roberts Signed-off-by: Eric Paris --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35527173cf50..01e797031993 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1134,6 +1134,7 @@ void account_page_writeback(struct page *page); int set_page_dirty(struct page *page); int set_page_dirty_lock(struct page *page); int clear_page_dirty_for_io(struct page *page); +int get_cmdline(struct task_struct *task, char *buffer, int buflen); /* Is the vma a continuation of the stack vma above it? */ static inline int vma_growsdown(struct vm_area_struct *vma, unsigned long addr) -- cgit v1.2.3 From 45126da22452ac3d4685401a1e921a39ac0ff2f6 Mon Sep 17 00:00:00 2001 From: Sonic Zhang Date: Tue, 28 Jan 2014 16:55:21 +0800 Subject: i2c: bfin-twi: move bits macros and structs in header from arch include to generic include The ADI TWI peripheral is not binding to the Blackfin processor only. The bits macros and structs should be put in the generic include header. And update head file path in drivers accordingly. Signed-off-by: Sonic Zhang Signed-off-by: Wolfram Sang --- include/linux/i2c/bfin_twi.h | 145 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 145 insertions(+) create mode 100644 include/linux/i2c/bfin_twi.h (limited to 'include/linux') diff --git a/include/linux/i2c/bfin_twi.h b/include/linux/i2c/bfin_twi.h new file mode 100644 index 000000000000..135a4e0876ae --- /dev/null +++ b/include/linux/i2c/bfin_twi.h @@ -0,0 +1,145 @@ +/* + * i2c-bfin-twi.h - interface to ADI TWI controller + * + * Copyright 2005-2014 Analog Devices Inc. + * + * Licensed under the GPL-2 or later. + */ + +#ifndef __I2C_BFIN_TWI_H__ +#define __I2C_BFIN_TWI_H__ + +#include +#include + +/* + * ADI twi registers layout + */ +struct bfin_twi_regs { + u16 clkdiv; + u16 dummy1; + u16 control; + u16 dummy2; + u16 slave_ctl; + u16 dummy3; + u16 slave_stat; + u16 dummy4; + u16 slave_addr; + u16 dummy5; + u16 master_ctl; + u16 dummy6; + u16 master_stat; + u16 dummy7; + u16 master_addr; + u16 dummy8; + u16 int_stat; + u16 dummy9; + u16 int_mask; + u16 dummy10; + u16 fifo_ctl; + u16 dummy11; + u16 fifo_stat; + u16 dummy12; + u32 __pad[20]; + u16 xmt_data8; + u16 dummy13; + u16 xmt_data16; + u16 dummy14; + u16 rcv_data8; + u16 dummy15; + u16 rcv_data16; + u16 dummy16; +}; + +struct bfin_twi_iface { + int irq; + spinlock_t lock; + char read_write; + u8 command; + u8 *transPtr; + int readNum; + int writeNum; + int cur_mode; + int manual_stop; + int result; + struct i2c_adapter adap; + struct completion complete; + struct i2c_msg *pmsg; + int msg_num; + int cur_msg; + u16 saved_clkdiv; + u16 saved_control; + struct bfin_twi_regs __iomem *regs_base; +}; + +/* ******************** TWO-WIRE INTERFACE (TWI) MASKS ********************/ +/* TWI_CLKDIV Macros (Use: *pTWI_CLKDIV = CLKLOW(x)|CLKHI(y); ) */ +#define CLKLOW(x) ((x) & 0xFF) /* Periods Clock Is Held Low */ +#define CLKHI(y) (((y)&0xFF)<<0x8) /* Periods Before New Clock Low */ + +/* TWI_PRESCALE Masks */ +#define PRESCALE 0x007F /* SCLKs Per Internal Time Reference (10MHz) */ +#define TWI_ENA 0x0080 /* TWI Enable */ +#define SCCB 0x0200 /* SCCB Compatibility Enable */ + +/* TWI_SLAVE_CTL Masks */ +#define SEN 0x0001 /* Slave Enable */ +#define SADD_LEN 0x0002 /* Slave Address Length */ +#define STDVAL 0x0004 /* Slave Transmit Data Valid */ +#define NAK 0x0008 /* NAK Generated At Conclusion Of Transfer */ +#define GEN 0x0010 /* General Call Address Matching Enabled */ + +/* TWI_SLAVE_STAT Masks */ +#define SDIR 0x0001 /* Slave Transfer Direction (RX/TX*) */ +#define GCALL 0x0002 /* General Call Indicator */ + +/* TWI_MASTER_CTL Masks */ +#define MEN 0x0001 /* Master Mode Enable */ +#define MADD_LEN 0x0002 /* Master Address Length */ +#define MDIR 0x0004 /* Master Transmit Direction (RX/TX*) */ +#define FAST 0x0008 /* Use Fast Mode Timing Specs */ +#define STOP 0x0010 /* Issue Stop Condition */ +#define RSTART 0x0020 /* Repeat Start or Stop* At End Of Transfer */ +#define DCNT 0x3FC0 /* Data Bytes To Transfer */ +#define SDAOVR 0x4000 /* Serial Data Override */ +#define SCLOVR 0x8000 /* Serial Clock Override */ + +/* TWI_MASTER_STAT Masks */ +#define MPROG 0x0001 /* Master Transfer In Progress */ +#define LOSTARB 0x0002 /* Lost Arbitration Indicator (Xfer Aborted) */ +#define ANAK 0x0004 /* Address Not Acknowledged */ +#define DNAK 0x0008 /* Data Not Acknowledged */ +#define BUFRDERR 0x0010 /* Buffer Read Error */ +#define BUFWRERR 0x0020 /* Buffer Write Error */ +#define SDASEN 0x0040 /* Serial Data Sense */ +#define SCLSEN 0x0080 /* Serial Clock Sense */ +#define BUSBUSY 0x0100 /* Bus Busy Indicator */ + +/* TWI_INT_SRC and TWI_INT_ENABLE Masks */ +#define SINIT 0x0001 /* Slave Transfer Initiated */ +#define SCOMP 0x0002 /* Slave Transfer Complete */ +#define SERR 0x0004 /* Slave Transfer Error */ +#define SOVF 0x0008 /* Slave Overflow */ +#define MCOMP 0x0010 /* Master Transfer Complete */ +#define MERR 0x0020 /* Master Transfer Error */ +#define XMTSERV 0x0040 /* Transmit FIFO Service */ +#define RCVSERV 0x0080 /* Receive FIFO Service */ + +/* TWI_FIFO_CTRL Masks */ +#define XMTFLUSH 0x0001 /* Transmit Buffer Flush */ +#define RCVFLUSH 0x0002 /* Receive Buffer Flush */ +#define XMTINTLEN 0x0004 /* Transmit Buffer Interrupt Length */ +#define RCVINTLEN 0x0008 /* Receive Buffer Interrupt Length */ + +/* TWI_FIFO_STAT Masks */ +#define XMTSTAT 0x0003 /* Transmit FIFO Status */ +#define XMT_EMPTY 0x0000 /* Transmit FIFO Empty */ +#define XMT_HALF 0x0001 /* Transmit FIFO Has 1 Byte To Write */ +#define XMT_FULL 0x0003 /* Transmit FIFO Full (2 Bytes To Write) */ + +#define RCVSTAT 0x000C /* Receive FIFO Status */ +#define RCV_EMPTY 0x0000 /* Receive FIFO Empty */ +#define RCV_HALF 0x0004 /* Receive FIFO Has 1 Byte To Read */ +#define RCV_FULL 0x000C /* Receive FIFO Full (2 Bytes To Read) */ + +#endif -- cgit v1.2.3 From cd5006db1b6b9128d1f5b0f1ad17cbced9d3841c Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Fri, 14 Feb 2014 12:01:30 +0530 Subject: i2c: s3c2410: Trivial cleanup in header file Commit 436d42c61c3e ("ARM: samsung: move platform_data definitions") moved the files to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also change the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Wolfram Sang --- include/linux/platform_data/i2c-s3c2410.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/i2c-s3c2410.h b/include/linux/platform_data/i2c-s3c2410.h index 2a50048c1c44..05af66b840b9 100644 --- a/include/linux/platform_data/i2c-s3c2410.h +++ b/include/linux/platform_data/i2c-s3c2410.h @@ -1,5 +1,4 @@ -/* arch/arm/plat-s3c/include/plat/iic.h - * +/* * Copyright 2004-2009 Simtec Electronics * Ben Dooks * @@ -10,8 +9,8 @@ * published by the Free Software Foundation. */ -#ifndef __ASM_ARCH_IIC_H -#define __ASM_ARCH_IIC_H __FILE__ +#ifndef __I2C_S3C2410_H +#define __I2C_S3C2410_H __FILE__ #define S3C_IICFLG_FILTER (1<<0) /* enable s3c2440 filter */ @@ -76,4 +75,4 @@ extern void s3c_i2c7_cfg_gpio(struct platform_device *dev); extern struct s3c2410_platform_i2c default_i2c_data; -#endif /* __ASM_ARCH_IIC_H */ +#endif /* __I2C_S3C2410_H */ -- cgit v1.2.3 From f02ea4e6a47d50a38f5baadbe87f5087dd337db0 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Mon, 13 Jan 2014 14:27:12 +0800 Subject: mtd: nand: kill the the NAND_MAX_PAGESIZE/NAND_MAX_OOBSIZE for nand_buffers{} The patch converts the arrays to buffer pointers for nand_buffers{}. The cafe_nand.c is the only NAND_OWN_BUFFERS user which allocates nand_buffers{} itself. This patch disables the DMA for nand_scan_ident, and restores the DMA status after we finish the nand_scan_ident. This way, we can get page size and OOB size and use them to allocate cafe->dmabuf. Since the cafe_nand.c uses the NAND_ECC_HW_SYNDROME ECC mode, we do not allocate the buffers for @ecccalc and @ecccode. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 32f8612469d8..520ebca11f5d 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -435,17 +435,17 @@ struct nand_ecc_ctrl { /** * struct nand_buffers - buffer structure for read/write - * @ecccalc: buffer for calculated ECC - * @ecccode: buffer for ECC read from flash - * @databuf: buffer for data - dynamically sized + * @ecccalc: buffer pointer for calculated ECC, size is oobsize. + * @ecccode: buffer pointer for ECC read from flash, size is oobsize. + * @databuf: buffer pointer for data, size is (page size + oobsize). * * Do not change the order of buffers. databuf and oobrbuf must be in * consecutive order. */ struct nand_buffers { - uint8_t ecccalc[NAND_MAX_OOBSIZE]; - uint8_t ecccode[NAND_MAX_OOBSIZE]; - uint8_t databuf[NAND_MAX_PAGESIZE + NAND_MAX_OOBSIZE]; + uint8_t *ecccalc; + uint8_t *ecccode; + uint8_t *databuf; }; /** -- cgit v1.2.3 From 3f172cbdfbb799e35cc972fc9f7e43d0e971577b Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Sat, 21 Dec 2013 00:02:30 +0800 Subject: mtd: nand: remove the NAND_MAX_PAGESIZE/NAND_MAX_OOBSIZE There is no reference to these two macros now. Just remove them. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 520ebca11f5d..a719686c9cce 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -51,14 +51,6 @@ extern int nand_unlock(struct mtd_info *mtd, loff_t ofs, uint64_t len); /* The maximum number of NAND chips in an array */ #define NAND_MAX_CHIPS 8 -/* - * This constant declares the max. oobsize / page, which - * is supported now. If you add a chip with bigger oobsize/page - * adjust this accordingly. - */ -#define NAND_MAX_OOBSIZE 744 -#define NAND_MAX_PAGESIZE 8192 - /* * Constants for hardware specific CLE/ALE/NCE function * -- cgit v1.2.3 From 3dad2344e92c6e1aeae42df1c4824f307c51bcc7 Mon Sep 17 00:00:00 2001 From: Brian Norris Date: Wed, 29 Jan 2014 14:08:12 -0800 Subject: mtd: nand: force NAND_CMD_READID onto 8-bit bus The NAND command helpers tend to automatically shift the column address for x16 bus devices, since most commands expect a word address, not a byte address. The Read ID command, however, expects an 8-bit address (i.e., 0x00, 0x20, or 0x40 should not be translated to 0x00, 0x10, or 0x20). This fixes the column address for a few drivers which imitate the nand_base defaults. Note that I don't touch sh_flctl.c, since it already handles this problem slightly differently (note its comment "READID is always performed using an 8-bit bus"). I have not tested this patch, as I only have x8 parts up for testing at this point. Hopefully that can change soon... Signed-off-by: Brian Norris Tested-by: Ezequiel Garcia Tested-By: Pekon Gupta --- include/linux/mtd/nand.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index a719686c9cce..c034dc4224cb 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -832,4 +832,14 @@ static inline bool nand_is_slc(struct nand_chip *chip) { return chip->bits_per_cell == 1; } + +/** + * Check if the opcode's address should be sent only on the lower 8 bits + * @command: opcode to check + */ +static inline int nand_opcode_8bits(unsigned int command) +{ + return command == NAND_CMD_READID; +} + #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 91e165030deedc612c153dfeaba294d49632ade3 Mon Sep 17 00:00:00 2001 From: Sachin Kamat Date: Fri, 14 Feb 2014 12:16:38 +0530 Subject: mtd: nand: s3c2410: Trivial cleanup in header file Commit 436d42c61c3e ("ARM: samsung: move platform_data definitions") moved the files to the current location but forgot to remove the pointer to its previous location. Clean it up. While at it also add the header file protection macros appropriately. Signed-off-by: Sachin Kamat Signed-off-by: Brian Norris --- include/linux/platform_data/mtd-nand-s3c2410.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/mtd-nand-s3c2410.h b/include/linux/platform_data/mtd-nand-s3c2410.h index b64115fa93a4..36bb92172f47 100644 --- a/include/linux/platform_data/mtd-nand-s3c2410.h +++ b/include/linux/platform_data/mtd-nand-s3c2410.h @@ -1,5 +1,4 @@ -/* arch/arm/mach-s3c2410/include/mach/nand.h - * +/* * Copyright (c) 2004 Simtec Electronics * Ben Dooks * @@ -10,6 +9,9 @@ * published by the Free Software Foundation. */ +#ifndef __MTD_NAND_S3C2410_H +#define __MTD_NAND_S3C2410_H + /** * struct s3c2410_nand_set - define a set of one or more nand chips * @disable_ecc: Entirely disable ECC - Dangerous @@ -65,3 +67,5 @@ struct s3c2410_platform_nand { * it with the s3c_device_nand. This allows @nand to be __initdata. */ extern void s3c_nand_set_platdata(struct s3c2410_platform_nand *nand); + +#endif /*__MTD_NAND_S3C2410_H */ -- cgit v1.2.3 From afbfff03d611de22b1ec7127ad56920e02936d5e Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:37 +0800 Subject: mtd: nand: add the data structures for JEDEC parameter page Create the nand_jedec_params{} and jedec_ecc_info{} according to the JESD230A (Revision of JESD230, October 2012). Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 75 ++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index c034dc4224cb..588f8a4a27af 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -342,6 +342,81 @@ struct nand_onfi_vendor_micron { u8 param_revision; } __packed; +struct jedec_ecc_info { + u8 ecc_bits; + u8 codeword_size; + __le16 bb_per_lun; + __le16 block_endurance; + u8 reserved[2]; +} __packed; + +struct nand_jedec_params { + /* rev info and features block */ + /* 'J' 'E' 'S' 'D' */ + u8 sig[4]; + __le16 revision; + __le16 features; + u8 opt_cmd[3]; + __le16 sec_cmd; + u8 num_of_param_pages; + u8 reserved0[18]; + + /* manufacturer information block */ + char manufacturer[12]; + char model[20]; + u8 jedec_id[6]; + u8 reserved1[10]; + + /* memory organization block */ + __le32 byte_per_page; + __le16 spare_bytes_per_page; + u8 reserved2[6]; + __le32 pages_per_block; + __le32 blocks_per_lun; + u8 lun_count; + u8 addr_cycles; + u8 bits_per_cell; + u8 programs_per_page; + u8 multi_plane_addr; + u8 multi_plane_op_attr; + u8 reserved3[38]; + + /* electrical parameter block */ + __le16 async_sdr_speed_grade; + __le16 toggle_ddr_speed_grade; + __le16 sync_ddr_speed_grade; + u8 async_sdr_features; + u8 toggle_ddr_features; + u8 sync_ddr_features; + __le16 t_prog; + __le16 t_bers; + __le16 t_r; + __le16 t_r_multi_plane; + __le16 t_ccs; + __le16 io_pin_capacitance_typ; + __le16 input_pin_capacitance_typ; + __le16 clk_pin_capacitance_typ; + u8 driver_strength_support; + __le16 t_ald; + u8 reserved4[36]; + + /* ECC and endurance block */ + u8 guaranteed_good_blocks; + __le16 guaranteed_block_endurance; + struct jedec_ecc_info ecc_info[4]; + u8 reserved5[29]; + + /* reserved */ + u8 reserved6[148]; + + /* vendor */ + __le16 vendor_rev_num; + u8 reserved7[88]; + + /* CRC for Parameter Page */ + __le16 crc; +} __packed; + /** * struct nand_hw_control - Control structure for hardware controller (e.g ECC generator) shared among independent devices * @lock: protection lock -- cgit v1.2.3 From d94abba7605d3c15123eb3b331a1872ef17d29e0 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:38 +0800 Subject: mtd: nand: add fields for JEDEC in nand_chip Add the jedec_version field, and add an anonymous union which contains the nand_onfi_params and nand_jedec_params. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 588f8a4a27af..f9af7564118a 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -590,8 +590,12 @@ struct nand_buffers { * @subpagesize: [INTERN] holds the subpagesize * @onfi_version: [INTERN] holds the chip ONFI version (BCD encoded), * non 0 if ONFI supported. + * @jedec_version: [INTERN] holds the chip JEDEC version (BCD encoded), + * non 0 if JEDEC supported. * @onfi_params: [INTERN] holds the ONFI page parameter when ONFI is * supported, 0 otherwise. + * @jedec_params: [INTERN] holds the JEDEC parameter page when JEDEC is + * supported, 0 otherwise. * @read_retries: [INTERN] the number of read retry modes supported * @onfi_set_features: [REPLACEABLE] set the features for ONFI nand * @onfi_get_features: [REPLACEABLE] get the features for ONFI nand @@ -664,7 +668,11 @@ struct nand_chip { int badblockbits; int onfi_version; - struct nand_onfi_params onfi_params; + int jedec_version; + union { + struct nand_onfi_params onfi_params; + struct nand_jedec_params jedec_params; + }; int read_retries; -- cgit v1.2.3 From 7852f8962f0f022b11fc56d63de06226a9f70d88 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:39 +0800 Subject: mtd: nand: add a helper to get the supported features for JEDEC Add a helper to get the supported features for JEDEC compliant NAND. Also add a macro JEDEC_FEATURE_16_BIT_BUS. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index f9af7564118a..afd1cf9b5eaf 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -350,6 +350,9 @@ struct jedec_ecc_info { u8 reserved[2]; } __packed; +/* JEDEC features */ +#define JEDEC_FEATURE_16_BIT_BUS (1 << 0) + struct nand_jedec_params { /* rev info and features block */ /* 'J' 'E' 'S' 'D' */ @@ -925,4 +928,10 @@ static inline int nand_opcode_8bits(unsigned int command) return command == NAND_CMD_READID; } +/* return the supported JEDEC features. */ +static inline int jedec_feature(struct nand_chip *chip) +{ + return chip->jedec_version ? le16_to_cpu(chip->jedec_params.features) + : 0; +} #endif /* __LINUX_MTD_NAND_H */ -- cgit v1.2.3 From 913618185e51ee1fcbc193b2f121a9d072405619 Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 21 Feb 2014 13:39:40 +0800 Subject: mtd: nand: parse out the JEDEC compliant NAND This patch adds the parsing code for the JEDEC compliant NAND. Since we need the 0x40 as the column address, this patch also makes the NAND_CMD_PARAM to use the 8-bit address only. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index afd1cf9b5eaf..aa005e87d161 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -925,7 +925,7 @@ static inline bool nand_is_slc(struct nand_chip *chip) */ static inline int nand_opcode_8bits(unsigned int command) { - return command == NAND_CMD_READID; + return command == NAND_CMD_READID || command == NAND_CMD_PARAM; } /* return the supported JEDEC features. */ -- cgit v1.2.3 From 4b78fc42f3e3f07687dc27efc1153d29e360afa1 Mon Sep 17 00:00:00 2001 From: Christian Riesch Date: Tue, 28 Jan 2014 09:29:44 +0100 Subject: mtd: Add a retlen parameter to _get_{fact,user}_prot_info Signed-off-by: Christian Riesch Cc: Artem Bityutskiy Signed-off-by: Brian Norris --- include/linux/mtd/mtd.h | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mtd/mtd.h b/include/linux/mtd/mtd.h index 8cc0e2fb6894..a1b0b4c8fd79 100644 --- a/include/linux/mtd/mtd.h +++ b/include/linux/mtd/mtd.h @@ -204,12 +204,12 @@ struct mtd_info { struct mtd_oob_ops *ops); int (*_write_oob) (struct mtd_info *mtd, loff_t to, struct mtd_oob_ops *ops); - int (*_get_fact_prot_info) (struct mtd_info *mtd, struct otp_info *buf, - size_t len); + int (*_get_fact_prot_info) (struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf); int (*_read_fact_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); - int (*_get_user_prot_info) (struct mtd_info *mtd, struct otp_info *buf, - size_t len); + int (*_get_user_prot_info) (struct mtd_info *mtd, size_t len, + size_t *retlen, struct otp_info *buf); int (*_read_user_prot_reg) (struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int (*_write_user_prot_reg) (struct mtd_info *mtd, loff_t to, @@ -278,12 +278,12 @@ static inline int mtd_write_oob(struct mtd_info *mtd, loff_t to, return mtd->_write_oob(mtd, to, ops); } -int mtd_get_fact_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len); +int mtd_get_fact_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, + struct otp_info *buf); int mtd_read_fact_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); -int mtd_get_user_prot_info(struct mtd_info *mtd, struct otp_info *buf, - size_t len); +int mtd_get_user_prot_info(struct mtd_info *mtd, size_t len, size_t *retlen, + struct otp_info *buf); int mtd_read_user_prot_reg(struct mtd_info *mtd, loff_t from, size_t len, size_t *retlen, u_char *buf); int mtd_write_user_prot_reg(struct mtd_info *mtd, loff_t to, size_t len, -- cgit v1.2.3 From 6d9434ebb76157071164b32ad03fbed165c74382 Mon Sep 17 00:00:00 2001 From: Ezequiel Garcia Date: Mon, 24 Feb 2014 19:24:48 -0300 Subject: of_mtd: Add helpers to get ECC strength and ECC step size This commit adds simple helpers to obtain the devicetree properties that specify the ECC strength and ECC step size to use on a given NAND controller. Acked-by: Boris BREZILLON Signed-off-by: Ezequiel Garcia Signed-off-by: Brian Norris --- include/linux/of_mtd.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/of_mtd.h b/include/linux/of_mtd.h index cb32d9c1e8dc..e266caa36402 100644 --- a/include/linux/of_mtd.h +++ b/include/linux/of_mtd.h @@ -13,6 +13,8 @@ #include int of_get_nand_ecc_mode(struct device_node *np); +int of_get_nand_ecc_step_size(struct device_node *np); +int of_get_nand_ecc_strength(struct device_node *np); int of_get_nand_bus_width(struct device_node *np); bool of_get_nand_on_flash_bbt(struct device_node *np); @@ -23,6 +25,16 @@ static inline int of_get_nand_ecc_mode(struct device_node *np) return -ENOSYS; } +static inline int of_get_nand_ecc_step_size(struct device_node *np) +{ + return -ENOSYS; +} + +static inline int of_get_nand_ecc_strength(struct device_node *np) +{ + return -ENOSYS; +} + static inline int of_get_nand_bus_width(struct device_node *np) { return -ENOSYS; -- cgit v1.2.3 From e004debdadf1a661dcd24e2a654e56b0a113e29e Mon Sep 17 00:00:00 2001 From: Huang Shijie Date: Fri, 3 Jan 2014 11:01:40 +0800 Subject: mtd: nand: add "page" argument for read_subpage hook Add the "page" argument for the read_subpage hook. With this argument, the implementation of this hook could prints out more accurate information for debugging. Signed-off-by: Huang Shijie Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index aa005e87d161..0747fef2bfc6 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -488,7 +488,7 @@ struct nand_ecc_ctrl { int (*read_page)(struct mtd_info *mtd, struct nand_chip *chip, uint8_t *buf, int oob_required, int page); int (*read_subpage)(struct mtd_info *mtd, struct nand_chip *chip, - uint32_t offs, uint32_t len, uint8_t *buf); + uint32_t offs, uint32_t len, uint8_t *buf, int page); int (*write_subpage)(struct mtd_info *mtd, struct nand_chip *chip, uint32_t offset, uint32_t data_len, const uint8_t *data_buf, int oob_required); -- cgit v1.2.3 From 21bdd17b21b45ea48e06e23918d681afbe0622e9 Mon Sep 17 00:00:00 2001 From: Tom Gundersen Date: Mon, 3 Feb 2014 11:14:13 +1030 Subject: module: allow multiple calls to MODULE_DEVICE_TABLE() per module Commit 78551277e4df5: "Input: i8042 - add PNP modaliases" had a bug, where the second call to MODULE_DEVICE_TABLE() overrode the first resulting in not all the modaliases being exposed. This fixes the problem by including the name of the device_id table in the __mod_*_device_table alias, allowing us to export several device_id tables per module. Suggested-by: Kay Sievers Acked-by: Greg Kroah-Hartman Cc: Dmitry Torokhov Signed-off-by: Tom Gundersen Signed-off-by: Rusty Russell --- include/linux/module.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/module.h b/include/linux/module.h index eaf60ff9ba94..ad18f6006f86 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -142,7 +142,7 @@ extern const struct gtype##_id __mod_##gtype##_table \ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) #define MODULE_DEVICE_TABLE(type, name) \ - MODULE_GENERIC_TABLE(type##_device, name) + MODULE_GENERIC_TABLE(type##__##name##_device, name) /* Version of form [:][-]. * Or for CVS/RCS ID version, everything but the number is stripped. -- cgit v1.2.3 From cff26a51da5d206d3baf871e75778da44710219d Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 3 Feb 2014 11:15:13 +1030 Subject: module: remove MODULE_GENERIC_TABLE MODULE_DEVICE_TABLE() calles MODULE_GENERIC_TABLE(); make it do the work directly. This also removes a wart introduced in the last patch, where the alias is defined to be an unknown struct type "struct type##__##name##_device_id" instead of "struct type##_device_id" (it's an extern so GCC doesn't care, but it's wrong). The other user of MODULE_GENERIC_TABLE (ISAPNP_CARD_TABLE) is unused, so delete it. Signed-off-by: Rusty Russell --- include/linux/isapnp.h | 4 ---- include/linux/module.h | 19 ++++++++----------- 2 files changed, 8 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/isapnp.h b/include/linux/isapnp.h index e2d28b026a8c..3c77bf9b1efd 100644 --- a/include/linux/isapnp.h +++ b/include/linux/isapnp.h @@ -56,10 +56,6 @@ #define ISAPNP_DEVICE_ID(_va, _vb, _vc, _function) \ { .vendor = ISAPNP_VENDOR(_va, _vb, _vc), .function = ISAPNP_FUNCTION(_function) } -/* export used IDs outside module */ -#define ISAPNP_CARD_TABLE(name) \ - MODULE_GENERIC_TABLE(isapnp_card, name) - struct isapnp_card_id { unsigned long driver_data; /* data private to the driver */ unsigned short card_vendor, card_device; diff --git a/include/linux/module.h b/include/linux/module.h index ad18f6006f86..5686b37e11d6 100644 --- a/include/linux/module.h +++ b/include/linux/module.h @@ -82,15 +82,6 @@ void sort_extable(struct exception_table_entry *start, void sort_main_extable(void); void trim_init_extable(struct module *m); -#ifdef MODULE -#define MODULE_GENERIC_TABLE(gtype, name) \ -extern const struct gtype##_id __mod_##gtype##_table \ - __attribute__ ((unused, alias(__stringify(name)))) - -#else /* !MODULE */ -#define MODULE_GENERIC_TABLE(gtype, name) -#endif - /* Generic info of form tag = "info" */ #define MODULE_INFO(tag, info) __MODULE_INFO(tag, tag, info) @@ -141,8 +132,14 @@ extern const struct gtype##_id __mod_##gtype##_table \ /* What your module does. */ #define MODULE_DESCRIPTION(_description) MODULE_INFO(description, _description) -#define MODULE_DEVICE_TABLE(type, name) \ - MODULE_GENERIC_TABLE(type##__##name##_device, name) +#ifdef MODULE +/* Creates an alias so file2alias.c can find device table. */ +#define MODULE_DEVICE_TABLE(type, name) \ + extern const struct type##_device_id __mod_##type##__##name##_device_table \ + __attribute__ ((unused, alias(__stringify(name)))) +#else /* !MODULE */ +#define MODULE_DEVICE_TABLE(type, name) +#endif /* Version of form [:][-]. * Or for CVS/RCS ID version, everything but the number is stripped. -- cgit v1.2.3 From 66cc69e34e86a231fbe68d8918c6119e3b7549a3 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Thu, 13 Mar 2014 12:11:30 +1030 Subject: Fix: module signature vs tracepoints: add new TAINT_UNSIGNED_MODULE Users have reported being unable to trace non-signed modules loaded within a kernel supporting module signature. This is caused by tracepoint.c:tracepoint_module_coming() refusing to take into account tracepoints sitting within force-loaded modules (TAINT_FORCED_MODULE). The reason for this check, in the first place, is that a force-loaded module may have a struct module incompatible with the layout expected by the kernel, and can thus cause a kernel crash upon forced load of that module on a kernel with CONFIG_TRACEPOINTS=y. Tracepoints, however, specifically accept TAINT_OOT_MODULE and TAINT_CRAP, since those modules do not lead to the "very likely system crash" issue cited above for force-loaded modules. With kernels having CONFIG_MODULE_SIG=y (signed modules), a non-signed module is tainted re-using the TAINT_FORCED_MODULE taint flag. Unfortunately, this means that Tracepoints treat that module as a force-loaded module, and thus silently refuse to consider any tracepoint within this module. Since an unsigned module does not fit within the "very likely system crash" category of tainting, add a new TAINT_UNSIGNED_MODULE taint flag to specifically address this taint behavior, and accept those modules within Tracepoints. We use the letter 'X' as a taint flag character for a module being loaded that doesn't know how to sign its name (proposed by Steven Rostedt). Also add the missing 'O' entry to trace event show_module_flags() list for the sake of completeness. Signed-off-by: Mathieu Desnoyers Acked-by: Steven Rostedt NAKed-by: Ingo Molnar CC: Thomas Gleixner CC: David Howells CC: Greg Kroah-Hartman Signed-off-by: Rusty Russell --- include/linux/kernel.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 196d1ea86df0..471090093c67 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -469,6 +469,7 @@ extern enum system_states { #define TAINT_CRAP 10 #define TAINT_FIRMWARE_WORKAROUND 11 #define TAINT_OOT_MODULE 12 +#define TAINT_UNSIGNED_MODULE 13 extern const char hex_asc[]; #define hex_asc_lo(x) hex_asc[((x) & 0x0f)] -- cgit v1.2.3 From 2d123f463669cb7b84b56aa00e073ce07fe7aff2 Mon Sep 17 00:00:00 2001 From: Daniel Vetter Date: Wed, 22 Jan 2014 18:26:16 +0100 Subject: drm/docs: Include hdmi infoframe helper reference Thierry created such nice kerneldocs, it's a shame we've left them lingering! For the fun of it also add a bit of kerneldoc to the header so that we can also include that. Just in case someone adds kerneldoc in there. Cc: Thierry Reding Reviewed-by: Alex Deucher Signed-off-by: Daniel Vetter --- include/linux/hdmi.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/hdmi.h b/include/linux/hdmi.h index 9231be9e90a2..11c0182a153b 100644 --- a/include/linux/hdmi.h +++ b/include/linux/hdmi.h @@ -262,6 +262,18 @@ union hdmi_vendor_any_infoframe { struct hdmi_vendor_infoframe hdmi; }; +/** + * union hdmi_infoframe - overall union of all abstract infoframe representations + * @any: generic infoframe + * @avi: avi infoframe + * @spd: spd infoframe + * @vendor: union of all vendor infoframes + * @audio: audio infoframe + * + * This is used by the generic pack function. This works since all infoframes + * have the same header which also indicates which type of infoframe should be + * packed. + */ union hdmi_infoframe { struct hdmi_any_infoframe any; struct hdmi_avi_infoframe avi; -- cgit v1.2.3 From 0283f9a529c81e64bafea80d6d3e056d1c3f656d Mon Sep 17 00:00:00 2001 From: Mark Charlebois Date: Mon, 17 Mar 2014 13:18:26 +1030 Subject: module: LLVMLinux: Remove unused function warning from __param_check macro This code makes a compile time type check that is optimized away. Clang complains that it generates an unused function: linux/kernel/panic.c:471:1: warning: unused function '__check_panic' [-Wunused-function] core_param(panic, panic_timeout, int, 0644); ^ linux/moduleparam.h:283:2: note: expanded from macro 'core_param' param_check_##type(name, &(var)); \ ^ :87:1: note: expanded from here param_check_int ^ linux/moduleparam.h:369:34: note: expanded from macro 'param_check_int' #define param_check_int(name, p) __param_check(name, p, int) ^ linux/moduleparam.h:349:22: note: expanded from macro '__param_check' static inline type *__check_##name(void) { return(p); } ^ :88:1: note: expanded from here __check_panic GCC won't complain for a static inline function but would if it was just a static function. Adding the unused attribute to the function declaration removes the warning. Per request from Rusty Russell it is marked as __always_unused as the code is meant to be optimized away. This code works for both GCC and clang. Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster Signed-off-by: Rusty Russell --- include/linux/moduleparam.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index c3eb102a9cc8..175f6995d1af 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -346,7 +346,7 @@ static inline void destroy_params(const struct kernel_param *params, /* The macros to do compile-time type checking stolen from Jakub Jelinek, who IIRC came up with this idea for the 2.4 module init code. */ #define __param_check(name, p, type) \ - static inline type *__check_##name(void) { return(p); } + static inline type __always_unused *__check_##name(void) { return(p); } extern struct kernel_param_ops param_ops_byte; extern int param_set_byte(const char *val, const struct kernel_param *kp); -- cgit v1.2.3 From 740a221ef0e579dc7c675cf6b90f5313509788f7 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Mon, 10 Mar 2014 15:02:41 +0200 Subject: mmc: slot-gpio: Add GPIO descriptor based CD GPIO API Add functions to request a CD GPIO using the GPIO descriptor API. Note that the new request function is paired with mmc_gpiod_free_cd() not mmc_gpio_free_cd(). Note also that it must be called prior to mmc_add_host() otherwise the caller must also call mmc_gpiod_request_cd_irq(). Signed-off-by: Adrian Hunter Reviewed-by: Linus Walleij Signed-off-by: Chris Ball --- include/linux/mmc/slot-gpio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/slot-gpio.h b/include/linux/mmc/slot-gpio.h index b0c73e4cacea..d2433381e828 100644 --- a/include/linux/mmc/slot-gpio.h +++ b/include/linux/mmc/slot-gpio.h @@ -22,4 +22,10 @@ int mmc_gpio_request_cd(struct mmc_host *host, unsigned int gpio, unsigned int debounce); void mmc_gpio_free_cd(struct mmc_host *host); +int mmc_gpiod_request_cd(struct mmc_host *host, const char *con_id, + unsigned int idx, bool override_active_level, + unsigned int debounce); +void mmc_gpiod_free_cd(struct mmc_host *host); +void mmc_gpiod_request_cd_irq(struct mmc_host *host); + #endif -- cgit v1.2.3 From 96f9d8c0740264c5e2975361389ff2c21f2c5a4d Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Mar 2014 07:06:54 -0400 Subject: nfs: abstract out code needed to complete a sillyrename The async rename code is currently "polluted" with some parts that are really just for sillyrenames. Add a new "complete" operation vector to the nfs_renamedata to separate out the stuff that just needs to be done for a sillyrename. Signed-off-by: Jeff Layton Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index b2fb167b2e6d..0534184b65ce 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1397,6 +1397,7 @@ struct nfs_renamedata { struct inode *new_dir; struct dentry *new_dentry; struct nfs_fattr new_fattr; + void (*complete)(struct rpc_task *, struct nfs_renamedata *); }; struct nfs_access_entry; -- cgit v1.2.3 From 0e862a405185b28e775eeeae6b04bfa39724b1ad Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Mar 2014 07:06:55 -0400 Subject: nfs: make nfs_async_rename non-static ...and move the prototype for nfs_sillyrename to internal.h. Signed-off-by: Jeff Layton Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index f55a90bed0b4..fa6918b0f829 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -511,7 +511,6 @@ extern void nfs_complete_unlink(struct dentry *dentry, struct inode *); extern void nfs_wait_on_sillyrename(struct dentry *dentry); extern void nfs_block_sillyrename(struct dentry *dentry); extern void nfs_unblock_sillyrename(struct dentry *dentry); -extern int nfs_sillyrename(struct inode *dir, struct dentry *dentry); /* * linux/fs/nfs/write.c -- cgit v1.2.3 From 33912be816d96e204ed7a93690552daa39c08ea9 Mon Sep 17 00:00:00 2001 From: Jeff Layton Date: Mon, 17 Mar 2014 07:06:57 -0400 Subject: nfs: remove synchronous rename code Now that nfs_rename uses the async infrastructure, we can remove this. Signed-off-by: Jeff Layton Tested-by: Anna Schumaker Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 0534184b65ce..ad88a0a30a18 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -1440,8 +1440,6 @@ struct nfs_rpc_ops { void (*unlink_setup) (struct rpc_message *, struct inode *dir); void (*unlink_rpc_prepare) (struct rpc_task *, struct nfs_unlinkdata *); int (*unlink_done) (struct rpc_task *, struct inode *); - int (*rename) (struct inode *, struct qstr *, - struct inode *, struct qstr *); void (*rename_setup) (struct rpc_message *msg, struct inode *dir); void (*rename_rpc_prepare)(struct rpc_task *task, struct nfs_renamedata *); int (*rename_done) (struct rpc_task *task, struct inode *old_dir, struct inode *new_dir); -- cgit v1.2.3 From 99be0245c8869cbd7b9c0ab3f093f41c60362f91 Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 16 Mar 2014 02:43:27 +0100 Subject: mfd: twl4030-madc: Cleanup driver Some style fixes in twl4030-madc driver. Reported-by: Jonathan Cameron Reported-by: Lee Jones Signed-off-by: Sebastian Reichel Acked-by: Jonathan Cameron Tested-by: Marek Belisko Signed-off-by: Lee Jones --- include/linux/i2c/twl4030-madc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/i2c/twl4030-madc.h b/include/linux/i2c/twl4030-madc.h index 01f595107048..1c0134dd3271 100644 --- a/include/linux/i2c/twl4030-madc.h +++ b/include/linux/i2c/twl4030-madc.h @@ -44,7 +44,7 @@ struct twl4030_madc_conversion_method { struct twl4030_madc_request { unsigned long channels; - u16 do_avg; + bool do_avg; u16 method; u16 type; bool active; -- cgit v1.2.3 From 204dfd63a07ec4785fc786ff3511d85eb1346b7b Mon Sep 17 00:00:00 2001 From: Sebastian Reichel Date: Sun, 16 Mar 2014 02:43:28 +0100 Subject: mfd: twl-core: Add twl_i2c_read/write_u16 Add a simple twl_i2c_read/write_u16 wrapper over the twl_i2c_read/write, which is similar to the twl_i2c_read/write_u8 wrapper. Signed-off-by: Sebastian Reichel Acked-by: Jonathan Cameron Tested-by: Marek Belisko Signed-off-by: Lee Jones --- include/linux/i2c/twl.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/i2c/twl.h b/include/linux/i2c/twl.h index ade1c06d4ceb..d2b16704624c 100644 --- a/include/linux/i2c/twl.h +++ b/include/linux/i2c/twl.h @@ -195,6 +195,18 @@ static inline int twl_i2c_read_u8(u8 mod_no, u8 *val, u8 reg) { return twl_i2c_read(mod_no, val, reg, 1); } +static inline int twl_i2c_write_u16(u8 mod_no, u16 val, u8 reg) { + val = cpu_to_le16(val); + return twl_i2c_write(mod_no, (u8*) &val, reg, 2); +} + +static inline int twl_i2c_read_u16(u8 mod_no, u16 *val, u8 reg) { + int ret; + ret = twl_i2c_read(mod_no, (u8*) val, reg, 2); + *val = le16_to_cpu(*val); + return ret; +} + int twl_get_type(void); int twl_get_version(void); int twl_get_hfclk_rate(void); -- cgit v1.2.3 From 5e98fc8feaa869aa6ca5c73830f0855133eb461e Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jan 2014 13:18:26 +0100 Subject: mfd: max14577: Remove unused enum max14577_irq_source Remove unused symbol: enum max14577_irq_source. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max14577-private.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max14577-private.h b/include/linux/mfd/max14577-private.h index a3d0185196d3..c9b332fb0d5d 100644 --- a/include/linux/mfd/max14577-private.h +++ b/include/linux/mfd/max14577-private.h @@ -248,14 +248,6 @@ enum max14577_charger_reg { /* MAX14577 regulator SFOUT LDO voltage, fixed, uV */ #define MAX14577_REGULATOR_SAFEOUT_VOLTAGE 4900000 -enum max14577_irq_source { - MAX14577_IRQ_INT1 = 0, - MAX14577_IRQ_INT2, - MAX14577_IRQ_INT3, - - MAX14577_IRQ_REGS_NUM, -}; - enum max14577_irq { /* INT1 */ MAX14577_IRQ_INT1_ADC, -- cgit v1.2.3 From ec2b26724f81d58008626228686f47ca3337e7ed Mon Sep 17 00:00:00 2001 From: Krzysztof Kozlowski Date: Tue, 28 Jan 2014 13:18:27 +0100 Subject: mfd: max14577: Remove not needed header inclusion Remove not needed max14577-private.h header inclusion in the main driver header. Remove obvious comment. Signed-off-by: Krzysztof Kozlowski Signed-off-by: Lee Jones --- include/linux/mfd/max14577.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/max14577.h b/include/linux/mfd/max14577.h index 247b021dfaaf..736d39c3ec0d 100644 --- a/include/linux/mfd/max14577.h +++ b/include/linux/mfd/max14577.h @@ -25,13 +25,8 @@ #ifndef __MAX14577_H__ #define __MAX14577_H__ -#include #include -/* - * MAX14577 Regulator - */ - /* MAX14577 regulator IDs */ enum max14577_regulators { MAX14577_SAFEOUT = 0, -- cgit v1.2.3 From 8dbb947af37159d3df202d3cdbbae5fb896c462a Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 22 Jan 2014 13:04:23 +0000 Subject: mfd: wm5102: Make additional DSP registers available to the user Expose some DSP registers which are useful for DSP users to be able to access whilst debugging their firmware. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/registers.h | 17 +++++++++++++++++ 1 file changed, 17 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index fdf3aa376eb2..6a3b16993c1a 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1034,6 +1034,23 @@ #define ARIZONA_DSP1_STATUS_1 0x1104 #define ARIZONA_DSP1_STATUS_2 0x1105 #define ARIZONA_DSP1_STATUS_3 0x1106 +#define ARIZONA_DSP1_WDMA_BUFFER_1 0x1110 +#define ARIZONA_DSP1_WDMA_BUFFER_2 0x1111 +#define ARIZONA_DSP1_WDMA_BUFFER_3 0x1112 +#define ARIZONA_DSP1_WDMA_BUFFER_4 0x1113 +#define ARIZONA_DSP1_WDMA_BUFFER_5 0x1114 +#define ARIZONA_DSP1_WDMA_BUFFER_6 0x1115 +#define ARIZONA_DSP1_WDMA_BUFFER_7 0x1116 +#define ARIZONA_DSP1_WDMA_BUFFER_8 0x1117 +#define ARIZONA_DSP1_RDMA_BUFFER_1 0x1120 +#define ARIZONA_DSP1_RDMA_BUFFER_2 0x1121 +#define ARIZONA_DSP1_RDMA_BUFFER_3 0x1122 +#define ARIZONA_DSP1_RDMA_BUFFER_4 0x1123 +#define ARIZONA_DSP1_RDMA_BUFFER_5 0x1124 +#define ARIZONA_DSP1_RDMA_BUFFER_6 0x1125 +#define ARIZONA_DSP1_WDMA_CONFIG_1 0x1130 +#define ARIZONA_DSP1_WDMA_CONFIG_2 0x1131 +#define ARIZONA_DSP1_RDMA_CONFIG_1 0x1134 #define ARIZONA_DSP1_SCRATCH_0 0x1140 #define ARIZONA_DSP1_SCRATCH_1 0x1141 #define ARIZONA_DSP1_SCRATCH_2 0x1142 -- cgit v1.2.3 From 47ec66a17cca571eb7038b7835dff7cbd5851b90 Mon Sep 17 00:00:00 2001 From: Charles Keepax Date: Wed, 22 Jan 2014 13:04:24 +0000 Subject: mfd: wm5110: Make additional DSP registers available to the user Expose some DSP registers which are useful for DSP users to be able to access whilst debugging their firmware. Signed-off-by: Charles Keepax Signed-off-by: Lee Jones --- include/linux/mfd/arizona/registers.h | 67 +++++++++++++++++++++++++++++++++++ 1 file changed, 67 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mfd/arizona/registers.h b/include/linux/mfd/arizona/registers.h index 6a3b16993c1a..90d57aec34e7 100644 --- a/include/linux/mfd/arizona/registers.h +++ b/include/linux/mfd/arizona/registers.h @@ -1034,6 +1034,7 @@ #define ARIZONA_DSP1_STATUS_1 0x1104 #define ARIZONA_DSP1_STATUS_2 0x1105 #define ARIZONA_DSP1_STATUS_3 0x1106 +#define ARIZONA_DSP1_STATUS_4 0x1107 #define ARIZONA_DSP1_WDMA_BUFFER_1 0x1110 #define ARIZONA_DSP1_WDMA_BUFFER_2 0x1111 #define ARIZONA_DSP1_WDMA_BUFFER_3 0x1112 @@ -1050,7 +1051,10 @@ #define ARIZONA_DSP1_RDMA_BUFFER_6 0x1125 #define ARIZONA_DSP1_WDMA_CONFIG_1 0x1130 #define ARIZONA_DSP1_WDMA_CONFIG_2 0x1131 +#define ARIZONA_DSP1_WDMA_OFFSET_1 0x1132 #define ARIZONA_DSP1_RDMA_CONFIG_1 0x1134 +#define ARIZONA_DSP1_RDMA_OFFSET_1 0x1135 +#define ARIZONA_DSP1_EXTERNAL_START_SELECT_1 0x1138 #define ARIZONA_DSP1_SCRATCH_0 0x1140 #define ARIZONA_DSP1_SCRATCH_1 0x1141 #define ARIZONA_DSP1_SCRATCH_2 0x1142 @@ -1060,6 +1064,27 @@ #define ARIZONA_DSP2_STATUS_1 0x1204 #define ARIZONA_DSP2_STATUS_2 0x1205 #define ARIZONA_DSP2_STATUS_3 0x1206 +#define ARIZONA_DSP2_STATUS_4 0x1207 +#define ARIZONA_DSP2_WDMA_BUFFER_1 0x1210 +#define ARIZONA_DSP2_WDMA_BUFFER_2 0x1211 +#define ARIZONA_DSP2_WDMA_BUFFER_3 0x1212 +#define ARIZONA_DSP2_WDMA_BUFFER_4 0x1213 +#define ARIZONA_DSP2_WDMA_BUFFER_5 0x1214 +#define ARIZONA_DSP2_WDMA_BUFFER_6 0x1215 +#define ARIZONA_DSP2_WDMA_BUFFER_7 0x1216 +#define ARIZONA_DSP2_WDMA_BUFFER_8 0x1217 +#define ARIZONA_DSP2_RDMA_BUFFER_1 0x1220 +#define ARIZONA_DSP2_RDMA_BUFFER_2 0x1221 +#define ARIZONA_DSP2_RDMA_BUFFER_3 0x1222 +#define ARIZONA_DSP2_RDMA_BUFFER_4 0x1223 +#define ARIZONA_DSP2_RDMA_BUFFER_5 0x1224 +#define ARIZONA_DSP2_RDMA_BUFFER_6 0x1225 +#define ARIZONA_DSP2_WDMA_CONFIG_1 0x1230 +#define ARIZONA_DSP2_WDMA_CONFIG_2 0x1231 +#define ARIZONA_DSP2_WDMA_OFFSET_1 0x1232 +#define ARIZONA_DSP2_RDMA_CONFIG_1 0x1234 +#define ARIZONA_DSP2_RDMA_OFFSET_1 0x1235 +#define ARIZONA_DSP2_EXTERNAL_START_SELECT_1 0x1238 #define ARIZONA_DSP2_SCRATCH_0 0x1240 #define ARIZONA_DSP2_SCRATCH_1 0x1241 #define ARIZONA_DSP2_SCRATCH_2 0x1242 @@ -1069,6 +1094,27 @@ #define ARIZONA_DSP3_STATUS_1 0x1304 #define ARIZONA_DSP3_STATUS_2 0x1305 #define ARIZONA_DSP3_STATUS_3 0x1306 +#define ARIZONA_DSP3_STATUS_4 0x1307 +#define ARIZONA_DSP3_WDMA_BUFFER_1 0x1310 +#define ARIZONA_DSP3_WDMA_BUFFER_2 0x1311 +#define ARIZONA_DSP3_WDMA_BUFFER_3 0x1312 +#define ARIZONA_DSP3_WDMA_BUFFER_4 0x1313 +#define ARIZONA_DSP3_WDMA_BUFFER_5 0x1314 +#define ARIZONA_DSP3_WDMA_BUFFER_6 0x1315 +#define ARIZONA_DSP3_WDMA_BUFFER_7 0x1316 +#define ARIZONA_DSP3_WDMA_BUFFER_8 0x1317 +#define ARIZONA_DSP3_RDMA_BUFFER_1 0x1320 +#define ARIZONA_DSP3_RDMA_BUFFER_2 0x1321 +#define ARIZONA_DSP3_RDMA_BUFFER_3 0x1322 +#define ARIZONA_DSP3_RDMA_BUFFER_4 0x1323 +#define ARIZONA_DSP3_RDMA_BUFFER_5 0x1324 +#define ARIZONA_DSP3_RDMA_BUFFER_6 0x1325 +#define ARIZONA_DSP3_WDMA_CONFIG_1 0x1330 +#define ARIZONA_DSP3_WDMA_CONFIG_2 0x1331 +#define ARIZONA_DSP3_WDMA_OFFSET_1 0x1332 +#define ARIZONA_DSP3_RDMA_CONFIG_1 0x1334 +#define ARIZONA_DSP3_RDMA_OFFSET_1 0x1335 +#define ARIZONA_DSP3_EXTERNAL_START_SELECT_1 0x1338 #define ARIZONA_DSP3_SCRATCH_0 0x1340 #define ARIZONA_DSP3_SCRATCH_1 0x1341 #define ARIZONA_DSP3_SCRATCH_2 0x1342 @@ -1078,6 +1124,27 @@ #define ARIZONA_DSP4_STATUS_1 0x1404 #define ARIZONA_DSP4_STATUS_2 0x1405 #define ARIZONA_DSP4_STATUS_3 0x1406 +#define ARIZONA_DSP4_STATUS_4 0x1407 +#define ARIZONA_DSP4_WDMA_BUFFER_1 0x1410 +#define ARIZONA_DSP4_WDMA_BUFFER_2 0x1411 +#define ARIZONA_DSP4_WDMA_BUFFER_3 0x1412 +#define ARIZONA_DSP4_WDMA_BUFFER_4 0x1413 +#define ARIZONA_DSP4_WDMA_BUFFER_5 0x1414 +#define ARIZONA_DSP4_WDMA_BUFFER_6 0x1415 +#define ARIZONA_DSP4_WDMA_BUFFER_7 0x1416 +#define ARIZONA_DSP4_WDMA_BUFFER_8 0x1417 +#define ARIZONA_DSP4_RDMA_BUFFER_1 0x1420 +#define ARIZONA_DSP4_RDMA_BUFFER_2 0x1421 +#define ARIZONA_DSP4_RDMA_BUFFER_3 0x1422 +#define ARIZONA_DSP4_RDMA_BUFFER_4 0x1423 +#define ARIZONA_DSP4_RDMA_BUFFER_5 0x1424 +#define ARIZONA_DSP4_RDMA_BUFFER_6 0x1425 +#define ARIZONA_DSP4_WDMA_CONFIG_1 0x1430 +#define ARIZONA_DSP4_WDMA_CONFIG_2 0x1431 +#define ARIZONA_DSP4_WDMA_OFFSET_1 0x1432 +#define ARIZONA_DSP4_RDMA_CONFIG_1 0x1434 +#define ARIZONA_DSP4_RDMA_OFFSET_1 0x1435 +#define ARIZONA_DSP4_EXTERNAL_START_SELECT_1 0x1438 #define ARIZONA_DSP4_SCRATCH_0 0x1440 #define ARIZONA_DSP4_SCRATCH_1 0x1441 #define ARIZONA_DSP4_SCRATCH_2 0x1442 -- cgit v1.2.3 From 44b4dc616365d7897808555d415099330e3af9df Mon Sep 17 00:00:00 2001 From: Keerthy Date: Thu, 6 Feb 2014 11:20:12 +0530 Subject: mfd: tps65218: Add driver for the TPS65218 PMIC The TPS65218 chip is a power management IC for Portable Navigation Systems and Tablet Computing devices. It contains the following components: - Regulators. - Over Temperature warning and Shut down. This patch adds support for tps65218 mfd device. At this time only the regulator functionality is made available. Signed-off-by: Keerthy Signed-off-by: Lee Jones --- include/linux/mfd/tps65218.h | 284 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 284 insertions(+) create mode 100644 include/linux/mfd/tps65218.h (limited to 'include/linux') diff --git a/include/linux/mfd/tps65218.h b/include/linux/mfd/tps65218.h new file mode 100644 index 000000000000..d2e357df5a0e --- /dev/null +++ b/include/linux/mfd/tps65218.h @@ -0,0 +1,284 @@ +/* + * linux/mfd/tps65218.h + * + * Functions to access TPS65219 power management chip. + * + * Copyright (C) 2014 Texas Instruments Incorporated - http://www.ti.com/ + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether expressed or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License version 2 for more details. + */ + +#ifndef __LINUX_MFD_TPS65218_H +#define __LINUX_MFD_TPS65218_H + +#include +#include +#include +#include + +/* TPS chip id list */ +#define TPS65218 0xF0 + +/* I2C ID for TPS65218 part */ +#define TPS65218_I2C_ID 0x24 + +/* All register addresses */ +#define TPS65218_REG_CHIPID 0x00 +#define TPS65218_REG_INT1 0x01 +#define TPS65218_REG_INT2 0x02 +#define TPS65218_REG_INT_MASK1 0x03 +#define TPS65218_REG_INT_MASK2 0x04 +#define TPS65218_REG_STATUS 0x05 +#define TPS65218_REG_CONTROL 0x06 +#define TPS65218_REG_FLAG 0x07 + +#define TPS65218_REG_PASSWORD 0x10 +#define TPS65218_REG_ENABLE1 0x11 +#define TPS65218_REG_ENABLE2 0x12 +#define TPS65218_REG_CONFIG1 0x13 +#define TPS65218_REG_CONFIG2 0x14 +#define TPS65218_REG_CONFIG3 0x15 +#define TPS65218_REG_CONTROL_DCDC1 0x16 +#define TPS65218_REG_CONTROL_DCDC2 0x17 +#define TPS65218_REG_CONTROL_DCDC3 0x18 +#define TPS65218_REG_CONTROL_DCDC4 0x19 +#define TPS65218_REG_CONTRL_SLEW_RATE 0x1A +#define TPS65218_REG_CONTROL_LDO1 0x1B +#define TPS65218_REG_SEQ1 0x20 +#define TPS65218_REG_SEQ2 0x21 +#define TPS65218_REG_SEQ3 0x22 +#define TPS65218_REG_SEQ4 0x23 +#define TPS65218_REG_SEQ5 0x24 +#define TPS65218_REG_SEQ6 0x25 +#define TPS65218_REG_SEQ7 0x26 + +/* Register field definitions */ +#define TPS65218_CHIPID_CHIP_MASK 0xF8 +#define TPS65218_CHIPID_REV_MASK 0x07 + +#define TPS65218_INT1_VPRG BIT(5) +#define TPS65218_INT1_AC BIT(4) +#define TPS65218_INT1_PB BIT(3) +#define TPS65218_INT1_HOT BIT(2) +#define TPS65218_INT1_CC_AQC BIT(1) +#define TPS65218_INT1_PRGC BIT(0) + +#define TPS65218_INT2_LS3_F BIT(5) +#define TPS65218_INT2_LS2_F BIT(4) +#define TPS65218_INT2_LS1_F BIT(3) +#define TPS65218_INT2_LS3_I BIT(2) +#define TPS65218_INT2_LS2_I BIT(1) +#define TPS65218_INT2_LS1_I BIT(0) + +#define TPS65218_INT_MASK1_VPRG BIT(5) +#define TPS65218_INT_MASK1_AC BIT(4) +#define TPS65218_INT_MASK1_PB BIT(3) +#define TPS65218_INT_MASK1_HOT BIT(2) +#define TPS65218_INT_MASK1_CC_AQC BIT(1) +#define TPS65218_INT_MASK1_PRGC BIT(0) + +#define TPS65218_INT_MASK2_LS3_F BIT(5) +#define TPS65218_INT_MASK2_LS2_F BIT(4) +#define TPS65218_INT_MASK2_LS1_F BIT(3) +#define TPS65218_INT_MASK2_LS3_I BIT(2) +#define TPS65218_INT_MASK2_LS2_I BIT(1) +#define TPS65218_INT_MASK2_LS1_I BIT(0) + +#define TPS65218_STATUS_FSEAL BIT(7) +#define TPS65218_STATUS_EE BIT(6) +#define TPS65218_STATUS_AC_STATE BIT(5) +#define TPS65218_STATUS_PB_STATE BIT(4) +#define TPS65218_STATUS_STATE_MASK 0xC +#define TPS65218_STATUS_CC_STAT 0x3 + +#define TPS65218_CONTROL_OFFNPFO BIT(1) +#define TPS65218_CONTROL_CC_AQ BIT(0) + +#define TPS65218_FLAG_GPO3_FLG BIT(7) +#define TPS65218_FLAG_GPO2_FLG BIT(6) +#define TPS65218_FLAG_GPO1_FLG BIT(5) +#define TPS65218_FLAG_LDO1_FLG BIT(4) +#define TPS65218_FLAG_DC4_FLG BIT(3) +#define TPS65218_FLAG_DC3_FLG BIT(2) +#define TPS65218_FLAG_DC2_FLG BIT(1) +#define TPS65218_FLAG_DC1_FLG BIT(0) + +#define TPS65218_ENABLE1_DC6_EN BIT(5) +#define TPS65218_ENABLE1_DC5_EN BIT(4) +#define TPS65218_ENABLE1_DC4_EN BIT(3) +#define TPS65218_ENABLE1_DC3_EN BIT(2) +#define TPS65218_ENABLE1_DC2_EN BIT(1) +#define TPS65218_ENABLE1_DC1_EN BIT(0) + +#define TPS65218_ENABLE2_GPIO3 BIT(6) +#define TPS65218_ENABLE2_GPIO2 BIT(5) +#define TPS65218_ENABLE2_GPIO1 BIT(4) +#define TPS65218_ENABLE2_LS3_EN BIT(3) +#define TPS65218_ENABLE2_LS2_EN BIT(2) +#define TPS65218_ENABLE2_LS1_EN BIT(1) +#define TPS65218_ENABLE2_LDO1_EN BIT(0) + + +#define TPS65218_CONFIG1_TRST BIT(7) +#define TPS65218_CONFIG1_GPO2_BUF BIT(6) +#define TPS65218_CONFIG1_IO1_SEL BIT(5) +#define TPS65218_CONFIG1_PGDLY_MASK 0x18 +#define TPS65218_CONFIG1_STRICT BIT(2) +#define TPS65218_CONFIG1_UVLO_MASK 0x3 + +#define TPS65218_CONFIG2_DC12_RST BIT(7) +#define TPS65218_CONFIG2_UVLOHYS BIT(6) +#define TPS65218_CONFIG2_LS3ILIM_MASK 0xC +#define TPS65218_CONFIG2_LS2ILIM_MASK 0x3 + +#define TPS65218_CONFIG3_LS3NPFO BIT(5) +#define TPS65218_CONFIG3_LS2NPFO BIT(4) +#define TPS65218_CONFIG3_LS1NPFO BIT(3) +#define TPS65218_CONFIG3_LS3DCHRG BIT(2) +#define TPS65218_CONFIG3_LS2DCHRG BIT(1) +#define TPS65218_CONFIG3_LS1DCHRG BIT(0) + +#define TPS65218_CONTROL_DCDC1_PFM BIT(7) +#define TPS65218_CONTROL_DCDC1_MASK 0x7F + +#define TPS65218_CONTROL_DCDC2_PFM BIT(7) +#define TPS65218_CONTROL_DCDC2_MASK 0x3F + +#define TPS65218_CONTROL_DCDC3_PFM BIT(7) +#define TPS65218_CONTROL_DCDC3_MASK 0x3F + +#define TPS65218_CONTROL_DCDC4_PFM BIT(7) +#define TPS65218_CONTROL_DCDC4_MASK 0x3F + +#define TPS65218_SLEW_RATE_GO BIT(7) +#define TPS65218_SLEW_RATE_GODSBL BIT(6) +#define TPS65218_SLEW_RATE_SLEW_MASK 0x7 + +#define TPS65218_CONTROL_LDO1_MASK 0x3F + +#define TPS65218_SEQ1_DLY8 BIT(7) +#define TPS65218_SEQ1_DLY7 BIT(6) +#define TPS65218_SEQ1_DLY6 BIT(5) +#define TPS65218_SEQ1_DLY5 BIT(4) +#define TPS65218_SEQ1_DLY4 BIT(3) +#define TPS65218_SEQ1_DLY3 BIT(2) +#define TPS65218_SEQ1_DLY2 BIT(1) +#define TPS65218_SEQ1_DLY1 BIT(0) + +#define TPS65218_SEQ2_DLYFCTR BIT(7) +#define TPS65218_SEQ2_DLY9 BIT(0) + +#define TPS65218_SEQ3_DC2_SEQ_MASK 0xF0 +#define TPS65218_SEQ3_DC1_SEQ_MASK 0xF + +#define TPS65218_SEQ4_DC4_SEQ_MASK 0xF0 +#define TPS65218_SEQ4_DC3_SEQ_MASK 0xF + +#define TPS65218_SEQ5_DC6_SEQ_MASK 0xF0 +#define TPS65218_SEQ5_DC5_SEQ_MASK 0xF + +#define TPS65218_SEQ6_LS1_SEQ_MASK 0xF0 +#define TPS65218_SEQ6_LDO1_SEQ_MASK 0xF + +#define TPS65218_SEQ7_GPO3_SEQ_MASK 0xF0 +#define TPS65218_SEQ7_GPO1_SEQ_MASK 0xF +#define TPS65218_PROTECT_NONE 0 +#define TPS65218_PROTECT_L1 1 + +enum tps65218_regulator_id { + /* DCDC's */ + TPS65218_DCDC_1, + TPS65218_DCDC_2, + TPS65218_DCDC_3, + TPS65218_DCDC_4, + TPS65218_DCDC_5, + TPS65218_DCDC_6, + /* LDOs */ + TPS65218_LDO_1, +}; + +#define TPS65218_MAX_REG_ID TPS65218_LDO_1 + +/* Number of step-down converters available */ +#define TPS65218_NUM_DCDC 6 +/* Number of LDO voltage regulators available */ +#define TPS65218_NUM_LDO 1 +/* Number of total regulators available */ +#define TPS65218_NUM_REGULATOR (TPS65218_NUM_DCDC + TPS65218_NUM_LDO) + +/* Define the TPS65218 IRQ numbers */ +enum tps65218_irqs { + /* INT1 registers */ + TPS65218_PRGC_IRQ, + TPS65218_CC_AQC_IRQ, + TPS65218_HOT_IRQ, + TPS65218_PB_IRQ, + TPS65218_AC_IRQ, + TPS65218_VPRG_IRQ, + TPS65218_INVALID1_IRQ, + TPS65218_INVALID2_IRQ, + /* INT2 registers */ + TPS65218_LS1_I_IRQ, + TPS65218_LS2_I_IRQ, + TPS65218_LS3_I_IRQ, + TPS65218_LS1_F_IRQ, + TPS65218_LS2_F_IRQ, + TPS65218_LS3_F_IRQ, + TPS65218_INVALID3_IRQ, + TPS65218_INVALID4_IRQ, +}; + +/** + * struct tps_info - packages regulator constraints + * @id: Id of the regulator + * @name: Voltage regulator name + * @min_uV: minimum micro volts + * @max_uV: minimum micro volts + * + * This data is used to check the regualtor voltage limits while setting. + */ +struct tps_info { + int id; + const char *name; + int min_uV; + int max_uV; +}; + +/** + * struct tps65218 - tps65218 sub-driver chip access routines + * + * Device data may be used to access the TPS65218 chip + */ + +struct tps65218 { + struct device *dev; + unsigned int id; + + struct mutex tps_lock; /* lock guarding the data structure */ + /* IRQ Data */ + int irq; + u32 irq_mask; + struct regmap_irq_chip_data *irq_data; + struct regulator_desc desc[TPS65218_NUM_REGULATOR]; + struct regulator_dev *rdev[TPS65218_NUM_REGULATOR]; + struct tps_info *info[TPS65218_NUM_REGULATOR]; + struct regmap *regmap; +}; + +int tps65218_reg_read(struct tps65218 *tps, unsigned int reg, + unsigned int *val); +int tps65218_reg_write(struct tps65218 *tps, unsigned int reg, + unsigned int val, unsigned int level); +int tps65218_set_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int val, unsigned int level); +int tps65218_clear_bits(struct tps65218 *tps, unsigned int reg, + unsigned int mask, unsigned int level); + +#endif /* __LINUX_MFD_TPS65218_H */ -- cgit v1.2.3 From 730876be256603b4ee7225a125467d97a7ce9060 Mon Sep 17 00:00:00 2001 From: Roger Tseng Date: Wed, 12 Feb 2014 18:00:36 +0800 Subject: mfd: Add realtek USB card reader driver Realtek USB card reader provides a channel to transfer command or data to flash memory cards. This driver exports host instances for mmc and memstick subsystems and handles basic works. Acked-by: Greg Kroah-Hartman Signed-off-by: Roger Tseng Signed-off-by: Lee Jones --- include/linux/mfd/rtsx_usb.h | 628 +++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 628 insertions(+) create mode 100644 include/linux/mfd/rtsx_usb.h (limited to 'include/linux') diff --git a/include/linux/mfd/rtsx_usb.h b/include/linux/mfd/rtsx_usb.h new file mode 100644 index 000000000000..c446e4fd6b5c --- /dev/null +++ b/include/linux/mfd/rtsx_usb.h @@ -0,0 +1,628 @@ +/* Driver for Realtek RTS5139 USB card reader + * + * Copyright(c) 2009-2013 Realtek Semiconductor Corp. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License version 2 + * as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, but + * WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License along + * with this program; if not, see . + * + * Author: + * Roger Tseng + */ + +#ifndef __RTSX_USB_H +#define __RTSX_USB_H + +#include + +/* related module names */ +#define RTSX_USB_SD_CARD 0 +#define RTSX_USB_MS_CARD 1 + +/* endpoint numbers */ +#define EP_BULK_OUT 1 +#define EP_BULK_IN 2 +#define EP_INTR_IN 3 + +/* USB vendor requests */ +#define RTSX_USB_REQ_REG_OP 0x00 +#define RTSX_USB_REQ_POLL 0x02 + +/* miscellaneous parameters */ +#define MIN_DIV_N 60 +#define MAX_DIV_N 120 + +#define MAX_PHASE 15 +#define RX_TUNING_CNT 3 + +#define QFN24 0 +#define LQFP48 1 +#define CHECK_PKG(ucr, pkg) ((ucr)->package == (pkg)) + +/* data structures */ +struct rtsx_ucr { + u16 vendor_id; + u16 product_id; + + int package; + u8 ic_version; + bool is_rts5179; + + unsigned int cur_clk; + + u8 *cmd_buf; + unsigned int cmd_idx; + u8 *rsp_buf; + + struct usb_device *pusb_dev; + struct usb_interface *pusb_intf; + struct usb_sg_request current_sg; + unsigned char *iobuf; + dma_addr_t iobuf_dma; + + struct timer_list sg_timer; + struct mutex dev_mutex; +}; + +/* buffer size */ +#define IOBUF_SIZE 1024 + +/* prototypes of exported functions */ +extern int rtsx_usb_get_card_status(struct rtsx_ucr *ucr, u16 *status); + +extern int rtsx_usb_read_register(struct rtsx_ucr *ucr, u16 addr, u8 *data); +extern int rtsx_usb_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, + u8 data); + +extern int rtsx_usb_ep0_write_register(struct rtsx_ucr *ucr, u16 addr, u8 mask, + u8 data); +extern int rtsx_usb_ep0_read_register(struct rtsx_ucr *ucr, u16 addr, + u8 *data); + +extern void rtsx_usb_add_cmd(struct rtsx_ucr *ucr, u8 cmd_type, + u16 reg_addr, u8 mask, u8 data); +extern int rtsx_usb_send_cmd(struct rtsx_ucr *ucr, u8 flag, int timeout); +extern int rtsx_usb_get_rsp(struct rtsx_ucr *ucr, int rsp_len, int timeout); +extern int rtsx_usb_transfer_data(struct rtsx_ucr *ucr, unsigned int pipe, + void *buf, unsigned int len, int use_sg, + unsigned int *act_len, int timeout); + +extern int rtsx_usb_read_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len); +extern int rtsx_usb_write_ppbuf(struct rtsx_ucr *ucr, u8 *buf, int buf_len); +extern int rtsx_usb_switch_clock(struct rtsx_ucr *ucr, unsigned int card_clock, + u8 ssc_depth, bool initial_mode, bool double_clk, bool vpclk); +extern int rtsx_usb_card_exclusive_check(struct rtsx_ucr *ucr, int card); + +/* card status */ +#define SD_CD 0x01 +#define MS_CD 0x02 +#define XD_CD 0x04 +#define CD_MASK (SD_CD | MS_CD | XD_CD) +#define SD_WP 0x08 + +/* reader command field offset & parameters */ +#define READ_REG_CMD 0 +#define WRITE_REG_CMD 1 +#define CHECK_REG_CMD 2 + +#define PACKET_TYPE 4 +#define CNT_H 5 +#define CNT_L 6 +#define STAGE_FLAG 7 +#define CMD_OFFSET 8 +#define SEQ_WRITE_DATA_OFFSET 12 + +#define BATCH_CMD 0 +#define SEQ_READ 1 +#define SEQ_WRITE 2 + +#define STAGE_R 0x01 +#define STAGE_DI 0x02 +#define STAGE_DO 0x04 +#define STAGE_MS_STATUS 0x08 +#define STAGE_XD_STATUS 0x10 +#define MODE_C 0x00 +#define MODE_CR (STAGE_R) +#define MODE_CDIR (STAGE_R | STAGE_DI) +#define MODE_CDOR (STAGE_R | STAGE_DO) + +#define EP0_OP_SHIFT 14 +#define EP0_READ_REG_CMD 2 +#define EP0_WRITE_REG_CMD 3 + +#define rtsx_usb_cmd_hdr_tag(ucr) \ + do { \ + ucr->cmd_buf[0] = 'R'; \ + ucr->cmd_buf[1] = 'T'; \ + ucr->cmd_buf[2] = 'C'; \ + ucr->cmd_buf[3] = 'R'; \ + } while (0) + +static inline void rtsx_usb_init_cmd(struct rtsx_ucr *ucr) +{ + rtsx_usb_cmd_hdr_tag(ucr); + ucr->cmd_idx = 0; + ucr->cmd_buf[PACKET_TYPE] = BATCH_CMD; +} + +/* internal register address */ +#define FPDCTL 0xFC00 +#define SSC_DIV_N_0 0xFC07 +#define SSC_CTL1 0xFC09 +#define SSC_CTL2 0xFC0A +#define CFG_MODE 0xFC0E +#define CFG_MODE_1 0xFC0F +#define RCCTL 0xFC14 +#define SOF_WDOG 0xFC28 +#define SYS_DUMMY0 0xFC30 + +#define MS_BLKEND 0xFD30 +#define MS_READ_START 0xFD31 +#define MS_READ_COUNT 0xFD32 +#define MS_WRITE_START 0xFD33 +#define MS_WRITE_COUNT 0xFD34 +#define MS_COMMAND 0xFD35 +#define MS_OLD_BLOCK_0 0xFD36 +#define MS_OLD_BLOCK_1 0xFD37 +#define MS_NEW_BLOCK_0 0xFD38 +#define MS_NEW_BLOCK_1 0xFD39 +#define MS_LOG_BLOCK_0 0xFD3A +#define MS_LOG_BLOCK_1 0xFD3B +#define MS_BUS_WIDTH 0xFD3C +#define MS_PAGE_START 0xFD3D +#define MS_PAGE_LENGTH 0xFD3E +#define MS_CFG 0xFD40 +#define MS_TPC 0xFD41 +#define MS_TRANS_CFG 0xFD42 +#define MS_TRANSFER 0xFD43 +#define MS_INT_REG 0xFD44 +#define MS_BYTE_CNT 0xFD45 +#define MS_SECTOR_CNT_L 0xFD46 +#define MS_SECTOR_CNT_H 0xFD47 +#define MS_DBUS_H 0xFD48 + +#define CARD_DMA1_CTL 0xFD5C +#define CARD_PULL_CTL1 0xFD60 +#define CARD_PULL_CTL2 0xFD61 +#define CARD_PULL_CTL3 0xFD62 +#define CARD_PULL_CTL4 0xFD63 +#define CARD_PULL_CTL5 0xFD64 +#define CARD_PULL_CTL6 0xFD65 +#define CARD_EXIST 0xFD6F +#define CARD_INT_PEND 0xFD71 + +#define LDO_POWER_CFG 0xFD7B + +#define SD_CFG1 0xFDA0 +#define SD_CFG2 0xFDA1 +#define SD_CFG3 0xFDA2 +#define SD_STAT1 0xFDA3 +#define SD_STAT2 0xFDA4 +#define SD_BUS_STAT 0xFDA5 +#define SD_PAD_CTL 0xFDA6 +#define SD_SAMPLE_POINT_CTL 0xFDA7 +#define SD_PUSH_POINT_CTL 0xFDA8 +#define SD_CMD0 0xFDA9 +#define SD_CMD1 0xFDAA +#define SD_CMD2 0xFDAB +#define SD_CMD3 0xFDAC +#define SD_CMD4 0xFDAD +#define SD_CMD5 0xFDAE +#define SD_BYTE_CNT_L 0xFDAF +#define SD_BYTE_CNT_H 0xFDB0 +#define SD_BLOCK_CNT_L 0xFDB1 +#define SD_BLOCK_CNT_H 0xFDB2 +#define SD_TRANSFER 0xFDB3 +#define SD_CMD_STATE 0xFDB5 +#define SD_DATA_STATE 0xFDB6 +#define SD_VPCLK0_CTL 0xFC2A +#define SD_VPCLK1_CTL 0xFC2B +#define SD_DCMPS0_CTL 0xFC2C +#define SD_DCMPS1_CTL 0xFC2D + +#define CARD_DMA1_CTL 0xFD5C + +#define HW_VERSION 0xFC01 + +#define SSC_CLK_FPGA_SEL 0xFC02 +#define CLK_DIV 0xFC03 +#define SFSM_ED 0xFC04 + +#define CD_DEGLITCH_WIDTH 0xFC20 +#define CD_DEGLITCH_EN 0xFC21 +#define AUTO_DELINK_EN 0xFC23 + +#define FPGA_PULL_CTL 0xFC1D +#define CARD_CLK_SOURCE 0xFC2E + +#define CARD_SHARE_MODE 0xFD51 +#define CARD_DRIVE_SEL 0xFD52 +#define CARD_STOP 0xFD53 +#define CARD_OE 0xFD54 +#define CARD_AUTO_BLINK 0xFD55 +#define CARD_GPIO 0xFD56 +#define SD30_DRIVE_SEL 0xFD57 + +#define CARD_DATA_SOURCE 0xFD5D +#define CARD_SELECT 0xFD5E + +#define CARD_CLK_EN 0xFD79 +#define CARD_PWR_CTL 0xFD7A + +#define OCPCTL 0xFD80 +#define OCPPARA1 0xFD81 +#define OCPPARA2 0xFD82 +#define OCPSTAT 0xFD83 + +#define HS_USB_STAT 0xFE01 +#define HS_VCONTROL 0xFE26 +#define HS_VSTAIN 0xFE27 +#define HS_VLOADM 0xFE28 +#define HS_VSTAOUT 0xFE29 + +#define MC_IRQ 0xFF00 +#define MC_IRQEN 0xFF01 +#define MC_FIFO_CTL 0xFF02 +#define MC_FIFO_BC0 0xFF03 +#define MC_FIFO_BC1 0xFF04 +#define MC_FIFO_STAT 0xFF05 +#define MC_FIFO_MODE 0xFF06 +#define MC_FIFO_RD_PTR0 0xFF07 +#define MC_FIFO_RD_PTR1 0xFF08 +#define MC_DMA_CTL 0xFF10 +#define MC_DMA_TC0 0xFF11 +#define MC_DMA_TC1 0xFF12 +#define MC_DMA_TC2 0xFF13 +#define MC_DMA_TC3 0xFF14 +#define MC_DMA_RST 0xFF15 + +#define RBUF_SIZE_MASK 0xFBFF +#define RBUF_BASE 0xF000 +#define PPBUF_BASE1 0xF800 +#define PPBUF_BASE2 0xFA00 + +/* internal register value macros */ +#define POWER_OFF 0x03 +#define PARTIAL_POWER_ON 0x02 +#define POWER_ON 0x00 +#define POWER_MASK 0x03 +#define LDO3318_PWR_MASK 0x0C +#define LDO_ON 0x00 +#define LDO_SUSPEND 0x08 +#define LDO_OFF 0x0C +#define DV3318_AUTO_PWR_OFF 0x10 +#define FORCE_LDO_POWERB 0x60 + +/* LDO_POWER_CFG */ +#define TUNE_SD18_MASK 0x1C +#define TUNE_SD18_1V7 0x00 +#define TUNE_SD18_1V8 (0x01 << 2) +#define TUNE_SD18_1V9 (0x02 << 2) +#define TUNE_SD18_2V0 (0x03 << 2) +#define TUNE_SD18_2V7 (0x04 << 2) +#define TUNE_SD18_2V8 (0x05 << 2) +#define TUNE_SD18_2V9 (0x06 << 2) +#define TUNE_SD18_3V3 (0x07 << 2) + +/* CLK_DIV */ +#define CLK_CHANGE 0x80 +#define CLK_DIV_1 0x00 +#define CLK_DIV_2 0x01 +#define CLK_DIV_4 0x02 +#define CLK_DIV_8 0x03 + +#define SSC_POWER_MASK 0x01 +#define SSC_POWER_DOWN 0x01 +#define SSC_POWER_ON 0x00 + +#define FPGA_VER 0x80 +#define HW_VER_MASK 0x0F + +#define EXTEND_DMA1_ASYNC_SIGNAL 0x02 + +/* CFG_MODE*/ +#define XTAL_FREE 0x80 +#define CLK_MODE_MASK 0x03 +#define CLK_MODE_12M_XTAL 0x00 +#define CLK_MODE_NON_XTAL 0x01 +#define CLK_MODE_24M_OSC 0x02 +#define CLK_MODE_48M_OSC 0x03 + +/* CFG_MODE_1*/ +#define RTS5179 0x02 + +#define NYET_EN 0x01 +#define NYET_MSAK 0x01 + +#define SD30_DRIVE_MASK 0x07 +#define SD20_DRIVE_MASK 0x03 + +#define DISABLE_SD_CD 0x08 +#define DISABLE_MS_CD 0x10 +#define DISABLE_XD_CD 0x20 +#define SD_CD_DEGLITCH_EN 0x01 +#define MS_CD_DEGLITCH_EN 0x02 +#define XD_CD_DEGLITCH_EN 0x04 + +#define CARD_SHARE_LQFP48 0x04 +#define CARD_SHARE_QFN24 0x00 +#define CARD_SHARE_LQFP_SEL 0x04 +#define CARD_SHARE_XD 0x00 +#define CARD_SHARE_SD 0x01 +#define CARD_SHARE_MS 0x02 +#define CARD_SHARE_MASK 0x03 + + +/* SD30_DRIVE_SEL */ +#define DRIVER_TYPE_A 0x05 +#define DRIVER_TYPE_B 0x03 +#define DRIVER_TYPE_C 0x02 +#define DRIVER_TYPE_D 0x01 + +/* SD_BUS_STAT */ +#define SD_CLK_TOGGLE_EN 0x80 +#define SD_CLK_FORCE_STOP 0x40 +#define SD_DAT3_STATUS 0x10 +#define SD_DAT2_STATUS 0x08 +#define SD_DAT1_STATUS 0x04 +#define SD_DAT0_STATUS 0x02 +#define SD_CMD_STATUS 0x01 + +/* SD_PAD_CTL */ +#define SD_IO_USING_1V8 0x80 +#define SD_IO_USING_3V3 0x7F +#define TYPE_A_DRIVING 0x00 +#define TYPE_B_DRIVING 0x01 +#define TYPE_C_DRIVING 0x02 +#define TYPE_D_DRIVING 0x03 + +/* CARD_CLK_EN */ +#define SD_CLK_EN 0x04 +#define MS_CLK_EN 0x08 + +/* CARD_SELECT */ +#define SD_MOD_SEL 2 +#define MS_MOD_SEL 3 + +/* CARD_SHARE_MODE */ +#define CARD_SHARE_LQFP48 0x04 +#define CARD_SHARE_QFN24 0x00 +#define CARD_SHARE_LQFP_SEL 0x04 +#define CARD_SHARE_XD 0x00 +#define CARD_SHARE_SD 0x01 +#define CARD_SHARE_MS 0x02 +#define CARD_SHARE_MASK 0x03 + +/* SSC_CTL1 */ +#define SSC_RSTB 0x80 +#define SSC_8X_EN 0x40 +#define SSC_FIX_FRAC 0x20 +#define SSC_SEL_1M 0x00 +#define SSC_SEL_2M 0x08 +#define SSC_SEL_4M 0x10 +#define SSC_SEL_8M 0x18 + +/* SSC_CTL2 */ +#define SSC_DEPTH_MASK 0x03 +#define SSC_DEPTH_DISALBE 0x00 +#define SSC_DEPTH_2M 0x01 +#define SSC_DEPTH_1M 0x02 +#define SSC_DEPTH_512K 0x03 + +/* SD_VPCLK0_CTL */ +#define PHASE_CHANGE 0x80 +#define PHASE_NOT_RESET 0x40 + +/* SD_TRANSFER */ +#define SD_TRANSFER_START 0x80 +#define SD_TRANSFER_END 0x40 +#define SD_STAT_IDLE 0x20 +#define SD_TRANSFER_ERR 0x10 +#define SD_TM_NORMAL_WRITE 0x00 +#define SD_TM_AUTO_WRITE_3 0x01 +#define SD_TM_AUTO_WRITE_4 0x02 +#define SD_TM_AUTO_READ_3 0x05 +#define SD_TM_AUTO_READ_4 0x06 +#define SD_TM_CMD_RSP 0x08 +#define SD_TM_AUTO_WRITE_1 0x09 +#define SD_TM_AUTO_WRITE_2 0x0A +#define SD_TM_NORMAL_READ 0x0C +#define SD_TM_AUTO_READ_1 0x0D +#define SD_TM_AUTO_READ_2 0x0E +#define SD_TM_AUTO_TUNING 0x0F + +/* SD_CFG1 */ +#define SD_CLK_DIVIDE_0 0x00 +#define SD_CLK_DIVIDE_256 0xC0 +#define SD_CLK_DIVIDE_128 0x80 +#define SD_CLK_DIVIDE_MASK 0xC0 +#define SD_BUS_WIDTH_1BIT 0x00 +#define SD_BUS_WIDTH_4BIT 0x01 +#define SD_BUS_WIDTH_8BIT 0x02 +#define SD_ASYNC_FIFO_RST 0x10 +#define SD_20_MODE 0x00 +#define SD_DDR_MODE 0x04 +#define SD_30_MODE 0x08 + +/* SD_CFG2 */ +#define SD_CALCULATE_CRC7 0x00 +#define SD_NO_CALCULATE_CRC7 0x80 +#define SD_CHECK_CRC16 0x00 +#define SD_NO_CHECK_CRC16 0x40 +#define SD_WAIT_CRC_TO_EN 0x20 +#define SD_WAIT_BUSY_END 0x08 +#define SD_NO_WAIT_BUSY_END 0x00 +#define SD_CHECK_CRC7 0x00 +#define SD_NO_CHECK_CRC7 0x04 +#define SD_RSP_LEN_0 0x00 +#define SD_RSP_LEN_6 0x01 +#define SD_RSP_LEN_17 0x02 +#define SD_RSP_TYPE_R0 0x04 +#define SD_RSP_TYPE_R1 0x01 +#define SD_RSP_TYPE_R1b 0x09 +#define SD_RSP_TYPE_R2 0x02 +#define SD_RSP_TYPE_R3 0x05 +#define SD_RSP_TYPE_R4 0x05 +#define SD_RSP_TYPE_R5 0x01 +#define SD_RSP_TYPE_R6 0x01 +#define SD_RSP_TYPE_R7 0x01 + +/* SD_STAT1 */ +#define SD_CRC7_ERR 0x80 +#define SD_CRC16_ERR 0x40 +#define SD_CRC_WRITE_ERR 0x20 +#define SD_CRC_WRITE_ERR_MASK 0x1C +#define GET_CRC_TIME_OUT 0x02 +#define SD_TUNING_COMPARE_ERR 0x01 + +/* SD_DATA_STATE */ +#define SD_DATA_IDLE 0x80 + +/* CARD_DATA_SOURCE */ +#define PINGPONG_BUFFER 0x01 +#define RING_BUFFER 0x00 + +/* CARD_OE */ +#define SD_OUTPUT_EN 0x04 +#define MS_OUTPUT_EN 0x08 + +/* CARD_STOP */ +#define SD_STOP 0x04 +#define MS_STOP 0x08 +#define SD_CLR_ERR 0x40 +#define MS_CLR_ERR 0x80 + +/* CARD_CLK_SOURCE */ +#define CRC_FIX_CLK (0x00 << 0) +#define CRC_VAR_CLK0 (0x01 << 0) +#define CRC_VAR_CLK1 (0x02 << 0) +#define SD30_FIX_CLK (0x00 << 2) +#define SD30_VAR_CLK0 (0x01 << 2) +#define SD30_VAR_CLK1 (0x02 << 2) +#define SAMPLE_FIX_CLK (0x00 << 4) +#define SAMPLE_VAR_CLK0 (0x01 << 4) +#define SAMPLE_VAR_CLK1 (0x02 << 4) + +/* SD_SAMPLE_POINT_CTL */ +#define DDR_FIX_RX_DAT 0x00 +#define DDR_VAR_RX_DAT 0x80 +#define DDR_FIX_RX_DAT_EDGE 0x00 +#define DDR_FIX_RX_DAT_14_DELAY 0x40 +#define DDR_FIX_RX_CMD 0x00 +#define DDR_VAR_RX_CMD 0x20 +#define DDR_FIX_RX_CMD_POS_EDGE 0x00 +#define DDR_FIX_RX_CMD_14_DELAY 0x10 +#define SD20_RX_POS_EDGE 0x00 +#define SD20_RX_14_DELAY 0x08 +#define SD20_RX_SEL_MASK 0x08 + +/* SD_PUSH_POINT_CTL */ +#define DDR_FIX_TX_CMD_DAT 0x00 +#define DDR_VAR_TX_CMD_DAT 0x80 +#define DDR_FIX_TX_DAT_14_TSU 0x00 +#define DDR_FIX_TX_DAT_12_TSU 0x40 +#define DDR_FIX_TX_CMD_NEG_EDGE 0x00 +#define DDR_FIX_TX_CMD_14_AHEAD 0x20 +#define SD20_TX_NEG_EDGE 0x00 +#define SD20_TX_14_AHEAD 0x10 +#define SD20_TX_SEL_MASK 0x10 +#define DDR_VAR_SDCLK_POL_SWAP 0x01 + +/* MS_CFG */ +#define SAMPLE_TIME_RISING 0x00 +#define SAMPLE_TIME_FALLING 0x80 +#define PUSH_TIME_DEFAULT 0x00 +#define PUSH_TIME_ODD 0x40 +#define NO_EXTEND_TOGGLE 0x00 +#define EXTEND_TOGGLE_CHK 0x20 +#define MS_BUS_WIDTH_1 0x00 +#define MS_BUS_WIDTH_4 0x10 +#define MS_BUS_WIDTH_8 0x18 +#define MS_2K_SECTOR_MODE 0x04 +#define MS_512_SECTOR_MODE 0x00 +#define MS_TOGGLE_TIMEOUT_EN 0x00 +#define MS_TOGGLE_TIMEOUT_DISEN 0x01 +#define MS_NO_CHECK_INT 0x02 + +/* MS_TRANS_CFG */ +#define WAIT_INT 0x80 +#define NO_WAIT_INT 0x00 +#define NO_AUTO_READ_INT_REG 0x00 +#define AUTO_READ_INT_REG 0x40 +#define MS_CRC16_ERR 0x20 +#define MS_RDY_TIMEOUT 0x10 +#define MS_INT_CMDNK 0x08 +#define MS_INT_BREQ 0x04 +#define MS_INT_ERR 0x02 +#define MS_INT_CED 0x01 + +/* MS_TRANSFER */ +#define MS_TRANSFER_START 0x80 +#define MS_TRANSFER_END 0x40 +#define MS_TRANSFER_ERR 0x20 +#define MS_BS_STATE 0x10 +#define MS_TM_READ_BYTES 0x00 +#define MS_TM_NORMAL_READ 0x01 +#define MS_TM_WRITE_BYTES 0x04 +#define MS_TM_NORMAL_WRITE 0x05 +#define MS_TM_AUTO_READ 0x08 +#define MS_TM_AUTO_WRITE 0x0C +#define MS_TM_SET_CMD 0x06 +#define MS_TM_COPY_PAGE 0x07 +#define MS_TM_MULTI_READ 0x02 +#define MS_TM_MULTI_WRITE 0x03 + +/* MC_FIFO_CTL */ +#define FIFO_FLUSH 0x01 + +/* MC_DMA_RST */ +#define DMA_RESET 0x01 + +/* MC_DMA_CTL */ +#define DMA_TC_EQ_0 0x80 +#define DMA_DIR_TO_CARD 0x00 +#define DMA_DIR_FROM_CARD 0x02 +#define DMA_EN 0x01 +#define DMA_128 (0 << 2) +#define DMA_256 (1 << 2) +#define DMA_512 (2 << 2) +#define DMA_1024 (3 << 2) +#define DMA_PACK_SIZE_MASK 0x0C + +/* CARD_INT_PEND */ +#define XD_INT 0x10 +#define MS_INT 0x08 +#define SD_INT 0x04 + +/* LED operations*/ +static inline int rtsx_usb_turn_on_led(struct rtsx_ucr *ucr) +{ + return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x02); +} + +static inline int rtsx_usb_turn_off_led(struct rtsx_ucr *ucr) +{ + return rtsx_usb_ep0_write_register(ucr, CARD_GPIO, 0x03, 0x03); +} + +/* HW error clearing */ +static inline void rtsx_usb_clear_fsm_err(struct rtsx_ucr *ucr) +{ + rtsx_usb_ep0_write_register(ucr, SFSM_ED, 0xf8, 0xf8); +} + +static inline void rtsx_usb_clear_dma_err(struct rtsx_ucr *ucr) +{ + rtsx_usb_ep0_write_register(ucr, MC_FIFO_CTL, + FIFO_FLUSH, FIFO_FLUSH); + rtsx_usb_ep0_write_register(ucr, MC_DMA_RST, DMA_RESET, DMA_RESET); +} +#endif /* __RTS51139_H */ -- cgit v1.2.3 From c1d12c784c49980e4cbe57b7e6cc14b406449099 Mon Sep 17 00:00:00 2001 From: "Opensource [Steve Twiss]" Date: Fri, 14 Feb 2014 14:08:11 +0000 Subject: mfd: da9063: Add support for production silicon variant code Add the correct silicon variant code ID (0x5) to the driver. This new code is the 'production' variant code ID for DA9063. This patch will remove the older variant code ID which matches the pre-production silicon ID (0x3) for the DA9063 chip. There is also some small amount of correction done in this patch: it splits the revision code and correctly names it according to the hardware specification and moves the dev_info() call before the variant ID test. Signed-off-by: Opensource [Steve Twiss] Signed-off-by: Lee Jones --- include/linux/mfd/da9063/core.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/core.h b/include/linux/mfd/da9063/core.h index 2d2a0af675fd..00a9aac5d1e8 100644 --- a/include/linux/mfd/da9063/core.h +++ b/include/linux/mfd/da9063/core.h @@ -33,6 +33,10 @@ enum da9063_models { PMIC_DA9063 = 0x61, }; +enum da9063_variant_codes { + PMIC_DA9063_BB = 0x5 +}; + /* Interrupts */ enum da9063_irqs { DA9063_IRQ_ONKEY = 0, @@ -72,7 +76,7 @@ struct da9063 { /* Device */ struct device *dev; unsigned short model; - unsigned short revision; + unsigned char variant_code; unsigned int flags; /* Control interface */ -- cgit v1.2.3 From 6cec365e3eba3dd8c864056d8d3fd9e73ab8dd7a Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 14 Feb 2014 15:01:53 +0100 Subject: mfd: lpc_ich: Convert ICH GPIOs IDs to enum All those IDs are arbitrary and so can be encapsulated into an enumeration. Signed-off-by: Vincent Donnefort Signed-off-by: Lee Jones --- include/linux/mfd/lpc_ich.h | 16 +++++++++------- 1 file changed, 9 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 3e1df644c407..293b06209b9c 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -31,13 +31,15 @@ #define ICH_RES_GPE0 1 /* GPIO compatibility */ -#define ICH_I3100_GPIO 0x401 -#define ICH_V5_GPIO 0x501 -#define ICH_V6_GPIO 0x601 -#define ICH_V7_GPIO 0x701 -#define ICH_V9_GPIO 0x801 -#define ICH_V10CORP_GPIO 0xa01 -#define ICH_V10CONS_GPIO 0xa11 +enum { + ICH_I3100_GPIO, + ICH_V5_GPIO, + ICH_V6_GPIO, + ICH_V7_GPIO, + ICH_V9_GPIO, + ICH_V10CORP_GPIO, + ICH_V10CONS_GPIO, +}; struct lpc_ich_info { char name[32]; -- cgit v1.2.3 From facd9939403cb5769190054a600474399e776e3a Mon Sep 17 00:00:00 2001 From: Vincent Donnefort Date: Fri, 14 Feb 2014 15:01:54 +0100 Subject: mfd: lpc_ich: Add support for Intel Avoton GPIOs Signed-off-by: Vincent Donnefort Signed-off-by: Lee Jones --- include/linux/mfd/lpc_ich.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index 293b06209b9c..b2364dd026a6 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -39,6 +39,7 @@ enum { ICH_V9_GPIO, ICH_V10CORP_GPIO, ICH_V10CONS_GPIO, + AVOTON_GPIO, }; struct lpc_ich_info { -- cgit v1.2.3 From 16263f2879afecb7772f4bae238bca66352ecffe Mon Sep 17 00:00:00 2001 From: "Opensource [Anthony Olech]" Date: Wed, 19 Feb 2014 16:32:46 +0000 Subject: mfd: da9052: Extend support to a new chip Add the hash define for the new variant of the DA9053 PMIC called BC. Signed-off-by: Anthony Olech Signed-off-by: Lee Jones --- include/linux/mfd/da9052/da9052.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mfd/da9052/da9052.h b/include/linux/mfd/da9052/da9052.h index 21e21b81cc75..bba65f51a0b5 100644 --- a/include/linux/mfd/da9052/da9052.h +++ b/include/linux/mfd/da9052/da9052.h @@ -83,6 +83,7 @@ enum da9052_chip_id { DA9053_AA, DA9053_BA, DA9053_BB, + DA9053_BC, }; struct da9052_pdata; -- cgit v1.2.3 From bc866fc7a8c4322de40b694ffcfcdda50ab82f35 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 26 Feb 2014 10:59:19 -0800 Subject: mfd: pm8xxx: Move pm8xxx-irq.c contents into only driver that uses it The pm8xxx-irq.c code is practically mandatory given that the pm8921-core driver will WARN about it missing and the Kconfig marks it as default y when a PM8xxx chips is enabled. The only reason the file was split out was because we planned to support other pm8xxx chips with different pm8xxx-core.c files. Now that we have DT on ARM this isn't necessary because we should be able to support all the ssbi based PM8xxx chips in one driver and one file with no data bloat. Let's move this code into the only driver that uses it right now (pm8921) so that it's always compiled when needed. In the future we can rename pm8921-core.c to something more generic. Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones --- include/linux/mfd/pm8xxx/irq.h | 23 ----------------------- 1 file changed, 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h index f83d6b43ecbb..1a11b02383f0 100644 --- a/include/linux/mfd/pm8xxx/irq.h +++ b/include/linux/mfd/pm8xxx/irq.h @@ -33,27 +33,4 @@ struct pm8xxx_irq_platform_data { int irq_trigger_flag; }; -struct pm_irq_chip; - -#ifdef CONFIG_MFD_PM8XXX_IRQ -int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq); -struct pm_irq_chip *pm8xxx_irq_init(struct device *dev, - const struct pm8xxx_irq_platform_data *pdata); -int pm8xxx_irq_exit(struct pm_irq_chip *chip); -#else -static inline int pm8xxx_get_irq_stat(struct pm_irq_chip *chip, int irq) -{ - return -ENXIO; -} -static inline struct pm_irq_chip *pm8xxx_irq_init( - const struct device *dev, - const struct pm8xxx_irq_platform_data *pdata) -{ - return ERR_PTR(-ENXIO); -} -static inline int pm8xxx_irq_exit(struct pm_irq_chip *chip) -{ - return -ENXIO; -} -#endif /* CONFIG_MFD_PM8XXX_IRQ */ #endif /* __MFD_PM8XXX_IRQ_H */ -- cgit v1.2.3 From dc1a95ccaa1158948bbc6648d6dadc534a30ed92 Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 26 Feb 2014 10:59:21 -0800 Subject: mfd: pm8921: Migrate to irqdomains Convert this driver to use irqdomains so that the PMIC's child devices can be converted to devicetree. Acked-by: Lee Jones Signed-off-by: Stephen Boyd --- include/linux/mfd/pm8xxx/irq.h | 36 ------------------------------------ include/linux/mfd/pm8xxx/pm8921.h | 30 ------------------------------ 2 files changed, 66 deletions(-) delete mode 100644 include/linux/mfd/pm8xxx/irq.h delete mode 100644 include/linux/mfd/pm8xxx/pm8921.h (limited to 'include/linux') diff --git a/include/linux/mfd/pm8xxx/irq.h b/include/linux/mfd/pm8xxx/irq.h deleted file mode 100644 index 1a11b02383f0..000000000000 --- a/include/linux/mfd/pm8xxx/irq.h +++ /dev/null @@ -1,36 +0,0 @@ -/* - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -/* - * Qualcomm PMIC irq 8xxx driver header file - * - */ - -#ifndef __MFD_PM8XXX_IRQ_H -#define __MFD_PM8XXX_IRQ_H - -#include -#include - -struct pm8xxx_irq_core_data { - u32 rev; - int nirqs; -}; - -struct pm8xxx_irq_platform_data { - int irq_base; - struct pm8xxx_irq_core_data irq_cdata; - int devirq; - int irq_trigger_flag; -}; - -#endif /* __MFD_PM8XXX_IRQ_H */ diff --git a/include/linux/mfd/pm8xxx/pm8921.h b/include/linux/mfd/pm8xxx/pm8921.h deleted file mode 100644 index 00fa3de7659d..000000000000 --- a/include/linux/mfd/pm8xxx/pm8921.h +++ /dev/null @@ -1,30 +0,0 @@ -/* - * Copyright (c) 2011, Code Aurora Forum. All rights reserved. - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 and - * only version 2 as published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - */ -/* - * Qualcomm PMIC 8921 driver header file - * - */ - -#ifndef __MFD_PM8921_H -#define __MFD_PM8921_H - -#include - -#define PM8921_NR_IRQS 256 - -struct pm8921_platform_data { - int irq_base; - struct pm8xxx_irq_platform_data *irq_pdata; -}; - -#endif -- cgit v1.2.3 From 559c04f6f116075a1ebead411666c7ab47dcc7fe Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Wed, 26 Feb 2014 10:59:22 -0800 Subject: mfd: ssbi: Add regmap read/write helpers Add read and write helper functions that the pm8921-core driver can use to read and write ssbi registers via a "no-bus" regmap. Signed-off-by: Stephen Boyd Signed-off-by: Lee Jones --- include/linux/ssbi.h | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ssbi.h b/include/linux/ssbi.h index bcbb642a7641..087b08a4d333 100644 --- a/include/linux/ssbi.h +++ b/include/linux/ssbi.h @@ -20,4 +20,24 @@ int ssbi_write(struct device *dev, u16 addr, const u8 *buf, int len); int ssbi_read(struct device *dev, u16 addr, u8 *buf, int len); +static inline int +ssbi_reg_read(void *context, unsigned int reg, unsigned int *val) +{ + int ret; + u8 v; + + ret = ssbi_read(context, reg, &v, 1); + if (!ret) + *val = v; + + return ret; +} + +static inline int +ssbi_reg_write(void *context, unsigned int reg, unsigned int val) +{ + u8 v = val; + return ssbi_write(context, reg, &v, 1); +} + #endif -- cgit v1.2.3 From 9fc2b9ca99428b54e6bbfd2d7bf0ccc1c594f1f5 Mon Sep 17 00:00:00 2001 From: "Opensource [Steve Twiss]" Date: Thu, 6 Mar 2014 16:40:02 +0000 Subject: mfd: da9063: Upgrade of register definitions to support production silicon This patch updates the register definitions for DA9063 to support the production silicon variant code ID (0x5). These changes are not backwards compatible with the previous register definitions and can only be used with the production variant of DA9063. Signed-off-by: Opensource [Steve Twiss] Signed-off-by: Lee Jones --- include/linux/mfd/da9063/registers.h | 120 ++++++++++++++++++----------------- 1 file changed, 62 insertions(+), 58 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/da9063/registers.h b/include/linux/mfd/da9063/registers.h index 5834813fb5f3..09a85c699da1 100644 --- a/include/linux/mfd/da9063/registers.h +++ b/include/linux/mfd/da9063/registers.h @@ -17,11 +17,7 @@ #define _DA9063_REG_H #define DA9063_I2C_PAGE_SEL_SHIFT 1 - #define DA9063_EVENT_REG_NUM 4 -#define DA9210_EVENT_REG_NUM 2 -#define DA9063_EXT_EVENT_REG_NUM (DA9063_EVENT_REG_NUM + \ - DA9210_EVENT_REG_NUM) /* Page selection I2C or SPI always in the begining of any page. */ /* Page 0 : I2C access 0x000 - 0x0FF SPI access 0x000 - 0x07F */ @@ -61,9 +57,9 @@ #define DA9063_REG_GPIO_10_11 0x1A #define DA9063_REG_GPIO_12_13 0x1B #define DA9063_REG_GPIO_14_15 0x1C -#define DA9063_REG_GPIO_MODE_0_7 0x1D -#define DA9063_REG_GPIO_MODE_8_15 0x1E -#define DA9063_REG_GPIO_SWITCH_CONT 0x1F +#define DA9063_REG_GPIO_MODE0_7 0x1D +#define DA9063_REG_GPIO_MODE8_15 0x1E +#define DA9063_REG_SWITCH_CONT 0x1F /* Regulator Control Registers */ #define DA9063_REG_BCORE2_CONT 0x20 @@ -83,7 +79,7 @@ #define DA9063_REG_LDO9_CONT 0x2E #define DA9063_REG_LDO10_CONT 0x2F #define DA9063_REG_LDO11_CONT 0x30 -#define DA9063_REG_VIB 0x31 +#define DA9063_REG_SUPPLIES 0x31 #define DA9063_REG_DVC_1 0x32 #define DA9063_REG_DVC_2 0x33 @@ -97,9 +93,9 @@ #define DA9063_REG_ADCIN1_RES 0x3A #define DA9063_REG_ADCIN2_RES 0x3B #define DA9063_REG_ADCIN3_RES 0x3C -#define DA9063_REG_MON1_RES 0x3D -#define DA9063_REG_MON2_RES 0x3E -#define DA9063_REG_MON3_RES 0x3F +#define DA9063_REG_MON_A8_RES 0x3D +#define DA9063_REG_MON_A9_RES 0x3E +#define DA9063_REG_MON_A10_RES 0x3F /* RTC Calendar and Alarm Registers */ #define DA9063_REG_COUNT_S 0x40 @@ -108,15 +104,16 @@ #define DA9063_REG_COUNT_D 0x43 #define DA9063_REG_COUNT_MO 0x44 #define DA9063_REG_COUNT_Y 0x45 -#define DA9063_REG_ALARM_MI 0x46 -#define DA9063_REG_ALARM_H 0x47 -#define DA9063_REG_ALARM_D 0x48 -#define DA9063_REG_ALARM_MO 0x49 -#define DA9063_REG_ALARM_Y 0x4A -#define DA9063_REG_SECOND_A 0x4B -#define DA9063_REG_SECOND_B 0x4C -#define DA9063_REG_SECOND_C 0x4D -#define DA9063_REG_SECOND_D 0x4E +#define DA9063_REG_ALARM_S 0x46 +#define DA9063_REG_ALARM_MI 0x47 +#define DA9063_REG_ALARM_H 0x48 +#define DA9063_REG_ALARM_D 0x49 +#define DA9063_REG_ALARM_MO 0x4A +#define DA9063_REG_ALARM_Y 0x4B +#define DA9063_REG_SECOND_A 0x4C +#define DA9063_REG_SECOND_B 0x4D +#define DA9063_REG_SECOND_C 0x4E +#define DA9063_REG_SECOND_D 0x4F /* Sequencer Control Registers */ #define DA9063_REG_SEQ 0x81 @@ -226,35 +223,37 @@ #define DA9063_REG_CONFIG_J 0x10F #define DA9063_REG_CONFIG_K 0x110 #define DA9063_REG_CONFIG_L 0x111 -#define DA9063_REG_MON_REG_1 0x112 -#define DA9063_REG_MON_REG_2 0x113 -#define DA9063_REG_MON_REG_3 0x114 -#define DA9063_REG_MON_REG_4 0x115 -#define DA9063_REG_MON_REG_5 0x116 -#define DA9063_REG_MON_REG_6 0x117 -#define DA9063_REG_TRIM_CLDR 0x118 - +#define DA9063_REG_CONFIG_M 0x112 +#define DA9063_REG_CONFIG_N 0x113 + +#define DA9063_REG_MON_REG_1 0x114 +#define DA9063_REG_MON_REG_2 0x115 +#define DA9063_REG_MON_REG_3 0x116 +#define DA9063_REG_MON_REG_4 0x117 +#define DA9063_REG_MON_REG_5 0x11E +#define DA9063_REG_MON_REG_6 0x11F +#define DA9063_REG_TRIM_CLDR 0x120 /* General Purpose Registers */ -#define DA9063_REG_GP_ID_0 0x119 -#define DA9063_REG_GP_ID_1 0x11A -#define DA9063_REG_GP_ID_2 0x11B -#define DA9063_REG_GP_ID_3 0x11C -#define DA9063_REG_GP_ID_4 0x11D -#define DA9063_REG_GP_ID_5 0x11E -#define DA9063_REG_GP_ID_6 0x11F -#define DA9063_REG_GP_ID_7 0x120 -#define DA9063_REG_GP_ID_8 0x121 -#define DA9063_REG_GP_ID_9 0x122 -#define DA9063_REG_GP_ID_10 0x123 -#define DA9063_REG_GP_ID_11 0x124 -#define DA9063_REG_GP_ID_12 0x125 -#define DA9063_REG_GP_ID_13 0x126 -#define DA9063_REG_GP_ID_14 0x127 -#define DA9063_REG_GP_ID_15 0x128 -#define DA9063_REG_GP_ID_16 0x129 -#define DA9063_REG_GP_ID_17 0x12A -#define DA9063_REG_GP_ID_18 0x12B -#define DA9063_REG_GP_ID_19 0x12C +#define DA9063_REG_GP_ID_0 0x121 +#define DA9063_REG_GP_ID_1 0x122 +#define DA9063_REG_GP_ID_2 0x123 +#define DA9063_REG_GP_ID_3 0x124 +#define DA9063_REG_GP_ID_4 0x125 +#define DA9063_REG_GP_ID_5 0x126 +#define DA9063_REG_GP_ID_6 0x127 +#define DA9063_REG_GP_ID_7 0x128 +#define DA9063_REG_GP_ID_8 0x129 +#define DA9063_REG_GP_ID_9 0x12A +#define DA9063_REG_GP_ID_10 0x12B +#define DA9063_REG_GP_ID_11 0x12C +#define DA9063_REG_GP_ID_12 0x12D +#define DA9063_REG_GP_ID_13 0x12E +#define DA9063_REG_GP_ID_14 0x12F +#define DA9063_REG_GP_ID_15 0x130 +#define DA9063_REG_GP_ID_16 0x131 +#define DA9063_REG_GP_ID_17 0x132 +#define DA9063_REG_GP_ID_18 0x133 +#define DA9063_REG_GP_ID_19 0x134 /* Chip ID and variant */ #define DA9063_REG_CHIP_ID 0x181 @@ -405,8 +404,10 @@ /* DA9063_REG_CONTROL_B (addr=0x0F) */ #define DA9063_CHG_SEL 0x01 #define DA9063_WATCHDOG_PD 0x02 +#define DA9063_RESET_BLINKING 0x04 #define DA9063_NRES_MODE 0x08 #define DA9063_NONKEY_LOCK 0x10 +#define DA9063_BUCK_SLOWSTART 0x80 /* DA9063_REG_CONTROL_C (addr=0x10) */ #define DA9063_DEBOUNCING_MASK 0x07 @@ -466,6 +467,7 @@ #define DA9063_GPADC_PAUSE 0x02 #define DA9063_PMIF_DIS 0x04 #define DA9063_HS2WIRE_DIS 0x08 +#define DA9063_CLDR_PAUSE 0x10 #define DA9063_BBAT_DIS 0x20 #define DA9063_OUT_32K_PAUSE 0x40 #define DA9063_PMCONT_DIS 0x80 @@ -660,7 +662,7 @@ #define DA9063_GPIO15_TYPE_GPO 0x04 #define DA9063_GPIO15_NO_WAKEUP 0x80 -/* DA9063_REG_GPIO_MODE_0_7 (addr=0x1D) */ +/* DA9063_REG_GPIO_MODE0_7 (addr=0x1D) */ #define DA9063_GPIO0_MODE 0x01 #define DA9063_GPIO1_MODE 0x02 #define DA9063_GPIO2_MODE 0x04 @@ -670,7 +672,7 @@ #define DA9063_GPIO6_MODE 0x40 #define DA9063_GPIO7_MODE 0x80 -/* DA9063_REG_GPIO_MODE_8_15 (addr=0x1E) */ +/* DA9063_REG_GPIO_MODE8_15 (addr=0x1E) */ #define DA9063_GPIO8_MODE 0x01 #define DA9063_GPIO9_MODE 0x02 #define DA9063_GPIO10_MODE 0x04 @@ -702,12 +704,12 @@ #define DA9063_SWITCH_SR_5MV 0x10 #define DA9063_SWITCH_SR_10MV 0x20 #define DA9063_SWITCH_SR_50MV 0x30 -#define DA9063_SWITCH_SR_DIS 0x40 +#define DA9063_CORE_SW_INTERNAL 0x40 #define DA9063_CP_EN_MODE 0x80 /* DA9063_REGL_Bxxxx_CONT common bits (addr=0x20-0x25) */ #define DA9063_BUCK_EN 0x01 -#define DA9063_BUCK_GPI_MASK 0x06 +#define DA9063_BUCK_GPI_MASK 0x06 #define DA9063_BUCK_GPI_OFF 0x00 #define DA9063_BUCK_GPI_GPIO1 0x02 #define DA9063_BUCK_GPI_GPIO2 0x04 @@ -841,25 +843,27 @@ #define DA9063_COUNT_YEAR_MASK 0x3F #define DA9063_MONITOR 0x40 -/* DA9063_REG_ALARM_MI (addr=0x46) */ +/* DA9063_REG_ALARM_S (addr=0x46) */ +#define DA9063_ALARM_S_MASK 0x3F #define DA9063_ALARM_STATUS_ALARM 0x80 #define DA9063_ALARM_STATUS_TICK 0x40 +/* DA9063_REG_ALARM_MI (addr=0x47) */ #define DA9063_ALARM_MIN_MASK 0x3F -/* DA9063_REG_ALARM_H (addr=0x47) */ +/* DA9063_REG_ALARM_H (addr=0x48) */ #define DA9063_ALARM_HOUR_MASK 0x1F -/* DA9063_REG_ALARM_D (addr=0x48) */ +/* DA9063_REG_ALARM_D (addr=0x49) */ #define DA9063_ALARM_DAY_MASK 0x1F -/* DA9063_REG_ALARM_MO (addr=0x49) */ +/* DA9063_REG_ALARM_MO (addr=0x4A) */ #define DA9063_TICK_WAKE 0x20 #define DA9063_TICK_TYPE 0x10 #define DA9063_TICK_TYPE_SEC 0x00 #define DA9063_TICK_TYPE_MIN 0x10 #define DA9063_ALARM_MONTH_MASK 0x0F -/* DA9063_REG_ALARM_Y (addr=0x4A) */ +/* DA9063_REG_ALARM_Y (addr=0x4B) */ #define DA9063_TICK_ON 0x80 #define DA9063_ALARM_ON 0x40 #define DA9063_ALARM_YEAR_MASK 0x3F @@ -906,7 +910,7 @@ /* DA9063_REG_Bxxxx_CFG common bits (addr=0x9D-0xA2) */ #define DA9063_BUCK_FB_MASK 0x07 -#define DA9063_BUCK_PD_DIS_SHIFT 5 +#define DA9063_BUCK_PD_DIS_MASK 0x20 #define DA9063_BUCK_MODE_MASK 0xC0 #define DA9063_BUCK_MODE_MANUAL 0x00 #define DA9063_BUCK_MODE_SLEEP 0x40 -- cgit v1.2.3 From 037b60f2ca9403d02cbee60ad0a10f3806f9dc4b Mon Sep 17 00:00:00 2001 From: Matt Porter Date: Tue, 11 Mar 2014 19:33:54 -0400 Subject: mfd: Add bcm590xx pmu driver Add a driver for the BCM590xx PMU multi-function devices. The driver initially supports regmap initialization and instantiation of the voltage regulator device function of the PMU. Signed-off-by: Matt Porter Reviewed-by: Tim Kryger Reviewed-by: Markus Mayer Signed-off-by: Lee Jones --- include/linux/mfd/bcm590xx.h | 31 +++++++++++++++++++++++++++++++ 1 file changed, 31 insertions(+) create mode 100644 include/linux/mfd/bcm590xx.h (limited to 'include/linux') diff --git a/include/linux/mfd/bcm590xx.h b/include/linux/mfd/bcm590xx.h new file mode 100644 index 000000000000..434df2d4e587 --- /dev/null +++ b/include/linux/mfd/bcm590xx.h @@ -0,0 +1,31 @@ +/* + * Broadcom BCM590xx PMU + * + * Copyright 2014 Linaro Limited + * Author: Matt Porter + * + * This program is free software; you can redistribute it and/or modify it + * under the terms of the GNU General Public License as published by the + * Free Software Foundation; either version 2 of the License, or (at your + * option) any later version. + * + */ + +#ifndef __LINUX_MFD_BCM590XX_H +#define __LINUX_MFD_BCM590XX_H + +#include +#include +#include + +/* max register address */ +#define BCM590XX_MAX_REGISTER 0xe7 + +struct bcm590xx { + struct device *dev; + struct i2c_client *i2c_client; + struct regmap *regmap; + unsigned int id; +}; + +#endif /* __LINUX_MFD_BCM590XX_H */ -- cgit v1.2.3 From eb71d4dec4a5e010e34b9d7afdb5af41884c388e Mon Sep 17 00:00:00 2001 From: Peter Tyser Date: Mon, 10 Mar 2014 16:34:54 -0500 Subject: mfd: lpc_ich: Add support for iTCO v3 Some newer Atom CPUs, eg Avoton and Bay Trail, use slightly different register layouts for the iTCO than the current v1 and v2 iTCO. Differences from previous iTCO versions include: - The ACPI space is enabled in the "ACPI base address" register instead of the "ACPI control register" - The "no reboot" functionality is set in the "Power Management Configuration" register instead of the "General Control and Status" (GCS) register or PCI configuration space. - The "ACPI Control Register" is not present on v3. The "Power Management Configuration Base Address" register resides at the same address is Avoton/Bay Trail. To differentiate these newer chipsets create a new v3 iTCO version and update the MFD driver to support them. Signed-off-by: Peter Tyser Tested-by: Rajat Jain Reviewed-by: Guenter Roeck Signed-off-by: Lee Jones --- include/linux/mfd/lpc_ich.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mfd/lpc_ich.h b/include/linux/mfd/lpc_ich.h index b2364dd026a6..8feac782fa83 100644 --- a/include/linux/mfd/lpc_ich.h +++ b/include/linux/mfd/lpc_ich.h @@ -21,10 +21,10 @@ #define LPC_ICH_H /* Watchdog resources */ -#define ICH_RES_IO_TCO 0 -#define ICH_RES_IO_SMI 1 -#define ICH_RES_MEM_OFF 2 -#define ICH_RES_MEM_GCS 0 +#define ICH_RES_IO_TCO 0 +#define ICH_RES_IO_SMI 1 +#define ICH_RES_MEM_OFF 2 +#define ICH_RES_MEM_GCS_PMC 0 /* GPIO resources */ #define ICH_RES_GPIO 0 -- cgit v1.2.3 From 93ae4f978ca7f26d17df915ac7afc919c1dd0353 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 11 Mar 2014 02:04:14 +0530 Subject: CPU hotplug: Provide lockless versions of callback registration functions The following method of CPU hotplug callback registration is not safe due to the possibility of an ABBA deadlock involving the cpu_add_remove_lock and the cpu_hotplug.lock. get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); The deadlock is shown below: CPU 0 CPU 1 ----- ----- Acquire cpu_hotplug.lock [via get_online_cpus()] CPU online/offline operation takes cpu_add_remove_lock [via cpu_maps_update_begin()] Try to acquire cpu_add_remove_lock [via register_cpu_notifier()] CPU online/offline operation tries to acquire cpu_hotplug.lock [via cpu_hotplug_begin()] *** DEADLOCK! *** The problem here is that callback registration takes the locks in one order whereas the CPU hotplug operations take the same locks in the opposite order. To avoid this issue and to provide a race-free method to register CPU hotplug callbacks (along with initialization of already online CPUs), introduce new variants of the callback registration APIs that simply register the callbacks without holding the cpu_add_remove_lock during the registration. That way, we can avoid the ABBA scenario. However, we will need to hold the cpu_add_remove_lock throughout the entire critical section, to protect updates to the callback/notifier chain. This can be achieved by writing the callback registration code as follows: cpu_maps_update_begin(); [ or cpu_notifier_register_begin(); see below ] for_each_online_cpu(cpu) init_cpu(cpu); /* This doesn't take the cpu_add_remove_lock */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_maps_update_done(); [ or cpu_notifier_register_done(); see below ] Note that we can't use get_online_cpus() here instead of cpu_maps_update_begin() because the cpu_hotplug.lock is dropped during the invocation of CPU_POST_DEAD notifiers, and hence get_online_cpus() cannot provide the necessary synchronization to protect the callback/notifier chains against concurrent reads and writes. On the other hand, since the cpu_add_remove_lock protects the entire hotplug operation (including CPU_POST_DEAD), we can use cpu_maps_update_begin/done() to guarantee proper synchronization. Also, since cpu_maps_update_begin/done() is like a super-set of get/put_online_cpus(), the former naturally protects the critical sections from concurrent hotplug operations. Since the names cpu_maps_update_begin/done() don't make much sense in CPU hotplug callback registration scenarios, we'll introduce new APIs named cpu_notifier_register_begin/done() and map them to cpu_maps_update_begin/done(). In summary, introduce the lockless variants of un/register_cpu_notifier() and also export the cpu_notifier_register_begin/done() APIs for use by modules. This way, we provide a race-free way to register hotplug callbacks as well as perform initialization for the CPUs that are already online. Cc: Thomas Gleixner Cc: Andrew Morton Cc: Peter Zijlstra Cc: Ingo Molnar Acked-by: Oleg Nesterov Acked-by: Toshi Kani Reviewed-by: Gautham R. Shenoy Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/cpu.h | 47 +++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 47 insertions(+) (limited to 'include/linux') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 03e235ad1bba..488d6ebcf6a1 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -122,26 +122,46 @@ enum { { .notifier_call = fn, .priority = pri }; \ register_cpu_notifier(&fn##_nb); \ } + +#define __cpu_notifier(fn, pri) { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = pri }; \ + __register_cpu_notifier(&fn##_nb); \ +} #else /* #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) #endif /* #else #if defined(CONFIG_HOTPLUG_CPU) || !defined(MODULE) */ + #ifdef CONFIG_HOTPLUG_CPU extern int register_cpu_notifier(struct notifier_block *nb); +extern int __register_cpu_notifier(struct notifier_block *nb); extern void unregister_cpu_notifier(struct notifier_block *nb); +extern void __unregister_cpu_notifier(struct notifier_block *nb); #else #ifndef MODULE extern int register_cpu_notifier(struct notifier_block *nb); +extern int __register_cpu_notifier(struct notifier_block *nb); #else static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } + +static inline int __register_cpu_notifier(struct notifier_block *nb) +{ + return 0; +} #endif static inline void unregister_cpu_notifier(struct notifier_block *nb) { } + +static inline void __unregister_cpu_notifier(struct notifier_block *nb) +{ +} #endif int cpu_up(unsigned int cpu); @@ -149,19 +169,32 @@ void notify_cpu_starting(unsigned int cpu); extern void cpu_maps_update_begin(void); extern void cpu_maps_update_done(void); +#define cpu_notifier_register_begin cpu_maps_update_begin +#define cpu_notifier_register_done cpu_maps_update_done + #else /* CONFIG_SMP */ #define cpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __cpu_notifier(fn, pri) do { (void)(fn); } while (0) static inline int register_cpu_notifier(struct notifier_block *nb) { return 0; } +static inline int __register_cpu_notifier(struct notifier_block *nb) +{ + return 0; +} + static inline void unregister_cpu_notifier(struct notifier_block *nb) { } +static inline void __unregister_cpu_notifier(struct notifier_block *nb) +{ +} + static inline void cpu_maps_update_begin(void) { } @@ -170,6 +203,14 @@ static inline void cpu_maps_update_done(void) { } +static inline void cpu_notifier_register_begin(void) +{ +} + +static inline void cpu_notifier_register_done(void) +{ +} + #endif /* CONFIG_SMP */ extern struct bus_type cpu_subsys; @@ -183,8 +224,11 @@ extern void put_online_cpus(void); extern void cpu_hotplug_disable(void); extern void cpu_hotplug_enable(void); #define hotcpu_notifier(fn, pri) cpu_notifier(fn, pri) +#define __hotcpu_notifier(fn, pri) __cpu_notifier(fn, pri) #define register_hotcpu_notifier(nb) register_cpu_notifier(nb) +#define __register_hotcpu_notifier(nb) __register_cpu_notifier(nb) #define unregister_hotcpu_notifier(nb) unregister_cpu_notifier(nb) +#define __unregister_hotcpu_notifier(nb) __unregister_cpu_notifier(nb) void clear_tasks_mm_cpumask(int cpu); int cpu_down(unsigned int cpu); @@ -197,9 +241,12 @@ static inline void cpu_hotplug_done(void) {} #define cpu_hotplug_disable() do { } while (0) #define cpu_hotplug_enable() do { } while (0) #define hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) +#define __hotcpu_notifier(fn, pri) do { (void)(fn); } while (0) /* These aren't inline functions due to a GCC bug. */ #define register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) +#define __register_hotcpu_notifier(nb) ({ (void)(nb); 0; }) #define unregister_hotcpu_notifier(nb) ({ (void)(nb); }) +#define __unregister_hotcpu_notifier(nb) ({ (void)(nb); }) #endif /* CONFIG_HOTPLUG_CPU */ #ifdef CONFIG_PM_SLEEP_SMP -- cgit v1.2.3 From f0bdb5e0c72b7347c867da539367138ad95c6b24 Mon Sep 17 00:00:00 2001 From: "Srivatsa S. Bhat" Date: Tue, 11 Mar 2014 02:04:39 +0530 Subject: CPU hotplug, perf: Fix CPU hotplug callback registration Subsystems that want to register CPU hotplug callbacks, as well as perform initialization for the CPUs that are already online, often do it as shown below: get_online_cpus(); for_each_online_cpu(cpu) init_cpu(cpu); register_cpu_notifier(&foobar_cpu_notifier); put_online_cpus(); This is wrong, since it is prone to ABBA deadlocks involving the cpu_add_remove_lock and the cpu_hotplug.lock (when running concurrently with CPU hotplug operations). Instead, the correct and race-free way of performing the callback registration is: cpu_notifier_register_begin(); for_each_online_cpu(cpu) init_cpu(cpu); /* Note the use of the double underscored version of the API */ __register_cpu_notifier(&foobar_cpu_notifier); cpu_notifier_register_done(); Fix the perf subsystem's hotplug notifier by using this latter form of callback registration. Also provide a bare-bones version of perf_cpu_notifier() that doesn't invoke the notifiers for the already online CPUs. This would be useful for subsystems that need to perform a different set of initialization for the already online CPUs, or don't need the initialization altogether. Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Arnaldo Carvalho de Melo Signed-off-by: Srivatsa S. Bhat Signed-off-by: Rafael J. Wysocki --- include/linux/perf_event.h | 16 +++++++++++++++- 1 file changed, 15 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/perf_event.h b/include/linux/perf_event.h index e56b07f5c9b6..3356abcfff18 100644 --- a/include/linux/perf_event.h +++ b/include/linux/perf_event.h @@ -835,6 +835,8 @@ do { \ { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ unsigned long cpu = smp_processor_id(); \ unsigned long flags; \ + \ + cpu_notifier_register_begin(); \ fn(&fn##_nb, (unsigned long)CPU_UP_PREPARE, \ (void *)(unsigned long)cpu); \ local_irq_save(flags); \ @@ -843,9 +845,21 @@ do { \ local_irq_restore(flags); \ fn(&fn##_nb, (unsigned long)CPU_ONLINE, \ (void *)(unsigned long)cpu); \ - register_cpu_notifier(&fn##_nb); \ + __register_cpu_notifier(&fn##_nb); \ + cpu_notifier_register_done(); \ } while (0) +/* + * Bare-bones version of perf_cpu_notifier(), which doesn't invoke the + * callback for already online CPUs. + */ +#define __perf_cpu_notifier(fn) \ +do { \ + static struct notifier_block fn##_nb = \ + { .notifier_call = fn, .priority = CPU_PRI_PERF }; \ + \ + __register_cpu_notifier(&fn##_nb); \ +} while (0) struct perf_pmu_events_attr { struct device_attribute attr; -- cgit v1.2.3 From 099dd235113700bbb476e572cd191ddb77b9af46 Mon Sep 17 00:00:00 2001 From: "Eric W. Biederman" Date: Fri, 28 Feb 2014 20:36:55 -0800 Subject: audit: Send replies in the proper network namespace. In perverse cases of file descriptor passing the current network namespace of a process and the network namespace of a socket used by that socket may differ. Therefore use the network namespace of the appropiate socket to ensure replies always go to the appropiate socket. Signed-off-by: "Eric W. Biederman" Acked-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/linux/audit.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index aa865a9a4c4f..ec1464df4c60 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -43,6 +43,7 @@ struct mq_attr; struct mqstat; struct audit_watch; struct audit_tree; +struct sk_buff; struct audit_krule { int vers_ops; @@ -463,7 +464,7 @@ extern int audit_filter_user(int type); extern int audit_filter_type(int type); extern int audit_rule_change(int type, __u32 portid, int seq, void *data, size_t datasz); -extern int audit_list_rules_send(__u32 portid, int seq); +extern int audit_list_rules_send(struct sk_buff *request_skb, int seq); extern u32 audit_enabled; #else /* CONFIG_AUDIT */ -- cgit v1.2.3 From 4b58841149dcaa500ceba1d5378ae70622fe4899 Mon Sep 17 00:00:00 2001 From: AKASHI Takahiro Date: Sat, 15 Mar 2014 14:48:00 +0900 Subject: audit: Add generic compat syscall support lib/audit.c provides a generic function for auditing system calls. This patch extends it for compat syscall support on bi-architectures (32/64-bit) by adding lib/compat_audit.c. What is required to support this feature are: * add asm/unistd32.h for compat system call names * select CONFIG_AUDIT_ARCH_COMPAT_GENERIC Signed-off-by: AKASHI Takahiro Acked-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/linux/audit.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index ec1464df4c60..4b2983e25ce0 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -79,6 +79,14 @@ extern int is_audit_feature_set(int which); extern int __init audit_register_class(int class, unsigned *list); extern int audit_classify_syscall(int abi, unsigned syscall); extern int audit_classify_arch(int arch); +/* only for compat system calls */ +extern unsigned compat_write_class[]; +extern unsigned compat_read_class[]; +extern unsigned compat_dir_class[]; +extern unsigned compat_chattr_class[]; +extern unsigned compat_signal_class[]; + +extern int __weak audit_classify_compat_syscall(int abi, unsigned syscall); /* audit_names->type values */ #define AUDIT_TYPE_UNKNOWN 0 /* we don't know yet */ -- cgit v1.2.3 From ad36d28293936b03d6b7996e9d6aadfd73c0eb08 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Thu, 15 Aug 2013 18:05:12 -0400 Subject: pid: get pid_t ppid of task in init_pid_ns Added the functions task_ppid_nr_ns() and task_ppid_nr() to abstract the lookup of the PPID (real_parent's pid_t) of a process, including rcu locking, in the arbitrary and init_pid_ns. This provides an alternative to sys_getppid(), which is relative to the child process' pid namespace. (informed by ebiederman's 6c621b7e) Cc: stable@vger.kernel.org Cc: Eric W. Biederman Signed-off-by: Richard Guy Briggs --- include/linux/sched.h | 20 +++++++++++++++++++- 1 file changed, 19 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 53f97eb8dbc7..116e301fb13f 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1561,6 +1561,24 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } +static int pid_alive(const struct task_struct *p); +static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) +{ + pid_t pid = 0; + + rcu_read_lock(); + if (pid_alive(tsk)) + pid = task_tgid_nr_ns(rcu_dereference(tsk->real_parent), ns); + rcu_read_unlock(); + + return pid; +} + +static inline pid_t task_ppid_nr(const struct task_struct *tsk) +{ + return task_ppid_nr_ns(tsk, &init_pid_ns); +} + static inline pid_t task_pgrp_nr_ns(struct task_struct *tsk, struct pid_namespace *ns) { @@ -1600,7 +1618,7 @@ static inline pid_t task_pgrp_nr(struct task_struct *tsk) * * Return: 1 if the process is alive. 0 otherwise. */ -static inline int pid_alive(struct task_struct *p) +static inline int pid_alive(const struct task_struct *p) { return p->pids[PIDTYPE_PID].pid != NULL; } -- cgit v1.2.3 From 80e0b6e8a001361316a2d62b748fe677ec46b860 Mon Sep 17 00:00:00 2001 From: Richard Guy Briggs Date: Sun, 16 Mar 2014 14:00:19 -0400 Subject: sched: declare pid_alive as inline We accidentally declared pid_alive without any extern/inline connotation. Some platforms were fine with this, some like ia64 and mips were very angry. If the function is inline, the prototype should be inline! on ia64: include/linux/sched.h:1718: warning: 'pid_alive' declared inline after being called Signed-off-by: Richard Guy Briggs Signed-off-by: Eric Paris --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 116e301fb13f..0f72548732f1 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1561,7 +1561,7 @@ static inline pid_t task_tgid_vnr(struct task_struct *tsk) } -static int pid_alive(const struct task_struct *p); +static inline int pid_alive(const struct task_struct *p); static inline pid_t task_ppid_nr_ns(const struct task_struct *tsk, struct pid_namespace *ns) { pid_t pid = 0; -- cgit v1.2.3 From 58f86cc89c3372d3e61d5b71e5513ec5a0b02848 Mon Sep 17 00:00:00 2001 From: Rusty Russell Date: Mon, 24 Mar 2014 12:00:34 +1030 Subject: VERIFY_OCTAL_PERMISSIONS: stricter checking for sysfs perms. Summary of http://lkml.org/lkml/2014/3/14/363 : Ted: module_param(queue_depth, int, 444) Joe: 0444! Rusty: User perms >= group perms >= other perms? Joe: CLASS_ATTR, DEVICE_ATTR, SENSOR_ATTR and SENSOR_ATTR_2? Side effect of stricter permissions means removing the unnecessary S_IFREG from several callers. Note that the BUILD_BUG_ON_ZERO((perm) & 2) test was removed: a fair number of drivers fail this test, so that will be the debate for a future patch. Suggested-by: Joe Perches Acked-by: Bjorn Helgaas for drivers/pci/slot.c Acked-by: Greg Kroah-Hartman Cc: Miklos Szeredi Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Rusty Russell --- include/linux/kernel.h | 8 ++++++++ include/linux/moduleparam.h | 8 +++----- include/linux/sysfs.h | 3 ++- 3 files changed, 13 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 471090093c67..4679eddc110a 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -842,4 +842,12 @@ static inline void ftrace_dump(enum ftrace_dump_mode oops_dump_mode) { } # define REBUILD_DUE_TO_FTRACE_MCOUNT_RECORD #endif +/* Permissions on a sysfs file: you didn't miss the 0 prefix did you? */ +#define VERIFY_OCTAL_PERMISSIONS(perms) \ + (BUILD_BUG_ON_ZERO((perms) < 0) + \ + BUILD_BUG_ON_ZERO((perms) > 0777) + \ + /* User perms >= group perms >= other perms */ \ + BUILD_BUG_ON_ZERO(((perms) >> 6) < (((perms) >> 3) & 7)) + \ + BUILD_BUG_ON_ZERO((((perms) >> 3) & 7) < ((perms) & 7)) + \ + (perms)) #endif diff --git a/include/linux/moduleparam.h b/include/linux/moduleparam.h index 175f6995d1af..204a67743804 100644 --- a/include/linux/moduleparam.h +++ b/include/linux/moduleparam.h @@ -186,14 +186,12 @@ struct kparam_array parameters. */ #define __module_param_call(prefix, name, ops, arg, perm, level) \ /* Default value instead of permissions? */ \ - static int __param_perm_check_##name __attribute__((unused)) = \ - BUILD_BUG_ON_ZERO((perm) < 0 || (perm) > 0777 || ((perm) & 2)) \ - + BUILD_BUG_ON_ZERO(sizeof(""prefix) > MAX_PARAM_PREFIX_LEN); \ - static const char __param_str_##name[] = prefix #name; \ + static const char __param_str_##name[] = prefix #name; \ static struct kernel_param __moduleparam_const __param_##name \ __used \ __attribute__ ((unused,__section__ ("__param"),aligned(sizeof(void *)))) \ - = { __param_str_##name, ops, perm, level, { arg } } + = { __param_str_##name, ops, VERIFY_OCTAL_PERMISSIONS(perm), \ + level, { arg } } /* Obsolete - use module_param_cb() */ #define module_param_call(name, set, get, arg, perm) \ diff --git a/include/linux/sysfs.h b/include/linux/sysfs.h index 30b2ebee6439..f517e6e488c8 100644 --- a/include/linux/sysfs.h +++ b/include/linux/sysfs.h @@ -71,7 +71,8 @@ struct attribute_group { */ #define __ATTR(_name, _mode, _show, _store) { \ - .attr = {.name = __stringify(_name), .mode = _mode }, \ + .attr = {.name = __stringify(_name), \ + .mode = VERIFY_OCTAL_PERMISSIONS(_mode) }, \ .show = _show, \ .store = _store, \ } -- cgit v1.2.3 From 5a92e700af2e5e0e6404988d6a7f2ed3dad3f46f Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 21 Feb 2014 14:13:44 -0700 Subject: NVMe: RCU protected access to io queues This adds rcu protected access to nvme_queue to fix a race between a surprise removal freeing the queue and a thread with open reference on a NVMe block device using that queue. The queues do not need to be rcu protected during the initialization or shutdown parts, so I've added a helper function for raw deferencing to get around the sparse errors. There is still a hole in the IOCTL path for the same problem, which is fixed in a subsequent patch. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 69ae03f6eb15..98d367b06f9c 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -73,7 +73,7 @@ enum { */ struct nvme_dev { struct list_head node; - struct nvme_queue **queues; + struct nvme_queue __rcu **queues; u32 __iomem *dbs; struct pci_dev *pci_dev; struct dma_pool *prp_page_pool; -- cgit v1.2.3 From 4f5099af4f3d5f999d8ab7784472d93e810e3912 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 3 Mar 2014 16:39:13 -0700 Subject: NVMe: IOCTL path RCU protect queue access This adds rcu protected access to a queue in the nvme IOCTL path to fix potential races between a surprise removal and queue usage in nvme_submit_sync_cmd. The fix holds the rcu_read_lock() here to prevent the nvme_queue from freeing while this path is executing so it can't sleep, and so this path will no longer wait for a available command id should they all be in use at the time a passthrough IOCTL request is received. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 98d367b06f9c..7c3f85bc10f1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -151,10 +151,7 @@ struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length); void nvme_unmap_user_pages(struct nvme_dev *dev, int write, struct nvme_iod *iod); -struct nvme_queue *get_nvmeq(struct nvme_dev *dev); -void put_nvmeq(struct nvme_queue *nvmeq); -int nvme_submit_sync_cmd(struct nvme_queue *nvmeq, struct nvme_command *cmd, - u32 *result, unsigned timeout); +int nvme_submit_io_cmd(struct nvme_dev *, struct nvme_command *, u32 *); int nvme_submit_flush_data(struct nvme_queue *nvmeq, struct nvme_ns *ns); int nvme_submit_admin_cmd(struct nvme_dev *, struct nvme_command *, u32 *result); -- cgit v1.2.3 From 356750e35e86485c464704c0a32c1d8dc77590d7 Mon Sep 17 00:00:00 2001 From: Eric Paris Date: Mon, 24 Mar 2014 12:13:48 -0400 Subject: audit: define audit_is_compat in kernel internal header We were exposing a function based on kernel config options to userspace. This is wrong. Move it to the audit internal header. Suggested-by: Chris Metcalf Signed-off-by: Eric Paris --- include/linux/audit.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 4b2983e25ce0..611a59a56f1a 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -102,6 +102,12 @@ struct filename; extern void audit_log_session_info(struct audit_buffer *ab); +#ifdef CONFIG_COMPAT +#define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) +#else +#define audit_is_compat(arch) false +#endif + #ifdef CONFIG_AUDITSYSCALL /* These are defined in auditsc.c */ /* Public API */ -- cgit v1.2.3 From 3f4eb14bdbe148fcc3a8e02f506ccc9b8c955ad4 Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Thu, 20 Mar 2014 18:48:34 +0530 Subject: mtd: devices: elm: check for hardware engine's design constraints ELM hardware engine is used by BCH ecc-schemes for detecting and locating ECC errors. This patch adds the following checks for ELM hardware engine: - ELM internal buffers are of 1K, so it cannot process data with ecc-step-size > 1K. - ELM engine can execute upto maximum of 8 threads in parallel, so in *page-mode* (when complete page is processed in single iteration), ELM cannot support ecc-steps > 8. Signed-off-by: Pekon Gupta Reviewed-by: Ezequiel Garcia Signed-off-by: Brian Norris --- include/linux/platform_data/elm.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index bf0a83b7ed9d..6e37156b0902 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -50,5 +50,6 @@ struct elm_errorvec { void elm_decode_bch_error_page(struct device *dev, u8 *ecc_calc, struct elm_errorvec *err_vec); -int elm_config(struct device *dev, enum bch_ecc bch_type); +int elm_config(struct device *dev, enum bch_ecc bch_type, + int ecc_steps, int ecc_step_size, int ecc_syndrome_size); #endif /* __ELM_H */ -- cgit v1.2.3 From ea0760244d235688b5fae4e5cdd9412c1fb1c2fe Mon Sep 17 00:00:00 2001 From: Pekon Gupta Date: Thu, 20 Mar 2014 18:48:35 +0530 Subject: mtd: devices: elm: clean elm_load_syndrome This patch refactors elm_load_syndrome() to make it scalable for newer ECC schemes by removing scheme specific macros (like ECC_BYTES*xx), and instead using ECC control information passed during elm_config. Signed-off-by: Pekon Gupta Reviewed-by: Ezequiel Garcia Signed-off-by: Brian Norris --- include/linux/platform_data/elm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/platform_data/elm.h b/include/linux/platform_data/elm.h index 6e37156b0902..4edb40676b3f 100644 --- a/include/linux/platform_data/elm.h +++ b/include/linux/platform_data/elm.h @@ -26,13 +26,6 @@ enum bch_ecc { /* ELM support 8 error syndrome process */ #define ERROR_VECTOR_MAX 8 -#define BCH8_ECC_OOB_BYTES 13 -#define BCH4_ECC_OOB_BYTES 7 -/* RBL requires 14 byte even though BCH8 uses only 13 byte */ -#define BCH8_SIZE (BCH8_ECC_OOB_BYTES + 1) -/* Uses 1 extra byte to handle erased pages */ -#define BCH4_SIZE (BCH4_ECC_OOB_BYTES + 1) - /** * struct elm_errorvec - error vector for elm * @error_reported: set true for vectors error is reported -- cgit v1.2.3 From e34fcb07a6d57411de6e15a47724fbe92c5caa42 Mon Sep 17 00:00:00 2001 From: David Mosberger Date: Fri, 21 Mar 2014 16:05:10 -0600 Subject: mtd: nand: fix GET/SET_FEATURES address on 16-bit devices GET_FEATURES and SET_FEATURES also need byte-addressing on 16-bit devices. Signed-off-by: David Mosberger Signed-off-by: Brian Norris --- include/linux/mtd/nand.h | 11 ++++++++++- 1 file changed, 10 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mtd/nand.h b/include/linux/mtd/nand.h index 0747fef2bfc6..450d61ec7f06 100644 --- a/include/linux/mtd/nand.h +++ b/include/linux/mtd/nand.h @@ -925,7 +925,16 @@ static inline bool nand_is_slc(struct nand_chip *chip) */ static inline int nand_opcode_8bits(unsigned int command) { - return command == NAND_CMD_READID || command == NAND_CMD_PARAM; + switch (command) { + case NAND_CMD_READID: + case NAND_CMD_PARAM: + case NAND_CMD_GET_FEATURES: + case NAND_CMD_SET_FEATURES: + return 1; + default: + break; + } + return 0; } /* return the supported JEDEC features. */ -- cgit v1.2.3 From e43a34e3ec5d1b14a11c3220f5a12aa797d73cd1 Mon Sep 17 00:00:00 2001 From: Kuninori Morimoto Date: Mon, 10 Mar 2014 18:11:50 -0700 Subject: shdma: add R-Car Audio DMAC peri peri driver Add support Audio DMAC peri peri driver for Renesas R-Car Gen2 SoC, using 'shdma-base' DMA driver framework. Signed-off-by: Kuninori Morimoto [fixed checkpatch error] Signed-off-by: Vinod Koul --- include/linux/platform_data/dma-rcar-audmapp.h | 34 ++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) create mode 100644 include/linux/platform_data/dma-rcar-audmapp.h (limited to 'include/linux') diff --git a/include/linux/platform_data/dma-rcar-audmapp.h b/include/linux/platform_data/dma-rcar-audmapp.h new file mode 100644 index 000000000000..471fffebbeb4 --- /dev/null +++ b/include/linux/platform_data/dma-rcar-audmapp.h @@ -0,0 +1,34 @@ +/* + * This is for Renesas R-Car Audio-DMAC-peri-peri. + * + * Copyright (C) 2014 Renesas Electronics Corporation + * Copyright (C) 2014 Kuninori Morimoto + * + * This file is based on the include/linux/sh_dma.h + * + * Header for the new SH dmaengine driver + * + * Copyright (C) 2010 Guennadi Liakhovetski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ +#ifndef SH_AUDMAPP_H +#define SH_AUDMAPP_H + +#include + +struct audmapp_slave_config { + int slave_id; + dma_addr_t src; + dma_addr_t dst; + u32 chcr; +}; + +struct audmapp_pdata { + struct audmapp_slave_config *slave; + int slave_num; +}; + +#endif /* SH_AUDMAPP_H */ -- cgit v1.2.3 From 83ddfebdd21da669918d7f9854fd592858625f4b Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 24 Mar 2014 11:58:59 +0800 Subject: SUNRPC: New helper for creating client with rpc_xprt Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/clnt.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/clnt.h b/include/linux/sunrpc/clnt.h index 8af2804bab16..70736b98c721 100644 --- a/include/linux/sunrpc/clnt.h +++ b/include/linux/sunrpc/clnt.h @@ -130,6 +130,8 @@ struct rpc_create_args { #define RPC_CLNT_CREATE_NO_RETRANS_TIMEOUT (1UL << 9) struct rpc_clnt *rpc_create(struct rpc_create_args *args); +struct rpc_clnt *rpc_create_xprt(struct rpc_create_args *args, + struct rpc_xprt *xprt); struct rpc_clnt *rpc_bind_new_program(struct rpc_clnt *, const struct rpc_program *, u32); void rpc_task_reset_client(struct rpc_task *task, struct rpc_clnt *clnt); -- cgit v1.2.3 From d531c008d7d9713456abe3d265fc577bba2e1cef Mon Sep 17 00:00:00 2001 From: Kinglong Mee Date: Mon, 24 Mar 2014 11:59:46 +0800 Subject: NFSD/SUNRPC: Check rpc_xprt out of xs_setup_bc_tcp Besides checking rpc_xprt out of xs_setup_bc_tcp, increase it's reference (it's important). Signed-off-by: Kinglong Mee Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/xprt.h | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/xprt.h b/include/linux/sunrpc/xprt.h index 8097b9df6773..3e5efb2b236e 100644 --- a/include/linux/sunrpc/xprt.h +++ b/include/linux/sunrpc/xprt.h @@ -295,13 +295,24 @@ int xprt_adjust_timeout(struct rpc_rqst *req); void xprt_release_xprt(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release_xprt_cong(struct rpc_xprt *xprt, struct rpc_task *task); void xprt_release(struct rpc_task *task); -struct rpc_xprt * xprt_get(struct rpc_xprt *xprt); void xprt_put(struct rpc_xprt *xprt); struct rpc_xprt * xprt_alloc(struct net *net, size_t size, unsigned int num_prealloc, unsigned int max_req); void xprt_free(struct rpc_xprt *); +/** + * xprt_get - return a reference to an RPC transport. + * @xprt: pointer to the transport + * + */ +static inline struct rpc_xprt *xprt_get(struct rpc_xprt *xprt) +{ + if (atomic_inc_not_zero(&xprt->count)) + return xprt; + return NULL; +} + static inline __be32 *xprt_skip_transport_header(struct rpc_xprt *xprt, __be32 *p) { return p + xprt->tsh_size; -- cgit v1.2.3 From 3064639423c48d6e0eb9ecc27c512a58e38c6c57 Mon Sep 17 00:00:00 2001 From: Stanislav Kinsbursky Date: Wed, 26 Feb 2014 16:50:01 +0300 Subject: nfsd: check passed socket's net matches NFSd superblock's one There could be a case, when NFSd file system is mounted in network, different to socket's one, like below: "ip netns exec" creates new network and mount namespace, which duplicates NFSd mount point, created in init_net context. And thus NFS server stop in nested network context leads to RPCBIND client destruction in init_net. Then, on NFSd start in nested network context, rpc.nfsd process creates socket in nested net and passes it into "write_ports", which leads to RPCBIND sockets creation in init_net context because of the same reason (NFSd monut point was created in init_net context). An attempt to register passed socket in nested net leads to panic, because no RPCBIND client present in nexted network namespace. This patch add check that passed socket's net matches NFSd superblock's one. And returns -EINVAL error to user psace otherwise. v2: Put socket on exit. Reported-by: Weng Meiling Signed-off-by: Stanislav Kinsbursky Cc: stable@vger.kernel.org Signed-off-by: J. Bruce Fields --- include/linux/sunrpc/svcsock.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 62fd1b756e99..947009ed5996 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -56,6 +56,7 @@ int svc_recv(struct svc_rqst *, long); int svc_send(struct svc_rqst *); void svc_drop(struct svc_rqst *); void svc_sock_update_bufs(struct svc_serv *serv); +bool svc_alien_sock(struct net *net, int fd); int svc_addsock(struct svc_serv *serv, const int fd, char *name_return, const size_t len); void svc_init_xprt_sock(void); -- cgit v1.2.3 From 24f870d8f0adcd38639f2f66e37aa7591a3fc408 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 12 Mar 2014 17:06:19 +0900 Subject: slab: fix wrongly used macro commit 'slab: restrict the number of objects in a slab' uses __builtin_constant_p() on #if macro. It is wrong usage of builtin function, but it is compiled on x86 without any problem, so I can't find it before 0 day build system find it. This commit fixes the situation by using KMALLOC_MIN_SIZE, instead of KMALLOC_SHIFT_LOW. KMALLOC_SHIFT_LOW is parsed to ilog2() on some architecture and this ilog2() uses __builtin_constant_p() and results in the problem. This problem would disappear by using KMALLOC_MIN_SIZE, since it is just constant. Tested-by: David Rientjes Signed-off-by: Joonsoo Kim Signed-off-by: Pekka Enberg --- include/linux/slab.h | 22 +++++++++++----------- 1 file changed, 11 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/slab.h b/include/linux/slab.h index d015dec02bf3..5df89f777a54 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -201,17 +201,6 @@ struct kmem_cache { #ifndef KMALLOC_SHIFT_LOW #define KMALLOC_SHIFT_LOW 5 #endif - -/* - * This restriction comes from byte sized index implementation. - * Page size is normally 2^12 bytes and, in this case, if we want to use - * byte sized index which can represent 2^8 entries, the size of the object - * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. - * If minimum size of kmalloc is less than 16, we use it as minimum object - * size and give up to use byte sized index. - */ -#define SLAB_OBJ_MIN_SIZE (KMALLOC_SHIFT_LOW < 4 ? \ - (1 << KMALLOC_SHIFT_LOW) : 16) #endif #ifdef CONFIG_SLUB @@ -253,6 +242,17 @@ struct kmem_cache { #define KMALLOC_MIN_SIZE (1 << KMALLOC_SHIFT_LOW) #endif +/* + * This restriction comes from byte sized index implementation. + * Page size is normally 2^12 bytes and, in this case, if we want to use + * byte sized index which can represent 2^8 entries, the size of the object + * should be equal or greater to 2^12 / 2^8 = 2^4 = 16. + * If minimum size of kmalloc is less than 16, we use it as minimum object + * size and give up to use byte sized index. + */ +#define SLAB_OBJ_MIN_SIZE (KMALLOC_MIN_SIZE < 16 ? \ + (KMALLOC_MIN_SIZE) : 16) + #ifndef CONFIG_SLOB extern struct kmem_cache *kmalloc_caches[KMALLOC_SHIFT_HIGH + 1]; #ifdef CONFIG_ZONE_DMA -- cgit v1.2.3 From f2ebb3a921c1ca1e2ddd9242e95a1989a50c4c68 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 27 Feb 2014 09:35:45 -0500 Subject: smarter propagate_mnt() The current mainline has copies propagated to *all* nodes, then tears down the copies we made for nodes that do not contain counterparts of the desired mountpoint. That sets the right propagation graph for the copies (at teardown time we move the slaves of removed node to a surviving peer or directly to master), but we end up paying a fairly steep price in useless allocations. It's fairly easy to create a situation where N calls of mount(2) create exactly N bindings, with O(N^2) vfsmounts allocated and freed in process. Fortunately, it is possible to avoid those allocations/freeings. The trick is to create copies in the right order and find which one would've eventually become a master with the current algorithm. It turns out to be possible in O(nodes getting propagation) time and with no extra allocations at all. One part is that we need to make sure that eventual master will be created before its slaves, so we need to walk the propagation tree in a different order - by peer groups. And iterate through the peers before dealing with the next group. Another thing is finding the (earlier) copy that will be a master of one we are about to create; to do that we are (temporary) marking the masters of mountpoints we are attaching the copies to. Either we are in a peer of the last mountpoint we'd dealt with, or we have the following situation: we are attaching to mountpoint M, the last copy S_0 had been attached to M_0 and there are sequences S_0...S_n, M_0...M_n such that S_{i+1} is a master of S_{i}, S_{i} mounted on M{i} and we need to create a slave of the first S_{k} such that M is getting propagation from M_{k}. It means that the master of M_{k} will be among the sequence of masters of M. On the other hand, the nearest marked node in that sequence will either be the master of M_{k} or the master of M_{k-1} (the latter - in the case if M_{k-1} is a slave of something M gets propagation from, but in a wrong peer group). So we go through the sequence of masters of M until we find a marked one (P). Let N be the one before it. Then we go through the sequence of masters of S_0 until we find one (say, S) mounted on a node D that has P as master and check if D is a peer of N. If it is, S will be the master of new copy, if not - the master of S will be. That's it for the hard part; the rest is fairly simple. Iterator is in next_group(), handling of one prospective mountpoint is propagate_one(). It seems to survive all tests and gives a noticably better performance than the current mainline for setups that are seriously using shared subtrees. Cc: stable@vger.kernel.org Signed-off-by: Al Viro --- include/linux/mount.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mount.h b/include/linux/mount.h index 371d346fa270..839bac270904 100644 --- a/include/linux/mount.h +++ b/include/linux/mount.h @@ -44,6 +44,8 @@ struct mnt_namespace; #define MNT_SHARED_MASK (MNT_UNBINDABLE) #define MNT_PROPAGATION_MASK (MNT_SHARED | MNT_UNBINDABLE) +#define MNT_INTERNAL_FLAGS (MNT_SHARED | MNT_WRITE_HOLD | MNT_INTERNAL | \ + MNT_DOOMED | MNT_SYNC_UMOUNT | MNT_MARKED) #define MNT_INTERNAL 0x4000 @@ -51,6 +53,7 @@ struct mnt_namespace; #define MNT_LOCKED 0x800000 #define MNT_DOOMED 0x1000000 #define MNT_SYNC_UMOUNT 0x2000000 +#define MNT_MARKED 0x4000000 struct vfsmount { struct dentry *mnt_root; /* root of the mounted tree */ -- cgit v1.2.3 From e25115786ee540fc428a14872ebd4f56252aba32 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Wed, 5 Mar 2014 20:41:36 -0500 Subject: switch nbd to sockfd_lookup/sockfd_put Signed-off-by: Al Viro --- include/linux/nbd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nbd.h b/include/linux/nbd.h index ae4981ebd18e..f62f78aef4ac 100644 --- a/include/linux/nbd.h +++ b/include/linux/nbd.h @@ -24,8 +24,7 @@ struct request; struct nbd_device { int flags; int harderror; /* Code of hard error */ - struct socket * sock; - struct file * file; /* If == NULL, device is not ready, yet */ + struct socket * sock; /* If == NULL, device is not ready, yet */ int magic; spinlock_t queue_lock; -- cgit v1.2.3 From 4597e695b8baa3e2620da89c7593be70cf20566b Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 10:06:32 -0400 Subject: get rid of DEBUG_WRITECOUNT it only makes control flow in __fput() and friends more convoluted. Signed-off-by: Al Viro --- include/linux/fs.h | 49 ------------------------------------------------- 1 file changed, 49 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 23b2a35d712e..e80659ed78fc 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -769,9 +769,6 @@ static inline int ra_has_index(struct file_ra_state *ra, pgoff_t index) index < ra->start + ra->size); } -#define FILE_MNT_WRITE_TAKEN 1 -#define FILE_MNT_WRITE_RELEASED 2 - struct file { union { struct llist_node fu_llist; @@ -809,9 +806,6 @@ struct file { struct list_head f_tfile_llink; #endif /* #ifdef CONFIG_EPOLL */ struct address_space *f_mapping; -#ifdef CONFIG_DEBUG_WRITECOUNT - unsigned long f_mnt_write_state; -#endif } __attribute__((aligned(4))); /* lest something weird decides that 2 is OK */ struct file_handle { @@ -829,49 +823,6 @@ static inline struct file *get_file(struct file *f) #define fput_atomic(x) atomic_long_add_unless(&(x)->f_count, -1, 1) #define file_count(x) atomic_long_read(&(x)->f_count) -#ifdef CONFIG_DEBUG_WRITECOUNT -static inline void file_take_write(struct file *f) -{ - WARN_ON(f->f_mnt_write_state != 0); - f->f_mnt_write_state = FILE_MNT_WRITE_TAKEN; -} -static inline void file_release_write(struct file *f) -{ - f->f_mnt_write_state |= FILE_MNT_WRITE_RELEASED; -} -static inline void file_reset_write(struct file *f) -{ - f->f_mnt_write_state = 0; -} -static inline void file_check_state(struct file *f) -{ - /* - * At this point, either both or neither of these bits - * should be set. - */ - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN); - WARN_ON(f->f_mnt_write_state == FILE_MNT_WRITE_RELEASED); -} -static inline int file_check_writeable(struct file *f) -{ - if (f->f_mnt_write_state == FILE_MNT_WRITE_TAKEN) - return 0; - printk(KERN_WARNING "writeable file with no " - "mnt_want_write()\n"); - WARN_ON(1); - return -EINVAL; -} -#else /* !CONFIG_DEBUG_WRITECOUNT */ -static inline void file_take_write(struct file *filp) {} -static inline void file_release_write(struct file *filp) {} -static inline void file_reset_write(struct file *filp) {} -static inline void file_check_state(struct file *filp) {} -static inline int file_check_writeable(struct file *filp) -{ - return 0; -} -#endif /* CONFIG_DEBUG_WRITECOUNT */ - #define MAX_NON_LFS ((1UL<<31) - 1) /* Page cache limit. The filesystems should put that into their s_maxbytes -- cgit v1.2.3 From 83f936c75e3689a63253d89c47a4d239c56d7410 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:02:47 -0400 Subject: mark struct file that had write access grabbed by open() new flag in ->f_mode - FMODE_WRITER. Set by do_dentry_open() in case when it has grabbed write access, checked by __fput() to decide whether it wants to drop the sucker. Allows to stop bothering with mnt_clone_write() in alloc_file(), along with fewer special_file() checks. Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e80659ed78fc..d9d88a0b456e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -125,6 +125,8 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, /* File needs atomic accesses to f_pos */ #define FMODE_ATOMIC_POS ((__force fmode_t)0x8000) +/* Write access to underlying fs */ +#define FMODE_WRITER ((__force fmode_t)0x10000) /* File was opened by fanotify and shouldn't generate fanotify events */ #define FMODE_NONOTIFY ((__force fmode_t)0x1000000) -- cgit v1.2.3 From 7f4b36f9bb930b3b2105a9a2cb0121fa7028c432 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 12:45:29 -0400 Subject: get rid of files_defer_init() the only thing it's doing these days is calculation of upper limit for fs.nr_open sysctl and that can be done statically Signed-off-by: Al Viro --- include/linux/fdtable.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fdtable.h b/include/linux/fdtable.h index 70e8e21c0a30..230f87bdf5ad 100644 --- a/include/linux/fdtable.h +++ b/include/linux/fdtable.h @@ -63,8 +63,6 @@ struct file_operations; struct vfsmount; struct dentry; -extern void __init files_defer_init(void); - #define rcu_dereference_check_fdtable(files, fdtfd) \ rcu_dereference_check((fdtfd), lockdep_is_held(&(files)->file_lock)) -- cgit v1.2.3 From 5d826c847b34de6415b4f1becd88a57ff619af50 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 14 Mar 2014 13:42:45 -0400 Subject: new helper: readlink_copy() Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index d9d88a0b456e..db181b542db1 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2517,7 +2517,7 @@ extern const struct file_operations generic_ro_fops; #define special_file(m) (S_ISCHR(m)||S_ISBLK(m)||S_ISFIFO(m)||S_ISSOCK(m)) -extern int vfs_readlink(struct dentry *, char __user *, int, const char *); +extern int readlink_copy(char __user *, int, const char *); extern int page_readlink(struct dentry *, char __user *, int); extern void *page_follow_link_light(struct dentry *, struct nameidata *); extern void page_put_link(struct dentry *, struct nameidata *, void *); -- cgit v1.2.3 From fbb32750a62df75d1ffea547f3908b21c5496d9f Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 21:09:54 -0500 Subject: pipe: kill ->map() and ->unmap() all pipe_buffer_operations have the same instances of those... Signed-off-by: Al Viro --- include/linux/pipe_fs_i.h | 19 ------------------- 1 file changed, 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pipe_fs_i.h b/include/linux/pipe_fs_i.h index ab5752692113..6dffcebe6105 100644 --- a/include/linux/pipe_fs_i.h +++ b/include/linux/pipe_fs_i.h @@ -82,23 +82,6 @@ struct pipe_buf_operations { */ int can_merge; - /* - * ->map() returns a virtual address mapping of the pipe buffer. - * The last integer flag reflects whether this should be an atomic - * mapping or not. The atomic map is faster, however you can't take - * page faults before calling ->unmap() again. So if you need to eg - * access user data through copy_to/from_user(), then you must get - * a non-atomic map. ->map() uses the kmap_atomic slot for - * atomic maps, you have to be careful if mapping another page as - * source or destination for a copy. - */ - void * (*map)(struct pipe_inode_info *, struct pipe_buffer *, int); - - /* - * Undoes ->map(), finishes the virtual mapping of the pipe buffer. - */ - void (*unmap)(struct pipe_inode_info *, struct pipe_buffer *, void *); - /* * ->confirm() verifies that the data in the pipe buffer is there * and that the contents are good. If the pages in the pipe belong @@ -150,8 +133,6 @@ struct pipe_inode_info *alloc_pipe_info(void); void free_pipe_info(struct pipe_inode_info *); /* Generic pipe buffer ops functions */ -void *generic_pipe_buf_map(struct pipe_inode_info *, struct pipe_buffer *, int); -void generic_pipe_buf_unmap(struct pipe_inode_info *, struct pipe_buffer *, void *); void generic_pipe_buf_get(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_confirm(struct pipe_inode_info *, struct pipe_buffer *); int generic_pipe_buf_steal(struct pipe_inode_info *, struct pipe_buffer *); -- cgit v1.2.3 From c186afb4dbd0050a537b96c7fbee2dba3b57fc38 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 2 Feb 2014 21:16:54 -0500 Subject: switch ->is_partially_uptodate() to saner arguments Signed-off-by: Al Viro --- include/linux/buffer_head.h | 4 ++-- include/linux/fs.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d77797a52b7b..c40302f909ce 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -210,8 +210,8 @@ int block_write_full_page(struct page *page, get_block_t *get_block, int block_write_full_page_endio(struct page *page, get_block_t *get_block, struct writeback_control *wbc, bh_end_io_t *handler); int block_read_full_page(struct page*, get_block_t*); -int block_is_partially_uptodate(struct page *page, read_descriptor_t *desc, - unsigned long from); +int block_is_partially_uptodate(struct page *page, unsigned long from, + unsigned long count); int block_write_begin(struct address_space *mapping, loff_t pos, unsigned len, unsigned flags, struct page **pagep, get_block_t *get_block); int __block_write_begin(struct page *page, loff_t pos, unsigned len, diff --git a/include/linux/fs.h b/include/linux/fs.h index db181b542db1..ddfff2ecef0b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -385,7 +385,7 @@ struct address_space_operations { int (*migratepage) (struct address_space *, struct page *, struct page *, enum migrate_mode); int (*launder_page) (struct page *); - int (*is_partially_uptodate) (struct page *, read_descriptor_t *, + int (*is_partially_uptodate) (struct page *, unsigned long, unsigned long); void (*is_dirty_writeback) (struct page *, bool *, bool *); int (*error_remove_page)(struct address_space *, struct page *); -- cgit v1.2.3 From 9223687863ffa63fa655f52ef64148ee08dee4d1 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 27 Nov 2013 16:29:46 -0800 Subject: iov_iter: Move iov_iter to uio.h Signed-off-by: Kent Overstreet --- include/linux/fs.h | 32 -------------------------------- include/linux/uio.h | 50 ++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 50 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index ddfff2ecef0b..2261ac8f0534 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -295,38 +295,6 @@ struct page; struct address_space; struct writeback_control; -struct iov_iter { - const struct iovec *iov; - unsigned long nr_segs; - size_t iov_offset; - size_t count; -}; - -size_t iov_iter_copy_from_user_atomic(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -size_t iov_iter_copy_from_user(struct page *page, - struct iov_iter *i, unsigned long offset, size_t bytes); -void iov_iter_advance(struct iov_iter *i, size_t bytes); -int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); -size_t iov_iter_single_seg_count(const struct iov_iter *i); - -static inline void iov_iter_init(struct iov_iter *i, - const struct iovec *iov, unsigned long nr_segs, - size_t count, size_t written) -{ - i->iov = iov; - i->nr_segs = nr_segs; - i->iov_offset = 0; - i->count = count + written; - - iov_iter_advance(i, written); -} - -static inline size_t iov_iter_count(struct iov_iter *i) -{ - return i->count; -} - /* * "descriptor" for what we're up to with a read. * This allows us to use the same read code yet diff --git a/include/linux/uio.h b/include/linux/uio.h index c55ce243cc09..347d70ce098e 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -9,14 +9,23 @@ #ifndef __LINUX_UIO_H #define __LINUX_UIO_H +#include #include +struct page; struct kvec { void *iov_base; /* and that should *never* hold a userland pointer */ size_t iov_len; }; +struct iov_iter { + const struct iovec *iov; + unsigned long nr_segs; + size_t iov_offset; + size_t count; +}; + /* * Total number of bytes covered by an iovec. * @@ -34,8 +43,49 @@ static inline size_t iov_length(const struct iovec *iov, unsigned long nr_segs) return ret; } +static inline struct iovec iov_iter_iovec(const struct iov_iter *iter) +{ + return (struct iovec) { + .iov_base = iter->iov->iov_base + iter->iov_offset, + .iov_len = min(iter->count, + iter->iov->iov_len - iter->iov_offset), + }; +} + +#define iov_for_each(iov, iter, start) \ + for (iter = (start); \ + (iter).count && \ + ((iov = iov_iter_iovec(&(iter))), 1); \ + iov_iter_advance(&(iter), (iov).iov_len)) + unsigned long iov_shorten(struct iovec *iov, unsigned long nr_segs, size_t to); +size_t iov_iter_copy_from_user_atomic(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +size_t iov_iter_copy_from_user(struct page *page, + struct iov_iter *i, unsigned long offset, size_t bytes); +void iov_iter_advance(struct iov_iter *i, size_t bytes); +int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); +size_t iov_iter_single_seg_count(const struct iov_iter *i); + +static inline void iov_iter_init(struct iov_iter *i, + const struct iovec *iov, unsigned long nr_segs, + size_t count, size_t written) +{ + i->iov = iov; + i->nr_segs = nr_segs; + i->iov_offset = 0; + i->count = count + written; + + iov_iter_advance(i, written); +} + +static inline size_t iov_iter_count(struct iov_iter *i) +{ + return i->count; +} + int memcpy_fromiovec(unsigned char *kdata, struct iovec *iov, int len); int memcpy_toiovec(struct iovec *iov, unsigned char *kdata, int len); + #endif -- cgit v1.2.3 From 6e58e79db8a16222b31fc8da1ca2ac2dccfc4237 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Mon, 3 Feb 2014 17:07:03 -0500 Subject: introduce copy_page_to_iter, kill loop over iovec in generic_file_aio_read() generic_file_aio_read() was looping over the target iovec, with loop over (source) pages nested inside that. Just set an iov_iter up and pass *that* to do_generic_file_aio_read(). With copy_page_to_iter() doing all work of mapping and copying a page to iovec and advancing iov_iter. Switch shmem_file_aio_read() to the same and kill file_read_actor(), while we are at it. Signed-off-by: Al Viro --- include/linux/fs.h | 1 - include/linux/uio.h | 2 ++ 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2261ac8f0534..2a5b1744f80a 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2390,7 +2390,6 @@ extern int generic_file_mmap(struct file *, struct vm_area_struct *); extern int generic_file_readonly_mmap(struct file *, struct vm_area_struct *); extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); -extern int file_read_actor(read_descriptor_t * desc, struct page *page, unsigned long offset, unsigned long size); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, diff --git a/include/linux/uio.h b/include/linux/uio.h index 347d70ce098e..199bcc34241b 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -67,6 +67,8 @@ size_t iov_iter_copy_from_user(struct page *page, void iov_iter_advance(struct iov_iter *i, size_t bytes); int iov_iter_fault_in_readable(struct iov_iter *i, size_t bytes); size_t iov_iter_single_seg_count(const struct iov_iter *i); +size_t copy_page_to_iter(struct page *page, size_t offset, size_t bytes, + struct iov_iter *i); static inline void iov_iter_init(struct iov_iter *i, const struct iovec *iov, unsigned long nr_segs, -- cgit v1.2.3 From 86d564c84c38b1ec06d9f2120d6a7373dcaeff0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sat, 8 Feb 2014 20:42:52 -0500 Subject: constify blk_rq_map_user_iov() and friends sg_iovec array passed to it can be const Signed-off-by: Al Viro --- include/linux/bio.h | 5 +++-- include/linux/blkdev.h | 4 ++-- 2 files changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5a4d39b4686b..21e27208316c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -388,7 +388,7 @@ struct sg_iovec; struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct block_device *, - struct sg_iovec *, int, int, gfp_t); + const struct sg_iovec *, int, int, gfp_t); extern void bio_unmap_user(struct bio *); extern struct bio *bio_map_kern(struct request_queue *, void *, unsigned int, gfp_t); @@ -414,7 +414,8 @@ extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); extern struct bio *bio_copy_user(struct request_queue *, struct rq_map_data *, unsigned long, unsigned int, int, gfp_t); extern struct bio *bio_copy_user_iov(struct request_queue *, - struct rq_map_data *, struct sg_iovec *, + struct rq_map_data *, + const struct sg_iovec *, int, int, gfp_t); extern int bio_uncopy_user(struct bio *); void zero_fill_bio(struct bio *bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4afa4f8f6090..a639fd8a6d7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -823,8 +823,8 @@ extern int blk_rq_map_user(struct request_queue *, struct request *, extern int blk_rq_unmap_user(struct bio *); extern int blk_rq_map_kern(struct request_queue *, struct request *, void *, unsigned int, gfp_t); extern int blk_rq_map_user_iov(struct request_queue *, struct request *, - struct rq_map_data *, struct sg_iovec *, int, - unsigned int, gfp_t); + struct rq_map_data *, const struct sg_iovec *, + int, unsigned int, gfp_t); extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, -- cgit v1.2.3 From 41fc56d573c35a212688b12b48af8c303f9bb6d2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 12:58:52 -0500 Subject: kill the 4th argument of __generic_file_aio_write() It's always equal to &iocb->ki_pos, where iocb is the value of the 1st argument. Signed-off-by: Al Viro --- include/linux/fs.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2a5b1744f80a..e677d1e1189f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2392,8 +2392,7 @@ extern int generic_file_remap_pages(struct vm_area_struct *, unsigned long addr, unsigned long size, pgoff_t pgoff); int generic_write_checks(struct file *file, loff_t *pos, size_t *count, int isblk); extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsigned long, loff_t); -extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, - loff_t *); +extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); -- cgit v1.2.3 From fcacafd269adc88f41b68cb77a3f957a66563428 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 13:37:49 -0500 Subject: kill the 5th argument of generic_file_buffered_write() same story - it's &iocb->ki_pos in all cases Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index e677d1e1189f..830e37420f5e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2397,7 +2397,7 @@ extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsi extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, loff_t *, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, loff_t *, size_t, ssize_t); + unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern int generic_segment_checks(const struct iovec *iov, -- cgit v1.2.3 From 5cb6c6c7eb1ed24744b41fad47d9a25b72207098 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 20:58:20 -0500 Subject: generic_file_direct_write(): get rid of ppos argument always equal to &iocb->ki_pos. Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 830e37420f5e..9dfd7c7ff8e3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2395,7 +2395,7 @@ extern ssize_t generic_file_aio_read(struct kiocb *, const struct iovec *, unsig extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long); extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, - unsigned long *, loff_t, loff_t *, size_t, size_t); + unsigned long *, loff_t, size_t, size_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -- cgit v1.2.3 From 3b93f911d55cf8b3540f82fac4eeddfee7b5de0c Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 21:34:08 -0500 Subject: export generic_perform_write(), start getting rid of generic_file_buffer_write() Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 9dfd7c7ff8e3..87be51aff9d7 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -48,6 +48,7 @@ struct cred; struct swap_info_struct; struct seq_file; struct workqueue_struct; +struct iov_iter; extern void __init inode_init(void); extern void __init inode_init_early(void); @@ -2396,6 +2397,7 @@ extern ssize_t __generic_file_aio_write(struct kiocb *, const struct iovec *, un extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsigned long, loff_t); extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, size_t, size_t); +extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); -- cgit v1.2.3 From ccad2365668f12336dd497d9039e8584af836070 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Tue, 11 Feb 2014 22:36:48 -0500 Subject: kill generic_file_buffered_write() Signed-off-by: Al Viro --- include/linux/fs.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 87be51aff9d7..c309b7a0da2d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2398,8 +2398,6 @@ extern ssize_t generic_file_aio_write(struct kiocb *, const struct iovec *, unsi extern ssize_t generic_file_direct_write(struct kiocb *, const struct iovec *, unsigned long *, loff_t, size_t, size_t); extern ssize_t generic_perform_write(struct file *, struct iov_iter *, loff_t); -extern ssize_t generic_file_buffered_write(struct kiocb *, const struct iovec *, - unsigned long, loff_t, size_t, ssize_t); extern ssize_t do_sync_read(struct file *filp, char __user *buf, size_t len, loff_t *ppos); extern ssize_t do_sync_write(struct file *filp, const char __user *buf, size_t len, loff_t *ppos); extern int generic_segment_checks(const struct iovec *iov, -- cgit v1.2.3 From 9d521470a40f16110bd31018034155c60c1a1275 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 31 Jan 2014 17:54:26 +0200 Subject: libceph: a per-osdc crush scratch buffer With the addition of erasure coding support in the future, scratch variable-length array in crush_do_rule_ary() is going to grow to at least 200 bytes on average, on top of another 128 bytes consumed by rawosd/osd arrays in the call chain. Replace it with a buffer inside struct osdmap and a mutex. This shouldn't result in any contention, because all osd requests were already serialized by request_mutex at that point; the only unlocked caller was ceph_ioctl_get_dataloc(). Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/osdmap.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 49ff69f0746b..8c8b3cefc28b 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -84,6 +84,9 @@ struct ceph_osdmap { /* the CRUSH map specifies the mapping of placement groups to * the list of osds that store+replicate them. */ struct crush_map *crush; + + struct mutex crush_scratch_mutex; + int crush_scratch_ary[CEPH_PG_MAX_SIZE * 3]; }; static inline void ceph_oid_set_name(struct ceph_object_id *oid, -- cgit v1.2.3 From 7b25bf5f02c5c80adf96120e031dc3a1756ce54d Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:26 +0200 Subject: libceph: encode CEPH_OSD_OP_FLAG_* op flags Encode ceph_osd_op::flags field so that it gets sent over the wire. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 1 + include/linux/ceph/rados.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index fd47e872ebcc..e94f5da251d6 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -76,6 +76,7 @@ struct ceph_osd_data { struct ceph_osd_req_op { u16 op; /* CEPH_OSD_OP_* */ + u32 flags; /* CEPH_OSD_OP_FLAG_* */ u32 payload_len; union { struct ceph_osd_data raw_data_in; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 96292df4041b..8f9bf4570215 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -382,7 +382,7 @@ enum { */ struct ceph_osd_op { __le16 op; /* CEPH_OSD_OP_* */ - __le32 flags; /* CEPH_OSD_FLAG_* */ + __le32 flags; /* CEPH_OSD_OP_FLAG_* */ union { struct { __le64 offset, length; -- cgit v1.2.3 From c647b8a8c6366f849c2a237bfe525cb1d316d5f4 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:27 +0200 Subject: libceph: add support for CEPH_OSD_OP_SETALLOCHINT osd op This is primarily for rbd's benefit and is supposed to combat fragmentation: "... knowing that rbd images have a 4m size, librbd can pass a hint that will let the osd do the xfs allocation size ioctl on new files so that they are allocated in 1m or 4m chunks. We've seen cases where users with rbd workloads have very high levels of fragmentation in xfs and this would mitigate that and probably have a pretty nice performance benefit." SETALLOCHINT is considered advisory, so our backwards compatibility mechanism here is to set FAILOK flag for all SETALLOCHINT ops. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 8 ++++++++ include/linux/ceph/rados.h | 7 +++++++ 2 files changed, 15 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index e94f5da251d6..c42d1ada685f 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -103,6 +103,10 @@ struct ceph_osd_req_op { u32 timeout; __u8 flag; } watch; + struct { + u64 expected_object_size; + u64 expected_write_size; + } alloc_hint; }; }; @@ -294,6 +298,10 @@ extern void osd_req_op_cls_init(struct ceph_osd_request *osd_req, extern void osd_req_op_watch_init(struct ceph_osd_request *osd_req, unsigned int which, u16 opcode, u64 cookie, u64 version, int flag); +extern void osd_req_op_alloc_hint_init(struct ceph_osd_request *osd_req, + unsigned int which, + u64 expected_object_size, + u64 expected_write_size); extern struct ceph_osd_request *ceph_osdc_alloc_request(struct ceph_osd_client *osdc, struct ceph_snap_context *snapc, diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 8f9bf4570215..2caabef8d369 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -227,6 +227,9 @@ enum { CEPH_OSD_OP_OMAPRMKEYS = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 24, CEPH_OSD_OP_OMAP_CMP = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_DATA | 25, + /* hints */ + CEPH_OSD_OP_SETALLOCHINT = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_DATA | 35, + /** multi **/ CEPH_OSD_OP_CLONERANGE = CEPH_OSD_OP_MODE_WR | CEPH_OSD_OP_TYPE_MULTI | 1, CEPH_OSD_OP_ASSERT_SRC_VERSION = CEPH_OSD_OP_MODE_RD | CEPH_OSD_OP_TYPE_MULTI | 2, @@ -416,6 +419,10 @@ struct ceph_osd_op { __le64 offset, length; __le64 src_offset; } __attribute__ ((packed)) clonerange; + struct { + __le64 expected_object_size; + __le64 expected_write_size; + } __attribute__ ((packed)) alloc_hint; }; __le32 payload_len; } __attribute__ ((packed)); -- cgit v1.2.3 From 7cc69d42e6950404587bef9489a5ed6f9f6bab4e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Tue, 25 Feb 2014 16:22:27 +0200 Subject: libceph: bump CEPH_OSD_MAX_OP to 3 Our longest osd request now contains 3 ops: copyup+hint+write. Also, CEPH_OSD_MAX_OP value in a BUG_ON in rbd_osd_req_callback() was hard-coded to 2. Fix it, and switch to rbd_assert while at it. Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil Reviewed-by: Alex Elder --- include/linux/ceph/osd_client.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index c42d1ada685f..94ec69672164 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -43,7 +43,7 @@ struct ceph_osd { }; -#define CEPH_OSD_MAX_OP 2 +#define CEPH_OSD_MAX_OP 3 enum ceph_osd_data_type { CEPH_OSD_DATA_TYPE_NONE = 0, -- cgit v1.2.3 From 19913b4eac4a230dccb548931358398f45dabe4c Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Thu, 6 Mar 2014 16:40:32 +0800 Subject: ceph: add get_name() NFS export callback Use the newly introduced LOOKUPNAME MDS request to connect child inode to its parent directory. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- include/linux/ceph/ceph_fs.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 25bfb0eff772..35f345f7b3a3 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -332,6 +332,7 @@ enum { CEPH_MDS_OP_LOOKUPHASH = 0x00102, CEPH_MDS_OP_LOOKUPPARENT = 0x00103, CEPH_MDS_OP_LOOKUPINO = 0x00104, + CEPH_MDS_OP_LOOKUPNAME = 0x00105, CEPH_MDS_OP_SETXATTR = 0x01105, CEPH_MDS_OP_RMXATTR = 0x01106, -- cgit v1.2.3 From 8e2f1a63f2217365223026422a2f8ba5967051d6 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 2 Apr 2014 20:52:57 +0200 Subject: packet: fix packet_direct_xmit for BQL enabled drivers Currently, in packet_direct_xmit() we test the assigned netdevice queue for netif_xmit_frozen_or_stopped() before doing an ndo_start_xmit(). This can have the side-effect that BQL enabled drivers which make use of netdev_tx_sent_queue() internally, set __QUEUE_STATE_STACK_XOFF from within the stack and would not fully fill the device's TX ring from packet sockets with PACKET_QDISC_BYPASS enabled. Instead, use a test without BQL bit so that bursts can be absorbed into the NICs TX ring. Fix and code suggested by Eric Dumazet, thanks! Signed-off-by: Eric Dumazet Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/netdevice.h | 24 +++++++++++++++++++----- 1 file changed, 19 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 775cc956ff78..7ed3a3aa6604 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -519,11 +519,18 @@ enum netdev_queue_state_t { __QUEUE_STATE_DRV_XOFF, __QUEUE_STATE_STACK_XOFF, __QUEUE_STATE_FROZEN, -#define QUEUE_STATE_ANY_XOFF ((1 << __QUEUE_STATE_DRV_XOFF) | \ - (1 << __QUEUE_STATE_STACK_XOFF)) -#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ - (1 << __QUEUE_STATE_FROZEN)) }; + +#define QUEUE_STATE_DRV_XOFF (1 << __QUEUE_STATE_DRV_XOFF) +#define QUEUE_STATE_STACK_XOFF (1 << __QUEUE_STATE_STACK_XOFF) +#define QUEUE_STATE_FROZEN (1 << __QUEUE_STATE_FROZEN) + +#define QUEUE_STATE_ANY_XOFF (QUEUE_STATE_DRV_XOFF | QUEUE_STATE_STACK_XOFF) +#define QUEUE_STATE_ANY_XOFF_OR_FROZEN (QUEUE_STATE_ANY_XOFF | \ + QUEUE_STATE_FROZEN) +#define QUEUE_STATE_DRV_XOFF_OR_FROZEN (QUEUE_STATE_DRV_XOFF | \ + QUEUE_STATE_FROZEN) + /* * __QUEUE_STATE_DRV_XOFF is used by drivers to stop the transmit queue. The * netif_tx_* functions below are used to manipulate this flag. The @@ -2252,11 +2259,18 @@ static inline bool netif_xmit_stopped(const struct netdev_queue *dev_queue) return dev_queue->state & QUEUE_STATE_ANY_XOFF; } -static inline bool netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) +static inline bool +netif_xmit_frozen_or_stopped(const struct netdev_queue *dev_queue) { return dev_queue->state & QUEUE_STATE_ANY_XOFF_OR_FROZEN; } +static inline bool +netif_xmit_frozen_or_drv_stopped(const struct netdev_queue *dev_queue) +{ + return dev_queue->state & QUEUE_STATE_DRV_XOFF_OR_FROZEN; +} + static inline void netdev_tx_sent_queue(struct netdev_queue *dev_queue, unsigned int bytes) { -- cgit v1.2.3 From d0290214de712150b118a532ded378a29255893b Mon Sep 17 00:00:00 2001 From: Jiri Pirko Date: Wed, 2 Apr 2014 23:09:31 +0200 Subject: net: add busy_poll device feature Currently there is no way how to find out if a device supports busy polling. So add a feature and make it dependent on ndo_busy_poll existence. Signed-off-by: Jiri Pirko Signed-off-by: David S. Miller --- include/linux/netdev_features.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netdev_features.h b/include/linux/netdev_features.h index 5a09a48f2658..c26d0ec2ef3a 100644 --- a/include/linux/netdev_features.h +++ b/include/linux/netdev_features.h @@ -63,6 +63,7 @@ enum { NETIF_F_HW_VLAN_STAG_RX_BIT, /* Receive VLAN STAG HW acceleration */ NETIF_F_HW_VLAN_STAG_FILTER_BIT,/* Receive filtering on VLAN STAGs */ NETIF_F_HW_L2FW_DOFFLOAD_BIT, /* Allow L2 Forwarding in Hardware */ + NETIF_F_BUSY_POLL_BIT, /* Busy poll */ /* * Add your fresh new feature above and remember to update @@ -118,6 +119,7 @@ enum { #define NETIF_F_HW_VLAN_STAG_RX __NETIF_F(HW_VLAN_STAG_RX) #define NETIF_F_HW_VLAN_STAG_TX __NETIF_F(HW_VLAN_STAG_TX) #define NETIF_F_HW_L2FW_DOFFLOAD __NETIF_F(HW_L2FW_DOFFLOAD) +#define NETIF_F_BUSY_POLL __NETIF_F(BUSY_POLL) /* Features valid for ethtool to change */ /* = all defined minus driver/device-class-related */ -- cgit v1.2.3 From 64400c3791d9fcebf23318a289f9da964547a6f3 Mon Sep 17 00:00:00 2001 From: Bryan Wu Date: Wed, 19 Feb 2014 14:48:36 -0800 Subject: gpu: host1x: export host1x_syncpt_incr_max() function Tegra V4L2 camera driver needs this function to do frame capture. Signed-off-by: Bryan Wu Signed-off-by: Thierry Reding --- include/linux/host1x.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/host1x.h b/include/linux/host1x.h index 3af847273277..d2b52999e771 100644 --- a/include/linux/host1x.h +++ b/include/linux/host1x.h @@ -136,6 +136,7 @@ u32 host1x_syncpt_id(struct host1x_syncpt *sp); u32 host1x_syncpt_read_min(struct host1x_syncpt *sp); u32 host1x_syncpt_read_max(struct host1x_syncpt *sp); int host1x_syncpt_incr(struct host1x_syncpt *sp); +u32 host1x_syncpt_incr_max(struct host1x_syncpt *sp, u32 incrs); int host1x_syncpt_wait(struct host1x_syncpt *sp, u32 thresh, long timeout, u32 *value); struct host1x_syncpt *host1x_syncpt_request(struct device *dev, -- cgit v1.2.3 From eb13e832f823f6c110ea53e3067bafe22b87de63 Mon Sep 17 00:00:00 2001 From: "Yan, Zheng" Date: Sun, 9 Mar 2014 23:16:40 +0800 Subject: ceph: use fl->fl_file as owner identifier of flock and posix lock flock and posix lock should use fl->fl_file instead of process ID as owner identifier. (posix lock uses fl->fl_owner. fl->fl_owner is usually equal to fl->fl_file, but it also can be a customized value). The process ID of who holds the lock is just for F_GETLK fcntl(2). The fix is rename the 'pid' fields of struct ceph_mds_request_args and struct ceph_filelock to 'owner', rename 'pid_namespace' fields to 'pid'. Assign fl->fl_file to the 'owner' field of lock messages. We also set the most significant bit of the 'owner' field. MDS can use that bit to distinguish between old and new clients. The MDS counterpart of this patch modifies the flock code to not take the 'pid_namespace' into consideration when checking conflict locks. Signed-off-by: Yan, Zheng Reviewed-by: Sage Weil --- include/linux/ceph/ceph_fs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_fs.h b/include/linux/ceph/ceph_fs.h index 35f345f7b3a3..5f6db18d72e8 100644 --- a/include/linux/ceph/ceph_fs.h +++ b/include/linux/ceph/ceph_fs.h @@ -421,8 +421,8 @@ union ceph_mds_request_args { struct { __u8 rule; /* currently fcntl or flock */ __u8 type; /* shared, exclusive, remove*/ + __le64 owner; /* owner of the lock */ __le64 pid; /* process id requesting the lock */ - __le64 pid_namespace; __le64 start; /* initial location to lock */ __le64 length; /* num bytes to lock from start */ __u8 wait; /* will caller wait for lock to become available? */ @@ -533,8 +533,8 @@ struct ceph_filelock { __le64 start;/* file offset to start lock at */ __le64 length; /* num bytes to lock; 0 for all following start */ __le64 client; /* which client holds the lock */ + __le64 owner; /* owner the lock */ __le64 pid; /* process id holding the lock on the client */ - __le64 pid_namespace; __u8 type; /* shared lock, exclusive lock, or unlock */ } __attribute__ ((packed)); -- cgit v1.2.3 From e2b149cc4ba00766aceb87950c6de72ea7fc8b2e Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: add chooseleaf_vary_r tunable The current crush_choose_firstn code will re-use the same 'r' value for the recursive call. That means that if we are hitting a collision or rejection for some reason (say, an OSD that is marked out) and need to retry, we will keep making the same (bad) choice in that recursive selection. Introduce a tunable that fixes that behavior by incorporating the parent 'r' value into the recursive starting point, so that a different path will be taken in subsequent placement attempts. Note that this was done from the get-go for the new crush_choose_indep algorithm. This was exposed by a user who was seeing PGs stuck in active+remapped after reweight-by-utilization because the up set mapped to a single OSD. Reflects ceph.git commit a8e6c9fbf88bad056dd05d3eb790e98a5e43451a. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/crush/crush.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index acaa5615d634..75f36a6c7f67 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -173,6 +173,12 @@ struct crush_map { * apply to a collision: in that case we will retry as we used * to. */ __u32 chooseleaf_descend_once; + + /* if non-zero, feed r into chooseleaf, bit-shifted right by (r-1) + * bits. a value of 1 is best for new clusters. for legacy clusters + * that want to limit reshuffling, a value of 3 or 4 will make the + * mappings line up a bit better with previous mappings. */ + __u8 chooseleaf_vary_r; }; -- cgit v1.2.3 From d83ed858f144e3cfbef883d4bc499113cdddabeb Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: add SET_CHOOSELEAF_VARY_R step This lets you adjust the vary_r tunable on a per-rule basis. Reflects ceph.git commit f944ccc20aee60a7d8da7e405ec75ad1cd449fac. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/crush/crush.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crush/crush.h b/include/linux/crush/crush.h index 75f36a6c7f67..4fad5f8ee01d 100644 --- a/include/linux/crush/crush.h +++ b/include/linux/crush/crush.h @@ -51,6 +51,7 @@ enum { CRUSH_RULE_SET_CHOOSELEAF_TRIES = 9, /* override chooseleaf_descend_once */ CRUSH_RULE_SET_CHOOSE_LOCAL_TRIES = 10, CRUSH_RULE_SET_CHOOSE_LOCAL_FALLBACK_TRIES = 11, + CRUSH_RULE_SET_CHOOSELEAF_VARY_R = 12 }; /* -- cgit v1.2.3 From 07bd7de47a65767432ceb66d4ab30cdc05ed2b35 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Mar 2014 16:58:37 +0200 Subject: crush: support chooseleaf_vary_r tunable (tunables3) by default Add TUNABLES3 feature (chooseleaf_vary_r tunable) to a set of features supported by default. Signed-off-by: Ilya Dryomov Reviewed-by: Josh Durgin --- include/linux/ceph/ceph_features.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 138448f766b4..77c097fe9ea9 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -43,6 +43,13 @@ #define CEPH_FEATURE_CRUSH_V2 (1ULL<<36) /* new indep; SET_* steps */ #define CEPH_FEATURE_EXPORT_PEER (1ULL<<37) #define CEPH_FEATURE_OSD_ERASURE_CODES (1ULL<<38) +#define CEPH_FEATURE_OSD_TMAP2OMAP (1ULL<<38) /* overlap with EC */ +/* The process supports new-style OSDMap encoding. Monitors also use + this bit to determine if peers support NAK messages. */ +#define CEPH_FEATURE_OSDMAP_ENC (1ULL<<39) +#define CEPH_FEATURE_MDS_INLINE_DATA (1ULL<<40) +#define CEPH_FEATURE_CRUSH_TUNABLES3 (1ULL<<41) +#define CEPH_FEATURE_OSD_PRIMARY_AFFINITY (1ULL<<41) /* overlap w/ tunables3 */ /* * The introduction of CEPH_FEATURE_OSD_SNAPMAPPER caused the feature @@ -82,7 +89,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSDHASHPSPOOL | \ CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ - CEPH_FEATURE_EXPORT_PEER) + CEPH_FEATURE_EXPORT_PEER | \ + CEPH_FEATURE_CRUSH_TUNABLES3) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3 From a2505d63ee0541d9b4685250b033192e68222e97 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Thu, 13 Mar 2014 16:36:13 +0200 Subject: libceph: split osdmap allocation and decode steps Split osdmap allocation and initialization into a separate function, ceph_osdmap_decode(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 8c8b3cefc28b..46c3e304c3d8 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -156,7 +156,7 @@ static inline int ceph_decode_pgid(void **p, void *end, struct ceph_pg *pgid) return 0; } -extern struct ceph_osdmap *osdmap_decode(void **p, void *end); +extern struct ceph_osdmap *ceph_osdmap_decode(void **p, void *end); extern struct ceph_osdmap *osdmap_apply_incremental(void **p, void *end, struct ceph_osdmap *map, struct ceph_messenger *msgr); -- cgit v1.2.3 From 35a935d75d51abe58d3427a8b4ae3745a5a14e1c Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:29 +0200 Subject: libceph: generalize ceph_pg_mapping In preparation for adding support for primary_temp mappings, generalize struct ceph_pg_mapping so it can hold mappings other than pg_temp. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 46c3e304c3d8..4837e58e3203 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -60,8 +60,13 @@ struct ceph_object_id { struct ceph_pg_mapping { struct rb_node node; struct ceph_pg pgid; - int len; - int osds[]; + + union { + struct { + int len; + int osds[]; + } pg_temp; + }; }; struct ceph_osdmap { -- cgit v1.2.3 From 9686f94c8cfc06e8afb7b2233ab8f1f6ac01957f Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:29 +0200 Subject: libceph: primary_temp infrastructure Add primary_temp mappings infrastructure. struct ceph_pg_mapping is overloaded, primary_temp mappings are stored in an rb-tree, rooted at ceph_osdmap, in a manner similar to pg_temp mappings. Dump primary_temp mappings to /sys/kernel/debug/ceph//osdmap, one 'primary_temp ' per line, e.g: primary_temp 2.6 4 Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 4837e58e3203..db4fb6322aae 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -66,6 +66,9 @@ struct ceph_pg_mapping { int len; int osds[]; } pg_temp; + struct { + int osd; + } primary_temp; }; }; @@ -83,6 +86,8 @@ struct ceph_osdmap { struct ceph_entity_addr *osd_addr; struct rb_root pg_temp; + struct rb_root primary_temp; + struct rb_root pg_pools; u32 pool_max; -- cgit v1.2.3 From 2cfa34f2d67a36e292cbe6e4c1e60d212b7ba4d1 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:30 +0200 Subject: libceph: primary_affinity infrastructure Add primary_affinity infrastructure. primary_affinity values are stored in an max_osd-sized array, hanging off ceph_osdmap, similar to a osd_weight array. Introduce {get,set}_primary_affinity() helpers, primarily to return CEPH_OSD_DEFAULT_PRIMARY_AFFINITY when no affinity has been set and to abstract out osd_primary_affinity array allocation and initialization. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 3 +++ include/linux/ceph/rados.h | 4 ++++ 2 files changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index db4fb6322aae..6e030cb3c9ca 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -88,6 +88,8 @@ struct ceph_osdmap { struct rb_root pg_temp; struct rb_root primary_temp; + u32 *osd_primary_affinity; + struct rb_root pg_pools; u32 pool_max; @@ -134,6 +136,7 @@ static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) } extern char *ceph_osdmap_state_str(char *str, int len, int state); +extern u32 ceph_get_primary_affinity(struct ceph_osdmap *map, int osd); static inline struct ceph_entity_addr *ceph_osd_addr(struct ceph_osdmap *map, int osd) diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index 2caabef8d369..bb6f40c9cb0f 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -133,6 +133,10 @@ extern const char *ceph_osd_state_name(int s); #define CEPH_OSD_IN 0x10000 #define CEPH_OSD_OUT 0 +/* osd primary-affinity. fixed point value: 0x10000 == baseline */ +#define CEPH_OSD_MAX_PRIMARY_AFFINITY 0x10000 +#define CEPH_OSD_DEFAULT_PRIMARY_AFFINITY 0x10000 + /* * osd map flag bits -- cgit v1.2.3 From ddf3a21a03d0f01c5ba83deaecd2d0c381d5ef42 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Fri, 21 Mar 2014 19:05:31 +0200 Subject: libceph: enable OSDMAP_ENC feature bit Announce our support for "new" (v7 - split and separately versioned client and osd sections) osdmap enconding. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 77c097fe9ea9..7a4cab50b2cd 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -90,6 +90,7 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_OSD_CACHEPOOL | \ CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ + CEPH_FEATURE_OSDMAP_ENC | \ CEPH_FEATURE_CRUSH_TUNABLES3) #define CEPH_FEATURES_REQUIRED_DEFAULT \ -- cgit v1.2.3 From 246138fa6787db6f4016f26604fdc05dc9f95627 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:46 +0200 Subject: libceph: ceph_osd_{exists,is_up,is_down}(osd) definitions Sync up with ceph.git definitions. Bring in ceph_osd_is_down(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 14 +++++++++++++- 1 file changed, 13 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 6e030cb3c9ca..0895797b9e28 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -125,9 +125,21 @@ static inline void ceph_oid_copy(struct ceph_object_id *dest, dest->name_len = src->name_len; } +static inline int ceph_osd_exists(struct ceph_osdmap *map, int osd) +{ + return osd >= 0 && osd < map->max_osd && + (map->osd_state[osd] & CEPH_OSD_EXISTS); +} + static inline int ceph_osd_is_up(struct ceph_osdmap *map, int osd) { - return (osd < map->max_osd) && (map->osd_state[osd] & CEPH_OSD_UP); + return ceph_osd_exists(map, osd) && + (map->osd_state[osd] & CEPH_OSD_UP); +} + +static inline int ceph_osd_is_down(struct ceph_osdmap *map, int osd) +{ + return !ceph_osd_is_up(map, osd); } static inline bool ceph_osdmap_flag(struct ceph_osdmap *map, int flag) -- cgit v1.2.3 From 2abebdbca7997422bfab6bf8b6559384a6b95294 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:47 +0200 Subject: libceph: ceph_can_shift_osds(pool) and pool type defines Bring in pg_pool_t::can_shift_osds() counterpart along with pool type defines. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 12 ++++++++++++ include/linux/ceph/rados.h | 5 +++-- 2 files changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 0895797b9e28..4e28c1e5d62f 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -41,6 +41,18 @@ struct ceph_pg_pool_info { char *name; }; +static inline bool ceph_can_shift_osds(struct ceph_pg_pool_info *pool) +{ + switch (pool->type) { + case CEPH_POOL_TYPE_REP: + return true; + case CEPH_POOL_TYPE_EC: + return false; + default: + BUG_ON(1); + } +} + struct ceph_object_locator { s64 pool; }; diff --git a/include/linux/ceph/rados.h b/include/linux/ceph/rados.h index bb6f40c9cb0f..f20e0d8a2155 100644 --- a/include/linux/ceph/rados.h +++ b/include/linux/ceph/rados.h @@ -81,8 +81,9 @@ struct ceph_pg_v1 { */ #define CEPH_NOPOOL ((__u64) (-1)) /* pool id not defined */ -#define CEPH_PG_TYPE_REP 1 -#define CEPH_PG_TYPE_RAID4 2 +#define CEPH_POOL_TYPE_REP 1 +#define CEPH_POOL_TYPE_RAID4 2 /* never implemented */ +#define CEPH_POOL_TYPE_EC 3 /* * stable_mod func is used to control number of placement groups. -- cgit v1.2.3 From ac972230e20581b044f5ce66dcaf3c5af8d57444 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:48 +0200 Subject: libceph: switch ceph_calc_pg_acting() to new helpers Switch ceph_calc_pg_acting() to new helpers: pg_to_raw_osds(), raw_to_up_osds() and apply_temps(). Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index 4e28c1e5d62f..b0c8f8490663 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -212,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *acting); + int *osds); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); -- cgit v1.2.3 From 8008ab1080c1768b02d232dcfd9e161cd47cc9f7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:48 +0200 Subject: libceph: return primary from ceph_calc_pg_acting() In preparation for adding support for primary_temp, stop assuming primaryness: add a primary out parameter to ceph_calc_pg_acting() and change call sites accordingly. Primary is now specified separately from the order of osds in the set. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/osdmap.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/osdmap.h b/include/linux/ceph/osdmap.h index b0c8f8490663..561ea896c657 100644 --- a/include/linux/ceph/osdmap.h +++ b/include/linux/ceph/osdmap.h @@ -212,7 +212,7 @@ extern int ceph_oloc_oid_to_pg(struct ceph_osdmap *osdmap, extern int ceph_calc_pg_acting(struct ceph_osdmap *osdmap, struct ceph_pg pgid, - int *osds); + int *osds, int *primary); extern int ceph_calc_pg_primary(struct ceph_osdmap *osdmap, struct ceph_pg pgid); -- cgit v1.2.3 From 18cb95af2d7c69aa136ab13f02dd55188c120e75 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 24 Mar 2014 17:12:50 +0200 Subject: libceph: enable PRIMARY_AFFINITY feature bit Announce our support for osdmaps with non-default primary affinity values. Signed-off-by: Ilya Dryomov Reviewed-by: Alex Elder --- include/linux/ceph/ceph_features.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index 7a4cab50b2cd..d12659ce550d 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -91,7 +91,8 @@ static inline u64 ceph_sanitize_features(u64 features) CEPH_FEATURE_CRUSH_V2 | \ CEPH_FEATURE_EXPORT_PEER | \ CEPH_FEATURE_OSDMAP_ENC | \ - CEPH_FEATURE_CRUSH_TUNABLES3) + CEPH_FEATURE_CRUSH_TUNABLES3 | \ + CEPH_FEATURE_OSD_PRIMARY_AFFINITY) #define CEPH_FEATURES_REQUIRED_DEFAULT \ (CEPH_FEATURE_NOSRCADDR | \ -- cgit v1.2.3 From 7f4b04614a273089ad65654f53771c033fadc65e Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Fri, 28 Mar 2014 19:11:47 +0530 Subject: cpufreq: create another field .flags in cpufreq_frequency_table Currently cpufreq frequency table has two fields: frequency and driver_data. driver_data is only for drivers' internal use and cpufreq core shouldn't use it at all. But with the introduction of BOOST frequencies, this assumption was broken and we started using it as a flag instead. There are two problems due to this: - It is against the description of this field, as driver's data is used by the core now. - if drivers fill it with -3 for any frequency, then those frequencies are never considered by cpufreq core as it is exactly same as value of CPUFREQ_BOOST_FREQ, i.e. ~2. The best way to get this fixed is by creating another field flags which will be used for such flags. This patch does that. Along with that various drivers need modifications due to the change of struct cpufreq_frequency_table. Reviewed-by: Gautham R Shenoy Signed-off-by: Viresh Kumar Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index c48e595f623e..5ae5100c1f24 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -455,11 +455,14 @@ extern struct cpufreq_governor cpufreq_gov_conservative; * FREQUENCY TABLE HELPERS * *********************************************************************/ -#define CPUFREQ_ENTRY_INVALID ~0 -#define CPUFREQ_TABLE_END ~1 -#define CPUFREQ_BOOST_FREQ ~2 +/* Special Values of .frequency field */ +#define CPUFREQ_ENTRY_INVALID ~0 +#define CPUFREQ_TABLE_END ~1 +/* Special Values of .flags field */ +#define CPUFREQ_BOOST_FREQ (1 << 0) struct cpufreq_frequency_table { + unsigned int flags; unsigned int driver_data; /* driver specific data, not used by core */ unsigned int frequency; /* kHz - doesn't need to be in ascending * order */ -- cgit v1.2.3 From 403c63cb6d7ec2caca2f9222ff843ac89d7d700a Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Mon, 29 Jul 2013 16:31:18 -0700 Subject: NTB: client event cleanup Provide a better event interface between the client and transport Signed-off-by: Jon Mason --- include/linux/ntb.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index f6a15205853b..cbc792cd745e 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -54,6 +54,11 @@ struct ntb_client { void (*remove) (struct pci_dev *pdev); }; +enum { + NTB_LINK_DOWN = 0, + NTB_LINK_UP, +}; + int ntb_register_client(struct ntb_client *drvr); void ntb_unregister_client(struct ntb_client *drvr); int ntb_register_client_dev(char *device_name); -- cgit v1.2.3 From 53ca4fea0bbe966b3123509125898b286a136f47 Mon Sep 17 00:00:00 2001 From: Jon Mason Date: Tue, 26 Nov 2013 11:21:50 -0700 Subject: NTB: Code Style Clean-up Some white space and 80 char overruns corrected. Signed-off-by: Jon Mason --- include/linux/ntb.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ntb.h b/include/linux/ntb.h index cbc792cd745e..9ac1a62fc6f5 100644 --- a/include/linux/ntb.h +++ b/include/linux/ntb.h @@ -50,8 +50,8 @@ struct ntb_transport_qp; struct ntb_client { struct device_driver driver; - int (*probe) (struct pci_dev *pdev); - void (*remove) (struct pci_dev *pdev); + int (*probe)(struct pci_dev *pdev); + void (*remove)(struct pci_dev *pdev); }; enum { @@ -65,11 +65,11 @@ int ntb_register_client_dev(char *device_name); void ntb_unregister_client_dev(char *device_name); struct ntb_queue_handlers { - void (*rx_handler) (struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*tx_handler) (struct ntb_transport_qp *qp, void *qp_data, - void *data, int len); - void (*event_handler) (void *data, int status); + void (*rx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*tx_handler)(struct ntb_transport_qp *qp, void *qp_data, + void *data, int len); + void (*event_handler)(void *data, int status); }; unsigned char ntb_transport_qp_num(struct ntb_transport_qp *qp); -- cgit v1.2.3 From b8780c363d808a726a34793caa900923d32b6b80 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Mon, 7 Apr 2014 17:33:06 +0200 Subject: sched: remove sleep_on() and friends This is the final piece in the puzzle, as all patches to remove the last users of \(interruptible_\|\)sleep_on\(_timeout\|\) have made it into the 3.15 merge window. The work was long overdue, and this interface in particular should not have survived the BKL removal that was done a couple of years ago. Citing Jon Corbet from http://lwn.net/2001/0201/kernel.php3": "[...] it was suggested that the janitors look for and fix all code that calls sleep_on() [...] since (1) almost all such code is incorrect, and (2) Linus has agreed that those functions should be removed in the 2.5 development series". We haven't quite made it for 2.5, but maybe we can merge this for 3.15. Signed-off-by: Arnd Bergmann Cc: Peter Zijlstra Cc: Ingo Molnar Signed-off-by: Linus Torvalds --- include/linux/wait.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/wait.h b/include/linux/wait.h index 559044c79232..e7d9d9ed14f5 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -803,17 +803,6 @@ do { \ __ret; \ }) - -/* - * These are the old interfaces to sleep waiting for an event. - * They are racy. DO NOT use them, use the wait_event* interfaces above. - * We plan to remove these interfaces. - */ -extern void sleep_on(wait_queue_head_t *q); -extern long sleep_on_timeout(wait_queue_head_t *q, signed long timeout); -extern void interruptible_sleep_on(wait_queue_head_t *q); -extern long interruptible_sleep_on_timeout(wait_queue_head_t *q, signed long timeout); - /* * Waitqueues which are removed from the waitqueue_head at wakeup time */ -- cgit v1.2.3 From a0715cc22601e8830ace98366c0c2bd8da52af52 Mon Sep 17 00:00:00 2001 From: Alex Thorlton Date: Mon, 7 Apr 2014 15:37:10 -0700 Subject: mm, thp: add VM_INIT_DEF_MASK and PRCTL_THP_DISABLE Add VM_INIT_DEF_MASK, to allow us to set the default flags for VMs. It also adds a prctl control which allows us to set the THP disable bit in mm->def_flags so that VMs will pick up the setting as they are created. Signed-off-by: Alex Thorlton Suggested-by: Oleg Nesterov Cc: Gerald Schaefer Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Christian Borntraeger Cc: Paolo Bonzini Cc: "Kirill A. Shutemov" Cc: Mel Gorman Acked-by: Rik van Riel Cc: Ingo Molnar Cc: Peter Zijlstra Cc: Andrea Arcangeli Cc: Oleg Nesterov Cc: "Eric W. Biederman" Cc: Alexander Viro Cc: Johannes Weiner Cc: David Rientjes Cc: Paolo Bonzini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 35300f390eb6..c270fa68a32b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -177,6 +177,9 @@ extern unsigned int kobjsize(const void *objp); */ #define VM_SPECIAL (VM_IO | VM_DONTEXPAND | VM_PFNMAP | VM_MIXEDMAP) +/* This mask defines which mm->def_flags a process can inherit its parent */ +#define VM_INIT_DEF_MASK VM_NOHUGEPAGE + /* * mapping from the currently active vm_flags protection bits (the * low four bits) to a page protection mask.. -- cgit v1.2.3 From 8c6e50b0290c4c708a3e6462729e1e9151a9a7df Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 7 Apr 2014 15:37:18 -0700 Subject: mm: introduce vm_ops->map_pages() Here's new version of faultaround patchset. It took a while to tune it and collect performance data. First patch adds new callback ->map_pages to vm_operations_struct. ->map_pages() is called when VM asks to map easy accessible pages. Filesystem should find and map pages associated with offsets from "pgoff" till "max_pgoff". ->map_pages() is called with page table locked and must not block. If it's not possible to reach a page without blocking, filesystem should skip it. Filesystem should use do_set_pte() to setup page table entry. Pointer to entry associated with offset "pgoff" is passed in "pte" field in vm_fault structure. Pointers to entries for other offsets should be calculated relative to "pte". Currently VM use ->map_pages only on read page fault path. We try to map FAULT_AROUND_PAGES a time. FAULT_AROUND_PAGES is 16 for now. Performance data for different FAULT_AROUND_ORDER is below. TODO: - implement ->map_pages() for shmem/tmpfs; - modify get_user_pages() to be able to use ->map_pages() and implement mmap(MAP_POPULATE|MAP_NONBLOCK) on top. ========================================================================= Tested on 4-socket machine (120 threads) with 128GiB of RAM. Few real-world workloads. The sweet spot for FAULT_AROUND_ORDER here is somewhere between 3 and 5. Let's say 4 :) Linux build (make -j60) FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 minor-faults 283,301,572 247,151,987 212,215,789 204,772,882 199,568,944 194,703,779 193,381,485 time, seconds 151.227629483 153.920996480 151.356125472 150.863792049 150.879207877 151.150764954 151.450962358 Linux rebuild (make -j60) FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 minor-faults 5,396,854 4,148,444 2,855,286 2,577,282 2,361,957 2,169,573 2,112,643 time, seconds 27.404543757 27.559725591 27.030057426 26.855045126 26.678618635 26.974523490 26.761320095 Git test suite (make -j60 test) FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 minor-faults 129,591,823 99,200,751 66,106,718 57,606,410 51,510,808 45,776,813 44,085,515 time, seconds 66.087215026 64.784546905 64.401156567 65.282708668 66.034016829 66.793780811 67.237810413 Two synthetic tests: access every word in file in sequential/random order. It doesn't improve much after FAULT_AROUND_ORDER == 4. Sequential access 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 1 thread minor-faults 4,195,437 2,098,275 525,068 262,251 131,170 32,856 8,282 time, seconds 7.250461742 6.461711074 5.493859139 5.488488147 5.707213983 5.898510832 5.109232856 8 threads minor-faults 33,557,540 16,892,728 4,515,848 2,366,999 1,423,382 442,732 142,339 time, seconds 16.649304881 9.312555263 6.612490639 6.394316732 6.669827501 6.75078944 6.371900528 32 threads minor-faults 134,228,222 67,526,810 17,725,386 9,716,537 4,763,731 1,668,921 537,200 time, seconds 49.164430543 29.712060103 12.938649729 10.175151004 11.840094583 9.594081325 9.928461797 60 threads minor-faults 251,687,988 126,146,952 32,919,406 18,208,804 10,458,947 2,733,907 928,217 time, seconds 86.260656897 49.626551828 22.335007632 17.608243696 16.523119035 16.339489186 16.326390902 120 threads minor-faults 503,352,863 252,939,677 67,039,168 35,191,827 19,170,091 4,688,357 1,471,862 time, seconds 124.589206333 79.757867787 39.508707872 32.167281632 29.972989292 28.729834575 28.042251622 Random access 1GiB file 1 thread minor-faults 262,636 132,743 34,369 17,299 8,527 3,451 1,222 time, seconds 15.351890914 16.613802482 16.569227308 15.179220992 16.557356122 16.578247824 15.365266994 8 threads minor-faults 2,098,948 1,061,871 273,690 154,501 87,110 25,663 7,384 time, seconds 15.040026343 15.096933500 14.474757288 14.289129964 14.411537468 14.296316837 14.395635804 32 threads minor-faults 8,390,734 4,231,023 1,054,432 528,847 269,242 97,746 26,881 time, seconds 20.430433109 21.585235358 22.115062928 14.872878951 14.880856305 14.883370649 14.821261690 60 threads minor-faults 15,733,258 7,892,809 1,973,393 988,266 594,789 164,994 51,691 time, seconds 26.577302548 25.692397770 18.728863715 20.153026398 21.619101933 17.745086260 17.613215273 120 threads minor-faults 31,471,111 15,816,616 3,959,209 1,978,685 1,008,299 264,635 96,010 time, seconds 41.835322703 40.459786095 36.085306105 35.313894834 35.814445675 36.552633793 34.289210594 Touch only one page in page table in 16GiB file FAULT_AROUND_ORDER Baseline 1 3 4 5 7 9 1 thread minor-faults 8,372 8,324 8,270 8,260 8,249 8,239 8,237 time, seconds 0.039892712 0.045369149 0.051846126 0.063681685 0.079095975 0.17652406 0.541213386 8 threads minor-faults 65,731 65,681 65,628 65,620 65,608 65,599 65,596 time, seconds 0.124159196 0.488600638 0.156854426 0.191901957 0.242631486 0.543569456 1.677303984 32 threads minor-faults 262,388 262,341 262,285 262,276 262,266 262,257 263,183 time, seconds 0.452421421 0.488600638 0.565020946 0.648229739 0.789850823 1.651584361 5.000361559 60 threads minor-faults 491,822 491,792 491,723 491,711 491,701 491,691 491,825 time, seconds 0.763288616 0.869620515 0.980727360 1.161732354 1.466915814 3.04041448 9.308612938 120 threads minor-faults 983,466 983,655 983,366 983,372 983,363 984,083 984,164 time, seconds 1.595846553 1.667902182 2.008959376 2.425380942 2.941368804 5.977807890 18.401846125 This patch (of 2): Introduce new vm_ops callback ->map_pages() and uses it for mapping easy accessible pages around fault address. On read page fault, if filesystem provides ->map_pages(), we try to map up to FAULT_AROUND_PAGES pages around page fault address in hope to reduce number of minor page faults. We call ->map_pages first and use ->fault() as fallback if page by the offset is not ready to be mapped (cold page cache or something). Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Cc: Alexander Viro Cc: Dave Chinner Cc: Ning Qu Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index c270fa68a32b..f710d32291e8 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -213,6 +213,10 @@ struct vm_fault { * is set (which is also implied by * VM_FAULT_ERROR). */ + /* for ->map_pages() only */ + pgoff_t max_pgoff; /* map pages for offset from pgoff till + * max_pgoff inclusive */ + pte_t *pte; /* pte entry associated with ->pgoff */ }; /* @@ -224,6 +228,7 @@ struct vm_operations_struct { void (*open)(struct vm_area_struct * area); void (*close)(struct vm_area_struct * area); int (*fault)(struct vm_area_struct *vma, struct vm_fault *vmf); + void (*map_pages)(struct vm_area_struct *vma, struct vm_fault *vmf); /* notification that a previously read-only page is about to become * writable, if an error is returned it will cause a SIGBUS */ @@ -584,6 +589,9 @@ static inline pte_t maybe_mkwrite(pte_t pte, struct vm_area_struct *vma) pte = pte_mkwrite(pte); return pte; } + +void do_set_pte(struct vm_area_struct *vma, unsigned long address, + struct page *page, pte_t *pte, bool write, bool anon); #endif /* -- cgit v1.2.3 From f1820361f83d556a7f0a9f629100f3825e594328 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 7 Apr 2014 15:37:19 -0700 Subject: mm: implement ->map_pages for page cache filemap_map_pages() is generic implementation of ->map_pages() for filesystems who uses page cache. It should be safe to use filemap_map_pages() for ->map_pages() if filesystem use filemap_fault() for ->fault(). Signed-off-by: Kirill A. Shutemov Acked-by: Linus Torvalds Cc: Mel Gorman Cc: Rik van Riel Cc: Andi Kleen Cc: Matthew Wilcox Cc: Dave Hansen Cc: Alexander Viro Cc: Dave Chinner Cc: Ning Qu Cc: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index f710d32291e8..9132faed1a41 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1847,6 +1847,7 @@ extern void truncate_inode_pages_final(struct address_space *); /* generic vm_area_ops exported for stackable file systems */ extern int filemap_fault(struct vm_area_struct *, struct vm_fault *); +extern void filemap_map_pages(struct vm_area_struct *vma, struct vm_fault *vmf); extern int filemap_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf); /* mm/page-writeback.c */ -- cgit v1.2.3 From 615d6e8756c87149f2d4c1b93d471bca002bd849 Mon Sep 17 00:00:00 2001 From: Davidlohr Bueso Date: Mon, 7 Apr 2014 15:37:25 -0700 Subject: mm: per-thread vma caching This patch is a continuation of efforts trying to optimize find_vma(), avoiding potentially expensive rbtree walks to locate a vma upon faults. The original approach (https://lkml.org/lkml/2013/11/1/410), where the largest vma was also cached, ended up being too specific and random, thus further comparison with other approaches were needed. There are two things to consider when dealing with this, the cache hit rate and the latency of find_vma(). Improving the hit-rate does not necessarily translate in finding the vma any faster, as the overhead of any fancy caching schemes can be too high to consider. We currently cache the last used vma for the whole address space, which provides a nice optimization, reducing the total cycles in find_vma() by up to 250%, for workloads with good locality. On the other hand, this simple scheme is pretty much useless for workloads with poor locality. Analyzing ebizzy runs shows that, no matter how many threads are running, the mmap_cache hit rate is less than 2%, and in many situations below 1%. The proposed approach is to replace this scheme with a small per-thread cache, maximizing hit rates at a very low maintenance cost. Invalidations are performed by simply bumping up a 32-bit sequence number. The only expensive operation is in the rare case of a seq number overflow, where all caches that share the same address space are flushed. Upon a miss, the proposed replacement policy is based on the page number that contains the virtual address in question. Concretely, the following results are seen on an 80 core, 8 socket x86-64 box: 1) System bootup: Most programs are single threaded, so the per-thread scheme does improve ~50% hit rate by just adding a few more slots to the cache. +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 50.61% | 19.90 | | patched | 73.45% | 13.58 | +----------------+----------+------------------+ 2) Kernel build: This one is already pretty good with the current approach as we're dealing with good locality. +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 75.28% | 11.03 | | patched | 88.09% | 9.31 | +----------------+----------+------------------+ 3) Oracle 11g Data Mining (4k pages): Similar to the kernel build workload. +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 70.66% | 17.14 | | patched | 91.15% | 12.57 | +----------------+----------+------------------+ 4) Ebizzy: There's a fair amount of variation from run to run, but this approach always shows nearly perfect hit rates, while baseline is just about non-existent. The amounts of cycles can fluctuate between anywhere from ~60 to ~116 for the baseline scheme, but this approach reduces it considerably. For instance, with 80 threads: +----------------+----------+------------------+ | caching scheme | hit-rate | cycles (billion) | +----------------+----------+------------------+ | baseline | 1.06% | 91.54 | | patched | 99.97% | 14.18 | +----------------+----------+------------------+ [akpm@linux-foundation.org: fix nommu build, per Davidlohr] [akpm@linux-foundation.org: document vmacache_valid() logic] [akpm@linux-foundation.org: attempt to untangle header files] [akpm@linux-foundation.org: add vmacache_find() BUG_ON] [hughd@google.com: add vmacache_valid_mm() (from Oleg)] [akpm@linux-foundation.org: coding-style fixes] [akpm@linux-foundation.org: adjust and enhance comments] Signed-off-by: Davidlohr Bueso Reviewed-by: Rik van Riel Acked-by: Linus Torvalds Reviewed-by: Michel Lespinasse Cc: Oleg Nesterov Tested-by: Hugh Dickins Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm_types.h | 4 ++-- include/linux/sched.h | 7 +++++++ include/linux/vmacache.h | 38 ++++++++++++++++++++++++++++++++++++++ 3 files changed, 47 insertions(+), 2 deletions(-) create mode 100644 include/linux/vmacache.h (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 290901a8c1de..2b58d192ea24 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -342,9 +342,9 @@ struct mm_rss_stat { struct kioctx_table; struct mm_struct { - struct vm_area_struct * mmap; /* list of VMAs */ + struct vm_area_struct *mmap; /* list of VMAs */ struct rb_root mm_rb; - struct vm_area_struct * mmap_cache; /* last find_vma result */ + u32 vmacache_seqnum; /* per-thread vmacache */ #ifdef CONFIG_MMU unsigned long (*get_unmapped_area) (struct file *filp, unsigned long addr, unsigned long len, diff --git a/include/linux/sched.h b/include/linux/sched.h index 7cb07fd26680..642477dd814a 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -132,6 +132,10 @@ struct perf_event_context; struct blk_plug; struct filename; +#define VMACACHE_BITS 2 +#define VMACACHE_SIZE (1U << VMACACHE_BITS) +#define VMACACHE_MASK (VMACACHE_SIZE - 1) + /* * List of flags we want to share for kernel threads, * if only because they are not used by them anyway. @@ -1235,6 +1239,9 @@ struct task_struct { #ifdef CONFIG_COMPAT_BRK unsigned brk_randomized:1; #endif + /* per-thread vma caching */ + u32 vmacache_seqnum; + struct vm_area_struct *vmacache[VMACACHE_SIZE]; #if defined(SPLIT_RSS_COUNTING) struct task_rss_stat rss_stat; #endif diff --git a/include/linux/vmacache.h b/include/linux/vmacache.h new file mode 100644 index 000000000000..c3fa0fd43949 --- /dev/null +++ b/include/linux/vmacache.h @@ -0,0 +1,38 @@ +#ifndef __LINUX_VMACACHE_H +#define __LINUX_VMACACHE_H + +#include +#include + +/* + * Hash based on the page number. Provides a good hit rate for + * workloads with good locality and those with random accesses as well. + */ +#define VMACACHE_HASH(addr) ((addr >> PAGE_SHIFT) & VMACACHE_MASK) + +static inline void vmacache_flush(struct task_struct *tsk) +{ + memset(tsk->vmacache, 0, sizeof(tsk->vmacache)); +} + +extern void vmacache_flush_all(struct mm_struct *mm); +extern void vmacache_update(unsigned long addr, struct vm_area_struct *newvma); +extern struct vm_area_struct *vmacache_find(struct mm_struct *mm, + unsigned long addr); + +#ifndef CONFIG_MMU +extern struct vm_area_struct *vmacache_find_exact(struct mm_struct *mm, + unsigned long start, + unsigned long end); +#endif + +static inline void vmacache_invalidate(struct mm_struct *mm) +{ + mm->vmacache_seqnum++; + + /* deal with overflows */ + if (unlikely(mm->vmacache_seqnum == 0)) + vmacache_flush_all(mm); +} + +#endif /* __LINUX_VMACACHE_H */ -- cgit v1.2.3 From 2a389610a7331d22344698f23ef2e8c55b2cde7b Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Apr 2014 15:37:29 -0700 Subject: mm, mempolicy: rename slab_node for clarity slab_node() is actually a mempolicy function, so rename it to mempolicy_slab_node() to make it clearer that it used for processes with mempolicies. At the same time, cleanup its code by saving numa_mem_id() in a local variable (since we require a node with memory, not just any node) and remove an obsolete comment that assumes the mempolicy is actually passed into the function. Signed-off-by: David Rientjes Acked-by: Christoph Lameter Cc: Johannes Weiner Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Tejun Heo Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Jianguo Wu Cc: Tim Hockin Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index 5f1ea756aace..cfe55dfca015 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -151,7 +151,7 @@ extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, extern bool init_nodemask_of_mempolicy(nodemask_t *mask); extern bool mempolicy_nodemask_intersects(struct task_struct *tsk, const nodemask_t *mask); -extern unsigned slab_node(void); +extern unsigned int mempolicy_slab_node(void); extern enum zone_type policy_zone; -- cgit v1.2.3 From f0432d159601f96839f514f286eaa5b75c4112dc Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Apr 2014 15:37:30 -0700 Subject: mm, mempolicy: remove per-process flag PF_MEMPOLICY is an unnecessary optimization for CONFIG_SLAB users. There's no significant performance degradation to checking current->mempolicy rather than current->flags & PF_MEMPOLICY in the allocation path, especially since this is considered unlikely(). Running TCP_RR with netperf-2.4.5 through localhost on 16 cpu machine with 64GB of memory and without a mempolicy: threads before after 16 1249409 1244487 32 1281786 1246783 48 1239175 1239138 64 1244642 1241841 80 1244346 1248918 96 1266436 1254316 112 1307398 1312135 128 1327607 1326502 Per-process flags are a scarce resource so we should free them up whenever possible and make them available. We'll be using it shortly for memcg oom reserves. Signed-off-by: David Rientjes Cc: Johannes Weiner Cc: Michal Hocko Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Tejun Heo Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Jianguo Wu Cc: Tim Hockin Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mempolicy.h | 1 - include/linux/sched.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mempolicy.h b/include/linux/mempolicy.h index cfe55dfca015..3c1b968da0ca 100644 --- a/include/linux/mempolicy.h +++ b/include/linux/mempolicy.h @@ -143,7 +143,6 @@ extern void numa_policy_init(void); extern void mpol_rebind_task(struct task_struct *tsk, const nodemask_t *new, enum mpol_rebind_step step); extern void mpol_rebind_mm(struct mm_struct *mm, nodemask_t *new); -extern void mpol_fix_fork_child_flag(struct task_struct *p); extern struct zonelist *huge_zonelist(struct vm_area_struct *vma, unsigned long addr, gfp_t gfp_flags, diff --git a/include/linux/sched.h b/include/linux/sched.h index 642477dd814a..6c70645eb3b6 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1851,7 +1851,6 @@ extern void thread_group_cputime_adjusted(struct task_struct *p, cputime_t *ut, #define PF_SPREAD_SLAB 0x02000000 /* Spread some slab caches over cpuset */ #define PF_NO_SETAFFINITY 0x04000000 /* Userland is not allowed to meddle with cpus_allowed */ #define PF_MCE_EARLY 0x08000000 /* Early kill for mce process policy */ -#define PF_MEMPOLICY 0x10000000 /* Non-default NUMA mempolicy */ #define PF_MUTEX_TESTER 0x20000000 /* Thread belongs to the rt mutex tester */ #define PF_FREEZER_SKIP 0x40000000 /* Freezer should not count it as freezable */ #define PF_SUSPEND_TASK 0x80000000 /* this thread called freeze_processes and should not be frozen */ -- cgit v1.2.3 From 539a13b47e462d28c48f076c63871580f694a366 Mon Sep 17 00:00:00 2001 From: David Rientjes Date: Mon, 7 Apr 2014 15:37:32 -0700 Subject: res_counter: remove interface for locked charging and uncharging The res_counter_{charge,uncharge}_locked() variants are not used in the kernel outside of the resource counter code itself, so remove the interface. Signed-off-by: David Rientjes Acked-by: Michal Hocko Cc: Johannes Weiner Cc: KAMEZAWA Hiroyuki Cc: Christoph Lameter Cc: Pekka Enberg Cc: Tejun Heo Cc: Mel Gorman Cc: Oleg Nesterov Cc: Rik van Riel Cc: Jianguo Wu Cc: Tim Hockin Cc: Christoph Lameter Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/res_counter.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/res_counter.h b/include/linux/res_counter.h index 201a69749659..56b7bc32db4f 100644 --- a/include/linux/res_counter.h +++ b/include/linux/res_counter.h @@ -104,15 +104,13 @@ void res_counter_init(struct res_counter *counter, struct res_counter *parent); * units, e.g. numbers, bytes, Kbytes, etc * * returns 0 on success and <0 if the counter->usage will exceed the - * counter->limit _locked call expects the counter->lock to be taken + * counter->limit * * charge_nofail works the same, except that it charges the resource * counter unconditionally, and returns < 0 if the after the current * charge we are over limit. */ -int __must_check res_counter_charge_locked(struct res_counter *counter, - unsigned long val, bool force); int __must_check res_counter_charge(struct res_counter *counter, unsigned long val, struct res_counter **limit_fail_at); int res_counter_charge_nofail(struct res_counter *counter, @@ -125,12 +123,10 @@ int res_counter_charge_nofail(struct res_counter *counter, * @val: the amount of the resource * * these calls check for usage underflow and show a warning on the console - * _locked call expects the counter->lock to be taken * * returns the total charges still present in @counter. */ -u64 res_counter_uncharge_locked(struct res_counter *counter, unsigned long val); u64 res_counter_uncharge(struct res_counter *counter, unsigned long val); u64 res_counter_uncharge_until(struct res_counter *counter, -- cgit v1.2.3 From d230dec18dc9a581f153c14cb5371640cd62543b Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Mon, 7 Apr 2014 15:37:38 -0700 Subject: mm: use 'const char *' insted of 'char *' for reason in dump_page() I tried to use 'dump_page(page, __func__)' for debugging, but it triggers warning: warning: passing argument 2 of `dump_page' discards `const' qualifier from pointer target type [enabled by default] Let's convert 'reason' to 'const char *' in dump_page() and friends: we shouldn't modify it anyway. Signed-off-by: Kirill A. Shutemov Cc: Dave Hansen Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mmdebug.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mmdebug.h b/include/linux/mmdebug.h index 5042c036dda9..2d57efa64cc1 100644 --- a/include/linux/mmdebug.h +++ b/include/linux/mmdebug.h @@ -3,8 +3,8 @@ struct page; -extern void dump_page(struct page *page, char *reason); -extern void dump_page_badflags(struct page *page, char *reason, +extern void dump_page(struct page *page, const char *reason); +extern void dump_page_badflags(struct page *page, const char *reason, unsigned long badflags); #ifdef CONFIG_DEBUG_VM -- cgit v1.2.3 From df381975463996178d685f6ef7d3555c5f887201 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Mon, 7 Apr 2014 15:37:43 -0700 Subject: memcg: get_mem_cgroup_from_mm() Instead of returning NULL from try_get_mem_cgroup_from_mm() when the mm owner is exiting, just return root_mem_cgroup. This makes sense for all callsites and gets rid of some of them having to fallback manually. [fengguang.wu@intel.com: fix warnings] Signed-off-by: Johannes Weiner Signed-off-by: Fengguang Wu Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index eccfb4a4b379..134636f835f7 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -94,7 +94,6 @@ bool task_in_mem_cgroup(struct task_struct *task, extern struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page); extern struct mem_cgroup *mem_cgroup_from_task(struct task_struct *p); -extern struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm); extern struct mem_cgroup *parent_mem_cgroup(struct mem_cgroup *memcg); extern struct mem_cgroup *mem_cgroup_from_css(struct cgroup_subsys_state *css); @@ -294,11 +293,6 @@ static inline struct mem_cgroup *try_get_mem_cgroup_from_page(struct page *page) return NULL; } -static inline struct mem_cgroup *try_get_mem_cgroup_from_mm(struct mm_struct *mm) -{ - return NULL; -} - static inline bool mm_match_cgroup(struct mm_struct *mm, struct mem_cgroup *memcg) { -- cgit v1.2.3 From d715ae08f2ff87508a081c4df78061bf4f7211d6 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Mon, 7 Apr 2014 15:37:46 -0700 Subject: memcg: rename high level charging functions mem_cgroup_newpage_charge is used only for charging anonymous memory so it is better to rename it to mem_cgroup_charge_anon. mem_cgroup_cache_charge is used for file backed memory so rename it to mem_cgroup_charge_file. Signed-off-by: Michal Hocko Cc: Johannes Weiner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 134636f835f7..96f3fc87ab96 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -65,7 +65,7 @@ struct mem_cgroup_reclaim_cookie { * (Of course, if memcg does memory allocation in future, GFP_KERNEL is sane.) */ -extern int mem_cgroup_newpage_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); /* for swap handling */ extern int mem_cgroup_try_charge_swapin(struct mm_struct *mm, @@ -74,7 +74,7 @@ extern void mem_cgroup_commit_charge_swapin(struct page *page, struct mem_cgroup *memcg); extern void mem_cgroup_cancel_charge_swapin(struct mem_cgroup *memcg); -extern int mem_cgroup_cache_charge(struct page *page, struct mm_struct *mm, +extern int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, gfp_t gfp_mask); struct lruvec *mem_cgroup_zone_lruvec(struct zone *, struct mem_cgroup *); @@ -233,13 +233,13 @@ void mem_cgroup_print_bad_page(struct page *page); #else /* CONFIG_MEMCG */ struct mem_cgroup; -static inline int mem_cgroup_newpage_charge(struct page *page, +static inline int mem_cgroup_charge_anon(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; } -static inline int mem_cgroup_cache_charge(struct page *page, +static inline int mem_cgroup_charge_file(struct page *page, struct mm_struct *mm, gfp_t gfp_mask) { return 0; -- cgit v1.2.3 From ed6d7c8e578331cad594ee70d60e2e146b5dce7b Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Mon, 7 Apr 2014 15:37:51 -0700 Subject: mm: remove unused arg of set_page_dirty_balance() There's only one caller of set_page_dirty_balance() and that will call it with page_mkwrite == 0. The page_mkwrite argument was unused since commit b827e496c893 "mm: close page_mkwrite races". Signed-off-by: Miklos Szeredi Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/writeback.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 021b8a319b9e..5777c13849ba 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -178,7 +178,7 @@ int write_cache_pages(struct address_space *mapping, struct writeback_control *wbc, writepage_t writepage, void *data); int do_writepages(struct address_space *mapping, struct writeback_control *wbc); -void set_page_dirty_balance(struct page *page, int page_mkwrite); +void set_page_dirty_balance(struct page *page); void writeback_set_ratelimit(void); void tag_pages_for_writeback(struct address_space *mapping, pgoff_t start, pgoff_t end); -- cgit v1.2.3 From 29f175d125f0f3a9503af8a5596f93d714cceb08 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Mon, 7 Apr 2014 15:37:55 -0700 Subject: mm/readahead.c: inline ra_submit Commit f9acc8c7b35a ("readahead: sanify file_ra_state names") left ra_submit with a single function call. Move ra_submit to internal.h and inline it to save some stack. Thanks to Andrew Morton for commenting different versions. Signed-off-by: Fabian Frederick Suggested-by: Andrew Morton Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 9132faed1a41..6edcea720ddd 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1875,9 +1875,6 @@ void page_cache_async_readahead(struct address_space *mapping, unsigned long size); unsigned long max_sane_readahead(unsigned long nr); -unsigned long ra_submit(struct file_ra_state *ra, - struct address_space *mapping, - struct file *filp); /* Generic expand stack which grows the stack according to GROWS{UP,DOWN} */ extern int expand_stack(struct vm_area_struct *vma, unsigned long address); -- cgit v1.2.3 From 834a964a098e7726fc296d7cd8f65ed3eeedd412 Mon Sep 17 00:00:00 2001 From: Srikar Dronamraju Date: Mon, 7 Apr 2014 15:37:57 -0700 Subject: numa: use LAST_CPUPID_SHIFT to calculate LAST_CPUPID_MASK LAST_CPUPID_MASK is calculated using LAST_CPUPID_WIDTH. However LAST_CPUPID_WIDTH itself can be 0. (when LAST_CPUPID_NOT_IN_PAGE_FLAGS is set). In such a case LAST_CPUPID_MASK turns out to be 0. But with recent commit 1ae71d0319: (mm: numa: bugfix for LAST_CPUPID_NOT_IN_PAGE_FLAGS) if LAST_CPUPID_MASK is 0, page_cpupid_xchg_last() and page_cpupid_reset_last() causes page->_last_cpupid to be set to 0. This causes performance regression. Its almost as if numa_balancing is off. Fix LAST_CPUPID_MASK by using LAST_CPUPID_SHIFT instead of LAST_CPUPID_WIDTH. Some performance numbers and perf stats with and without the fix. (3.14-rc6) ---------- numa01 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa01': 12,27,462 cs [100.00%] 2,41,957 migrations [100.00%] 1,68,01,713 faults [100.00%] 7,99,35,29,041 cache-misses 98,808 migrate:mm_migrate_pages [100.00%] 1407.690148814 seconds time elapsed numa02 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa02': 63,065 cs [100.00%] 14,364 migrations [100.00%] 2,08,118 faults [100.00%] 25,32,59,404 cache-misses 12 migrate:mm_migrate_pages [100.00%] 63.840827219 seconds time elapsed (3.14-rc6 with fix) ------------------- numa01 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa01': 9,68,911 cs [100.00%] 1,01,414 migrations [100.00%] 88,38,697 faults [100.00%] 4,42,92,51,042 cache-misses 4,25,060 migrate:mm_migrate_pages [100.00%] 685.965331189 seconds time elapsed numa02 Performance counter stats for '/usr/bin/time -f %e %S %U %c %w -o start_bench.out -a ./numa02': 17,543 cs [100.00%] 2,962 migrations [100.00%] 1,17,843 faults [100.00%] 11,80,61,644 cache-misses 12,358 migrate:mm_migrate_pages [100.00%] 20.380132343 seconds time elapsed Signed-off-by: Srikar Dronamraju Cc: Liu Ping Fan Reviewed-by: Aneesh Kumar K.V Cc: Peter Zijlstra Cc: Benjamin Herrenschmidt Cc: Paul Mackerras Cc: Mel Gorman Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 6edcea720ddd..abc848412e3c 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -695,7 +695,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, #define ZONES_MASK ((1UL << ZONES_WIDTH) - 1) #define NODES_MASK ((1UL << NODES_WIDTH) - 1) #define SECTIONS_MASK ((1UL << SECTIONS_WIDTH) - 1) -#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_WIDTH) - 1) +#define LAST_CPUPID_MASK ((1UL << LAST_CPUPID_SHIFT) - 1) #define ZONEID_MASK ((1UL << ZONEID_SHIFT) - 1) static inline enum zone_type page_zonenum(const struct page *page) -- cgit v1.2.3 From 23aebe1691a3d98a79676db6c0fd813e16478804 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:39 -0700 Subject: exec: kill bprm->tcomm[], simplify the "basename" logic Starting from commit c4ad8f98bef7 ("execve: use 'struct filename *' for executable name passing") bprm->filename can not go away after flush_old_exec(), so we do not need to save the binary name in bprm->tcomm[] added by 96e02d158678 ("exec: fix use-after-free bug in setup_new_exec()"). And there was never need for filename_to_taskname-like code, we can simply do set_task_comm(kbasename(filename). This patch has to change set_task_comm() and trace_task_rename() to accept "const char *", but I think this change is also good. Signed-off-by: Oleg Nesterov Cc: Heiko Carstens Cc: Steven Rostedt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 - include/linux/sched.h | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index b4a745d7d9a9..61f29e5ea840 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -44,7 +44,6 @@ struct linux_binprm { unsigned interp_flags; unsigned interp_data; unsigned long loader, exec; - char tcomm[TASK_COMM_LEN]; }; #define BINPRM_FLAGS_ENFORCE_NONDUMP_BIT 0 diff --git a/include/linux/sched.h b/include/linux/sched.h index 6c70645eb3b6..f8497059f88c 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -2357,7 +2357,7 @@ extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, i struct task_struct *fork_idle(int); extern pid_t kernel_thread(int (*fn)(void *), void *arg, unsigned long flags); -extern void set_task_comm(struct task_struct *tsk, char *from); +extern void set_task_comm(struct task_struct *tsk, const char *from); extern char *get_task_comm(char *to, struct task_struct *tsk); #ifdef CONFIG_SMP -- cgit v1.2.3 From abd50b39e783e1b6c75c7534c37f1eb2d94a89cd Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:42 -0700 Subject: wait: introduce EXIT_TRACE to avoid the racy EXIT_DEAD->EXIT_ZOMBIE transition wait_task_zombie() first does EXIT_ZOMBIE->EXIT_DEAD transition and drops tasklist_lock. If this task is not the natural child and it is traced, we change its state back to EXIT_ZOMBIE for ->real_parent. The last transition is racy, this is even documented in 50b8d257486a "ptrace: partially fix the do_wait(WEXITED) vs EXIT_DEAD->EXIT_ZOMBIE race". wait_consider_task() tries to detect this transition and clear ->notask_error but we can't rely on ptrace_reparented(), debugger can exit and do ptrace_unlink() before its sub-thread sets EXIT_ZOMBIE. And there is another problem which were missed before: this transition can also race with reparent_leader() which doesn't reset >exit_signal if EXIT_DEAD, assuming that this task must be reaped by someone else. So the tracee can be re-parented with ->exit_signal != SIGCHLD, and if /sbin/init doesn't use __WALL it becomes unreapable. This was fixed by the previous commit, but it was the temporary hack. 1. Add the new exit_state, EXIT_TRACE. It means that the task is the traced zombie, debugger is going to detach and notify its natural parent. This new state is actually EXIT_ZOMBIE | EXIT_DEAD. This way we can avoid the changes in proc/kgdb code, get_task_state() still reports "X (dead)" in this case. Note: with or without this change userspace can see Z -> X -> Z transition. Not really bad, but probably makes sense to fix. 2. Change wait_task_zombie() to use EXIT_TRACE instead of EXIT_DEAD if we need to notify the ->real_parent. 3. Revert the previous hack in reparent_leader(), now that EXIT_DEAD is always the final state we can safely ignore such a task. 4. Change wait_consider_task() to check EXIT_TRACE separately and kill the racy and no longer needed ptrace_reparented() case. If ptrace == T an EXIT_TRACE thread should be simply ignored, the owner of this state is going to ptrace_unlink() this task. We can pretend that it was already removed from ->ptraced list. Otherwise we should skip this thread too but clear ->notask_error, we must be the natural parent and debugger is going to untrace and notify us. IOW, this doesn't differ from "EXIT_ZOMBIE && p->ptrace" even if the task was already untraced. Signed-off-by: Oleg Nesterov Reported-by: Jan Kratochvil Reported-by: Michal Schmidt Tested-by: Michal Schmidt Cc: Al Viro Cc: Lennart Poettering Cc: Roland McGrath Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index f8497059f88c..7781de5e5e7b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -212,6 +212,7 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); /* in tsk->exit_state */ #define EXIT_ZOMBIE 16 #define EXIT_DEAD 32 +#define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* in tsk->state again */ #define TASK_DEAD 64 #define TASK_WAKEKILL 128 -- cgit v1.2.3 From ad86622b478eaafdc25b74237df82b10fce6326d Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Mon, 7 Apr 2014 15:38:46 -0700 Subject: wait: swap EXIT_ZOMBIE and EXIT_DEAD to hide EXIT_TRACE from user-space get_task_state() uses the most significant bit to report the state to user-space, this means that EXIT_ZOMBIE->EXIT_TRACE->EXIT_DEAD transition can be noticed via /proc as Z -> X -> Z change. Note that this was possible even before EXIT_TRACE was introduced. This is not really bad but imho it make sense to hide EXIT_TRACE from user-space completely. So the patch simply swaps EXIT_ZOMBIE and EXIT_DEAD, this way EXIT_TRACE will be seen as EXIT_ZOMBIE by user-space. Signed-off-by: Oleg Nesterov Cc: Jan Kratochvil Cc: Michal Schmidt Cc: Al Viro Cc: Lennart Poettering Cc: Roland McGrath Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 7781de5e5e7b..075b3056c0c0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -210,8 +210,8 @@ print_cfs_rq(struct seq_file *m, int cpu, struct cfs_rq *cfs_rq); #define __TASK_STOPPED 4 #define __TASK_TRACED 8 /* in tsk->exit_state */ -#define EXIT_ZOMBIE 16 -#define EXIT_DEAD 32 +#define EXIT_DEAD 16 +#define EXIT_ZOMBIE 32 #define EXIT_TRACE (EXIT_ZOMBIE | EXIT_DEAD) /* in tsk->state again */ #define TASK_DEAD 64 -- cgit v1.2.3 From 82e0703b6ca8b549952c1e4f04746f27eaec012d Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Mon, 7 Apr 2014 15:38:50 -0700 Subject: include/linux/crash_dump.h: add vmcore_cleanup() prototype Eliminate the following warning in proc/vmcore.c: fs/proc/vmcore.c:1088:6: warning: no previous prototype for `vmcore_cleanup' [-Wmissing-prototypes] [akpm@linux-foundation.org: clean up powerpc, remove unneeded EXPORT_SYMBOL] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Cc: Benjamin Herrenschmidt Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/crash_dump.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/crash_dump.h b/include/linux/crash_dump.h index 7032518f8542..72ab536ad3de 100644 --- a/include/linux/crash_dump.h +++ b/include/linux/crash_dump.h @@ -25,6 +25,7 @@ extern int __weak remap_oldmem_pfn_range(struct vm_area_struct *vma, extern ssize_t copy_oldmem_page(unsigned long, char *, size_t, unsigned long, int); +void vmcore_cleanup(void); /* Architecture code defines this if there are other possible ELF * machine types, e.g. on bi-arch capable hardware. */ -- cgit v1.2.3 From 90ae3ae539246984d36e43b0e23554bca941476f Mon Sep 17 00:00:00 2001 From: Stephen Hemminger Date: Mon, 7 Apr 2014 15:38:52 -0700 Subject: idr: remove dead code Remove no longer used deprecated code, and make local functions static. Signed-off-by: Stephen Hemminger Acked-by: Jean Delvare Acked-by: Tejun Heo Cc: Jeff Layton Cc: Philipp Reisner Cc: Jens Axboe Cc: George Spelvin Cc: Randy Dunlap Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/idr.h | 63 ----------------------------------------------------- 1 file changed, 63 deletions(-) (limited to 'include/linux') diff --git a/include/linux/idr.h b/include/linux/idr.h index f669585c4fc5..6af3400b9b2f 100644 --- a/include/linux/idr.h +++ b/include/linux/idr.h @@ -132,69 +132,6 @@ static inline void *idr_find(struct idr *idr, int id) #define idr_for_each_entry(idp, entry, id) \ for (id = 0; ((entry) = idr_get_next(idp, &(id))) != NULL; ++id) -/* - * Don't use the following functions. These exist only to suppress - * deprecated warnings on EXPORT_SYMBOL()s. - */ -int __idr_pre_get(struct idr *idp, gfp_t gfp_mask); -int __idr_get_new_above(struct idr *idp, void *ptr, int starting_id, int *id); -void __idr_remove_all(struct idr *idp); - -/** - * idr_pre_get - reserve resources for idr allocation - * @idp: idr handle - * @gfp_mask: memory allocation flags - * - * Part of old alloc interface. This is going away. Use - * idr_preload[_end]() and idr_alloc() instead. - */ -static inline int __deprecated idr_pre_get(struct idr *idp, gfp_t gfp_mask) -{ - return __idr_pre_get(idp, gfp_mask); -} - -/** - * idr_get_new_above - allocate new idr entry above or equal to a start id - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @starting_id: id to start search at - * @id: pointer to the allocated handle - * - * Part of old alloc interface. This is going away. Use - * idr_preload[_end]() and idr_alloc() instead. - */ -static inline int __deprecated idr_get_new_above(struct idr *idp, void *ptr, - int starting_id, int *id) -{ - return __idr_get_new_above(idp, ptr, starting_id, id); -} - -/** - * idr_get_new - allocate new idr entry - * @idp: idr handle - * @ptr: pointer you want associated with the id - * @id: pointer to the allocated handle - * - * Part of old alloc interface. This is going away. Use - * idr_preload[_end]() and idr_alloc() instead. - */ -static inline int __deprecated idr_get_new(struct idr *idp, void *ptr, int *id) -{ - return __idr_get_new_above(idp, ptr, 0, id); -} - -/** - * idr_remove_all - remove all ids from the given idr tree - * @idp: idr handle - * - * If you're trying to destroy @idp, calling idr_destroy() is enough. - * This is going away. Don't use. - */ -static inline void __deprecated idr_remove_all(struct idr *idp) -{ - __idr_remove_all(idp); -} - /* * IDA - IDR based id allocator, use when translation from id to * pointer isn't necessary. -- cgit v1.2.3 From 2aaf308b95b24649a6dcfed89cd956e972089b2a Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Mon, 7 Apr 2014 15:38:56 -0700 Subject: rapidio: rework device hierarchy and introduce mport class of devices This patch removes an artificial RapidIO bus root device and establishes actual device hierarchy by providing reference to real parent devices. It also introduces device class for RapidIO controller devices (on-chip or an eternal bridge, known as "mport"). Existing implementation was sufficient for SoC-based platforms that have a single RapidIO controller. With introduction of devices using multiple RapidIO controllers and PCIe-to-RapidIO bridges the old scheme is very limiting or does not work at all. The implemented changes allow to properly reference platform's local RapidIO mport devices and provide device details needed for upper layers. This change to RapidIO device hierarchy does not break any known existing kernel or user space interfaces. Signed-off-by: Alexandre Bounine Cc: Matt Porter Cc: Li Yang Cc: Kumar Gala Cc: Andre van Herk Cc: Stef van Os Cc: Jerry Jacobs Cc: Arno Tiemersma Cc: Rob Landley Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/rio.h b/include/linux/rio.h index b71d5738e683..6bda06f21930 100644 --- a/include/linux/rio.h +++ b/include/linux/rio.h @@ -83,7 +83,7 @@ #define RIO_CTAG_UDEVID 0x0001ffff /* Unique device identifier */ extern struct bus_type rio_bus_type; -extern struct device rio_bus; +extern struct class rio_mport_class; struct rio_mport; struct rio_dev; @@ -201,6 +201,7 @@ struct rio_dev { #define rio_dev_f(n) list_entry(n, struct rio_dev, net_list) #define to_rio_dev(n) container_of(n, struct rio_dev, dev) #define sw_to_rio_dev(n) container_of(n, struct rio_dev, rswitch[0]) +#define to_rio_mport(n) container_of(n, struct rio_mport, dev) /** * struct rio_msg - RIO message event @@ -248,6 +249,7 @@ enum rio_phy_type { * @phy_type: RapidIO phy type * @phys_efptr: RIO port extended features pointer * @name: Port name string + * @dev: device structure associated with an mport * @priv: Master port private data * @dma: DMA device associated with mport * @nscan: RapidIO network enumeration/discovery operations @@ -272,6 +274,7 @@ struct rio_mport { enum rio_phy_type phy_type; /* RapidIO phy type */ u32 phys_efptr; unsigned char name[RIO_MAX_MPORT_NAME]; + struct device dev; void *priv; /* Master port private data */ #ifdef CONFIG_RAPIDIO_DMA_ENGINE struct dma_device dma; -- cgit v1.2.3 From ce816fa88cca083c47ab9000b2138a83043a78be Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Uwe=20Kleine-K=C3=B6nig?= Date: Mon, 7 Apr 2014 15:39:19 -0700 Subject: Kconfig: rename HAS_IOPORT to HAS_IOPORT_MAP MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit If the renamed symbol is defined lib/iomap.c implements ioport_map and ioport_unmap and currently (nearly) all platforms define the port accessor functions outb/inb and friend unconditionally. So HAS_IOPORT_MAP is the better name for this. Consequently NO_IOPORT is renamed to NO_IOPORT_MAP. The motivation for this change is to reintroduce a symbol HAS_IOPORT that signals if outb/int et al are available. I will address that at least one merge window later though to keep surprises to a minimum and catch new introductions of (HAS|NO)_IOPORT. The changes in this commit were done using: $ git grep -l -E '(NO|HAS)_IOPORT' | xargs perl -p -i -e 's/\b((?:CONFIG_)?(?:NO|HAS)_IOPORT)\b/$1_MAP/' Signed-off-by: Uwe Kleine-König Acked-by: Arnd Bergmann Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/io.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/io.h b/include/linux/io.h index 8a18e75600cc..b76e6e545806 100644 --- a/include/linux/io.h +++ b/include/linux/io.h @@ -41,7 +41,7 @@ static inline int ioremap_page_range(unsigned long addr, unsigned long end, /* * Managed iomap interface */ -#ifdef CONFIG_HAS_IOPORT +#ifdef CONFIG_HAS_IOPORT_MAP void __iomem * devm_ioport_map(struct device *dev, unsigned long port, unsigned int nr); void devm_ioport_unmap(struct device *dev, void __iomem *addr); -- cgit v1.2.3 From 5722d094ad2b56fa2c1cb3adaf40071a55bbf242 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:24 -0700 Subject: memcg, slab: cleanup memcg cache creation This patch cleans up the memcg cache creation path as follows: - Move memcg cache name creation to a separate function to be called from kmem_cache_create_memcg(). This allows us to get rid of the mutex protecting the temporary buffer used for the name formatting, because the whole cache creation path is protected by the slab_mutex. - Get rid of memcg_create_kmem_cache(). This function serves as a proxy to kmem_cache_create_memcg(). After separating the cache name creation path, it would be reduced to a function call, so let's inline it. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 96f3fc87ab96..ab7f02884983 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -491,6 +491,9 @@ void __memcg_kmem_commit_charge(struct page *page, void __memcg_kmem_uncharge_pages(struct page *page, int order); int memcg_cache_id(struct mem_cgroup *memcg); + +char *memcg_create_cache_name(struct mem_cgroup *memcg, + struct kmem_cache *root_cache); int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache); void memcg_free_cache_params(struct kmem_cache *s); @@ -635,6 +638,12 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } +static inline char *memcg_create_cache_name(struct mem_cgroup *memcg, + struct kmem_cache *root_cache) +{ + return NULL; +} + static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache) { -- cgit v1.2.3 From 794b1248be4e7e157f5535c3ee49168aa4643349 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:26 -0700 Subject: memcg, slab: separate memcg vs root cache creation paths Memcg-awareness turned kmem_cache_create() into a dirty interweaving of memcg-only and except-for-memcg calls. To clean this up, let's move the code responsible for memcg cache creation to a separate function. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 ------ include/linux/slab.h | 6 +++--- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index ab7f02884983..02d3072841e9 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -638,12 +638,6 @@ static inline int memcg_cache_id(struct mem_cgroup *memcg) return -1; } -static inline char *memcg_create_cache_name(struct mem_cgroup *memcg, - struct kmem_cache *root_cache) -{ - return NULL; -} - static inline int memcg_alloc_cache_params(struct mem_cgroup *memcg, struct kmem_cache *s, struct kmem_cache *root_cache) { diff --git a/include/linux/slab.h b/include/linux/slab.h index b5b2df60299e..3dd389aa91c7 100644 --- a/include/linux/slab.h +++ b/include/linux/slab.h @@ -115,9 +115,9 @@ int slab_is_available(void); struct kmem_cache *kmem_cache_create(const char *, size_t, size_t, unsigned long, void (*)(void *)); -struct kmem_cache * -kmem_cache_create_memcg(struct mem_cgroup *, const char *, size_t, size_t, - unsigned long, void (*)(void *), struct kmem_cache *); +#ifdef CONFIG_MEMCG_KMEM +void kmem_cache_create_memcg(struct mem_cgroup *, struct kmem_cache *); +#endif void kmem_cache_destroy(struct kmem_cache *); int kmem_cache_shrink(struct kmem_cache *); void kmem_cache_free(struct kmem_cache *, void *); -- cgit v1.2.3 From b8529907ba35d625fa4b85d3e4dc8021be97c1f3 Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:28 -0700 Subject: memcg, slab: do not destroy children caches if parent has aliases Currently we destroy children caches at the very beginning of kmem_cache_destroy(). This is wrong, because the root cache will not necessarily be destroyed in the end - if it has aliases (refcount > 0), kmem_cache_destroy() will simply decrement its refcount and return. In this case, at best we will get a bunch of warnings in dmesg, like this one: kmem_cache_destroy kmalloc-32:0: Slab cache still has objects CPU: 1 PID: 7139 Comm: modprobe Tainted: G B W 3.13.0+ #117 Call Trace: dump_stack+0x49/0x5b kmem_cache_destroy+0xdf/0xf0 kmem_cache_destroy_memcg_children+0x97/0xc0 kmem_cache_destroy+0xf/0xf0 xfs_mru_cache_uninit+0x21/0x30 [xfs] exit_xfs_fs+0x2e/0xc44 [xfs] SyS_delete_module+0x198/0x1f0 system_call_fastpath+0x16/0x1b At worst - if kmem_cache_destroy() will race with an allocation from a memcg cache - the kernel will panic. This patch fixes this by moving children caches destruction after the check if the cache has aliases. Plus, it forbids destroying a root cache if it still has children caches, because each children cache keeps a reference to its parent. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/memcontrol.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memcontrol.h b/include/linux/memcontrol.h index 02d3072841e9..b569b8be5c5a 100644 --- a/include/linux/memcontrol.h +++ b/include/linux/memcontrol.h @@ -507,7 +507,7 @@ struct kmem_cache * __memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp); void mem_cgroup_destroy_cache(struct kmem_cache *cachep); -void kmem_cache_destroy_memcg_children(struct kmem_cache *s); +int __kmem_cache_destroy_memcg_children(struct kmem_cache *s); /** * memcg_kmem_newpage_charge: verify if a new kmem allocation is allowed. @@ -661,10 +661,6 @@ memcg_kmem_get_cache(struct kmem_cache *cachep, gfp_t gfp) { return cachep; } - -static inline void kmem_cache_destroy_memcg_children(struct kmem_cache *s) -{ -} #endif /* CONFIG_MEMCG_KMEM */ #endif /* _LINUX_MEMCONTROL_H */ -- cgit v1.2.3 From 9a41707bd3a0811919000daf094e9d50ea65f7da Mon Sep 17 00:00:00 2001 From: Vladimir Davydov Date: Mon, 7 Apr 2014 15:39:31 -0700 Subject: slub: rework sysfs layout for memcg caches Currently, we try to arrange sysfs entries for memcg caches in the same manner as for global caches. Apart from turning /sys/kernel/slab into a mess when there are a lot of kmem-active memcgs created, it actually does not work properly - we won't create more than one link to a memcg cache in case its parent is merged with another cache. For instance, if A is a root cache merged with another root cache B, we will have the following sysfs setup: X A -> X B -> X where X is some unique id (see create_unique_id()). Now if memcgs M and N start to allocate from cache A (or B, which is the same), we will get: X X:M X:N A -> X B -> X A:M -> X:M A:N -> X:N Since B is an alias for A, we won't get entries B:M and B:N, which is confusing. It is more logical to have entries for memcg caches under the corresponding root cache's sysfs directory. This would allow us to keep sysfs layout clean, and avoid such inconsistencies like one described above. This patch does the trick. It creates a "cgroup" kset in each root cache kobject to keep its children caches there. Signed-off-by: Vladimir Davydov Cc: Michal Hocko Cc: Johannes Weiner Cc: David Rientjes Cc: Pekka Enberg Cc: Glauber Costa Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/slub_def.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/slub_def.h b/include/linux/slub_def.h index f56bfa9e4526..f2f7398848cf 100644 --- a/include/linux/slub_def.h +++ b/include/linux/slub_def.h @@ -87,6 +87,9 @@ struct kmem_cache { #ifdef CONFIG_MEMCG_KMEM struct memcg_cache_params *memcg_params; int max_attr_size; /* for propagation, maximum size of a stored attr */ +#ifdef CONFIG_SYSFS + struct kset *memcg_kset; +#endif #endif #ifdef CONFIG_NUMA -- cgit v1.2.3 From b3ca1c10d7b32fdfdfaf5484eda486323f52d9be Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:34 -0700 Subject: percpu: add raw_cpu_ops The kernel has never been audited to ensure that this_cpu operations are consistently used throughout the kernel. The code generated in many places can be improved through the use of this_cpu operations (which uses a segment register for relocation of per cpu offsets instead of performing address calculations). The patch set also addresses various consistency issues in general with the per cpu macros. A. The semantics of __this_cpu_ptr() differs from this_cpu_ptr only because checks are skipped. This is typically shown through a raw_ prefix. So this patch set changes the places where __this_cpu_ptr() is used to raw_cpu_ptr(). B. There has been the long term wish by some that __this_cpu operations would check for preemption. However, there are cases where preemption checks need to be skipped. This patch set adds raw_cpu operations that do not check for preemption and then adds preemption checks to the __this_cpu operations. C. The use of __get_cpu_var is always a reference to a percpu variable that can also be handled via a this_cpu operation. This patch set replaces all uses of __get_cpu_var with this_cpu operations. D. We can then use this_cpu RMW operations in various places replacing sequences of instructions by a single one. E. The use of this_cpu operations throughout will allow other arches than x86 to implement optimized references and RMV operations to work with per cpu local data. F. The use of this_cpu operations opens up the possibility to further optimize code that relies on synchronization through per cpu data. The patch set works in a couple of stages: I. Patch 1 adds the additional raw_cpu operations and raw_cpu_ptr(). Also converts the existing __this_cpu_xx_# primitive in the x86 code to raw_cpu_xx_#. II. Patch 2-4 use the raw_cpu operations in places that would give us false positives once they are enabled. III. Patch 5 adds preemption checks to __this_cpu operations to allow checking if preemption is properly disabled when these functions are used. IV. Patches 6-20 are patches that simply replace uses of __get_cpu_var with this_cpu_ptr. They do not depend on any changes to the percpu code. No preemption tests are skipped if they are applied. V. Patches 21-46 are conversion patches that use this_cpu operations in various kernel subsystems/drivers or arch code. VI. Patches 47/48 (not included in this series) remove no longer used functions (__this_cpu_ptr and __get_cpu_var). These should only be applied after all the conversion patches have made it and after we have done additional passes through the kernel to ensure that none of the uses of these functions remain. This patch (of 46): The patches following this one will add preemption checks to __this_cpu ops so we need to have an alternative way to use this_cpu operations without preemption checks. raw_cpu_ops will be the basis for all other ops since these will be the operations that do not implement any checks. Primitive operations are renamed by this patch from __this_cpu_xxx to raw_cpu_xxxx. Also change the uses of the x86 percpu primitives in preempt.h. These depend directly on asm/percpu.h (header #include nesting issue). Signed-off-by: Peter Zijlstra Signed-off-by: Christoph Lameter Acked-by: Ingo Molnar Cc: Tejun Heo Cc: "James E.J. Bottomley" Cc: "Paul E. McKenney" Cc: Alex Shi Cc: Arnd Bergmann Cc: Benjamin Herrenschmidt Cc: Bryan Wu Cc: Catalin Marinas Cc: Chris Metcalf Cc: Daniel Lezcano Cc: David Daney Cc: David Miller Cc: David S. Miller Cc: Dimitri Sivanich Cc: Dipankar Sarma Cc: Eric Dumazet Cc: Fenghua Yu Cc: Frederic Weisbecker Cc: Greg Kroah-Hartman Cc: H. Peter Anvin Cc: Haavard Skinnemoen Cc: Hans-Christian Egtvedt Cc: Hedi Berriche Cc: Heiko Carstens Cc: Helge Deller Cc: Ivan Kokshaysky Cc: James Hogan Cc: Jens Axboe Cc: John Stultz Cc: Martin Schwidefsky Cc: Masami Hiramatsu Cc: Matt Turner Cc: Mike Frysinger Cc: Mike Travis Cc: Neil Brown Cc: Nicolas Pitre Cc: Paul Mackerras Cc: Paul Mundt Cc: Rafael J. Wysocki Cc: Ralf Baechle Cc: Richard Henderson Cc: Robert Richter Cc: Russell King Cc: Russell King Cc: Rusty Russell Cc: Steven Rostedt Cc: Thomas Gleixner Cc: Tony Luck Cc: Will Deacon Cc: Wim Van Sebroeck Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 331 +++++++++++++++++++++++++++++-------------------- 1 file changed, 195 insertions(+), 136 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index e3817d2441b6..4e4d2afcc0c7 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -243,6 +243,8 @@ do { \ } while (0) /* + * this_cpu operations (C) 2008-2013 Christoph Lameter + * * Optimized manipulation for memory allocated through the per cpu * allocator or for addresses of per cpu variables. * @@ -296,7 +298,7 @@ do { \ do { \ unsigned long flags; \ raw_local_irq_save(flags); \ - *__this_cpu_ptr(&(pcp)) op val; \ + *raw_cpu_ptr(&(pcp)) op val; \ raw_local_irq_restore(flags); \ } while (0) @@ -381,8 +383,8 @@ do { \ typeof(pcp) ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - __this_cpu_add(pcp, val); \ - ret__ = __this_cpu_read(pcp); \ + raw_cpu_add(pcp, val); \ + ret__ = raw_cpu_read(pcp); \ raw_local_irq_restore(flags); \ ret__; \ }) @@ -411,8 +413,8 @@ do { \ ({ typeof(pcp) ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - ret__ = __this_cpu_read(pcp); \ - __this_cpu_write(pcp, nval); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ raw_local_irq_restore(flags); \ ret__; \ }) @@ -439,9 +441,9 @@ do { \ typeof(pcp) ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - ret__ = __this_cpu_read(pcp); \ + ret__ = raw_cpu_read(pcp); \ if (ret__ == (oval)) \ - __this_cpu_write(pcp, nval); \ + raw_cpu_write(pcp, nval); \ raw_local_irq_restore(flags); \ ret__; \ }) @@ -476,7 +478,7 @@ do { \ int ret__; \ unsigned long flags; \ raw_local_irq_save(flags); \ - ret__ = __this_cpu_generic_cmpxchg_double(pcp1, pcp2, \ + ret__ = raw_cpu_generic_cmpxchg_double(pcp1, pcp2, \ oval1, oval2, nval1, nval2); \ raw_local_irq_restore(flags); \ ret__; \ @@ -504,12 +506,8 @@ do { \ #endif /* - * Generic percpu operations for context that are safe from preemption/interrupts. - * Either we do not care about races or the caller has the - * responsibility of handling preemption/interrupt issues. Arch code can still - * override these instructions since the arch per cpu code may be more - * efficient and may actually get race freeness for free (that is the - * case for x86 for example). + * Generic percpu operations for contexts where we do not want to do + * any checks for preemptiosn. * * If there is no other protection through preempt disable and/or * disabling interupts then one of these RMW operations can show unexpected @@ -517,211 +515,272 @@ do { \ * or an interrupt occurred and the same percpu variable was modified from * the interrupt context. */ -#ifndef __this_cpu_read -# ifndef __this_cpu_read_1 -# define __this_cpu_read_1(pcp) (*__this_cpu_ptr(&(pcp))) +#ifndef raw_cpu_read +# ifndef raw_cpu_read_1 +# define raw_cpu_read_1(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# ifndef __this_cpu_read_2 -# define __this_cpu_read_2(pcp) (*__this_cpu_ptr(&(pcp))) +# ifndef raw_cpu_read_2 +# define raw_cpu_read_2(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# ifndef __this_cpu_read_4 -# define __this_cpu_read_4(pcp) (*__this_cpu_ptr(&(pcp))) +# ifndef raw_cpu_read_4 +# define raw_cpu_read_4(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# ifndef __this_cpu_read_8 -# define __this_cpu_read_8(pcp) (*__this_cpu_ptr(&(pcp))) +# ifndef raw_cpu_read_8 +# define raw_cpu_read_8(pcp) (*raw_cpu_ptr(&(pcp))) # endif -# define __this_cpu_read(pcp) __pcpu_size_call_return(__this_cpu_read_, (pcp)) +# define raw_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) #endif -#define __this_cpu_generic_to_op(pcp, val, op) \ +#define raw_cpu_generic_to_op(pcp, val, op) \ do { \ - *__this_cpu_ptr(&(pcp)) op val; \ + *raw_cpu_ptr(&(pcp)) op val; \ } while (0) -#ifndef __this_cpu_write -# ifndef __this_cpu_write_1 -# define __this_cpu_write_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) + +#ifndef raw_cpu_write +# ifndef raw_cpu_write_1 +# define raw_cpu_write_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# ifndef __this_cpu_write_2 -# define __this_cpu_write_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) +# ifndef raw_cpu_write_2 +# define raw_cpu_write_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# ifndef __this_cpu_write_4 -# define __this_cpu_write_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) +# ifndef raw_cpu_write_4 +# define raw_cpu_write_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# ifndef __this_cpu_write_8 -# define __this_cpu_write_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), =) +# ifndef raw_cpu_write_8 +# define raw_cpu_write_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), =) # endif -# define __this_cpu_write(pcp, val) __pcpu_size_call(__this_cpu_write_, (pcp), (val)) +# define raw_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) #endif -#ifndef __this_cpu_add -# ifndef __this_cpu_add_1 -# define __this_cpu_add_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +#ifndef raw_cpu_add +# ifndef raw_cpu_add_1 +# define raw_cpu_add_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# ifndef __this_cpu_add_2 -# define __this_cpu_add_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +# ifndef raw_cpu_add_2 +# define raw_cpu_add_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# ifndef __this_cpu_add_4 -# define __this_cpu_add_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +# ifndef raw_cpu_add_4 +# define raw_cpu_add_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# ifndef __this_cpu_add_8 -# define __this_cpu_add_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), +=) +# ifndef raw_cpu_add_8 +# define raw_cpu_add_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), +=) # endif -# define __this_cpu_add(pcp, val) __pcpu_size_call(__this_cpu_add_, (pcp), (val)) +# define raw_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) #endif -#ifndef __this_cpu_sub -# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) +#ifndef raw_cpu_sub +# define raw_cpu_sub(pcp, val) raw_cpu_add((pcp), -(val)) #endif -#ifndef __this_cpu_inc -# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) +#ifndef raw_cpu_inc +# define raw_cpu_inc(pcp) raw_cpu_add((pcp), 1) #endif -#ifndef __this_cpu_dec -# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) +#ifndef raw_cpu_dec +# define raw_cpu_dec(pcp) raw_cpu_sub((pcp), 1) #endif -#ifndef __this_cpu_and -# ifndef __this_cpu_and_1 -# define __this_cpu_and_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +#ifndef raw_cpu_and +# ifndef raw_cpu_and_1 +# define raw_cpu_and_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# ifndef __this_cpu_and_2 -# define __this_cpu_and_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +# ifndef raw_cpu_and_2 +# define raw_cpu_and_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# ifndef __this_cpu_and_4 -# define __this_cpu_and_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +# ifndef raw_cpu_and_4 +# define raw_cpu_and_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# ifndef __this_cpu_and_8 -# define __this_cpu_and_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), &=) +# ifndef raw_cpu_and_8 +# define raw_cpu_and_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), &=) # endif -# define __this_cpu_and(pcp, val) __pcpu_size_call(__this_cpu_and_, (pcp), (val)) +# define raw_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) #endif -#ifndef __this_cpu_or -# ifndef __this_cpu_or_1 -# define __this_cpu_or_1(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +#ifndef raw_cpu_or +# ifndef raw_cpu_or_1 +# define raw_cpu_or_1(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# ifndef __this_cpu_or_2 -# define __this_cpu_or_2(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +# ifndef raw_cpu_or_2 +# define raw_cpu_or_2(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# ifndef __this_cpu_or_4 -# define __this_cpu_or_4(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +# ifndef raw_cpu_or_4 +# define raw_cpu_or_4(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# ifndef __this_cpu_or_8 -# define __this_cpu_or_8(pcp, val) __this_cpu_generic_to_op((pcp), (val), |=) +# ifndef raw_cpu_or_8 +# define raw_cpu_or_8(pcp, val) raw_cpu_generic_to_op((pcp), (val), |=) # endif -# define __this_cpu_or(pcp, val) __pcpu_size_call(__this_cpu_or_, (pcp), (val)) +# define raw_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) #endif -#define __this_cpu_generic_add_return(pcp, val) \ +#define raw_cpu_generic_add_return(pcp, val) \ ({ \ - __this_cpu_add(pcp, val); \ - __this_cpu_read(pcp); \ + raw_cpu_add(pcp, val); \ + raw_cpu_read(pcp); \ }) -#ifndef __this_cpu_add_return -# ifndef __this_cpu_add_return_1 -# define __this_cpu_add_return_1(pcp, val) __this_cpu_generic_add_return(pcp, val) +#ifndef raw_cpu_add_return +# ifndef raw_cpu_add_return_1 +# define raw_cpu_add_return_1(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# ifndef __this_cpu_add_return_2 -# define __this_cpu_add_return_2(pcp, val) __this_cpu_generic_add_return(pcp, val) +# ifndef raw_cpu_add_return_2 +# define raw_cpu_add_return_2(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# ifndef __this_cpu_add_return_4 -# define __this_cpu_add_return_4(pcp, val) __this_cpu_generic_add_return(pcp, val) +# ifndef raw_cpu_add_return_4 +# define raw_cpu_add_return_4(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# ifndef __this_cpu_add_return_8 -# define __this_cpu_add_return_8(pcp, val) __this_cpu_generic_add_return(pcp, val) +# ifndef raw_cpu_add_return_8 +# define raw_cpu_add_return_8(pcp, val) raw_cpu_generic_add_return(pcp, val) # endif -# define __this_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(__this_cpu_add_return_, pcp, val) +# define raw_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(raw_add_return_, pcp, val) #endif -#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) -#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) -#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) +#define raw_cpu_sub_return(pcp, val) raw_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define raw_cpu_inc_return(pcp) raw_cpu_add_return(pcp, 1) +#define raw_cpu_dec_return(pcp) raw_cpu_add_return(pcp, -1) -#define __this_cpu_generic_xchg(pcp, nval) \ +#define raw_cpu_generic_xchg(pcp, nval) \ ({ typeof(pcp) ret__; \ - ret__ = __this_cpu_read(pcp); \ - __this_cpu_write(pcp, nval); \ + ret__ = raw_cpu_read(pcp); \ + raw_cpu_write(pcp, nval); \ ret__; \ }) -#ifndef __this_cpu_xchg -# ifndef __this_cpu_xchg_1 -# define __this_cpu_xchg_1(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +#ifndef raw_cpu_xchg +# ifndef raw_cpu_xchg_1 +# define raw_cpu_xchg_1(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# ifndef __this_cpu_xchg_2 -# define __this_cpu_xchg_2(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# ifndef raw_cpu_xchg_2 +# define raw_cpu_xchg_2(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# ifndef __this_cpu_xchg_4 -# define __this_cpu_xchg_4(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# ifndef raw_cpu_xchg_4 +# define raw_cpu_xchg_4(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# ifndef __this_cpu_xchg_8 -# define __this_cpu_xchg_8(pcp, nval) __this_cpu_generic_xchg(pcp, nval) +# ifndef raw_cpu_xchg_8 +# define raw_cpu_xchg_8(pcp, nval) raw_cpu_generic_xchg(pcp, nval) # endif -# define __this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(__this_cpu_xchg_, (pcp), nval) +# define raw_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) #endif -#define __this_cpu_generic_cmpxchg(pcp, oval, nval) \ +#define raw_cpu_generic_cmpxchg(pcp, oval, nval) \ ({ \ typeof(pcp) ret__; \ - ret__ = __this_cpu_read(pcp); \ + ret__ = raw_cpu_read(pcp); \ if (ret__ == (oval)) \ - __this_cpu_write(pcp, nval); \ + raw_cpu_write(pcp, nval); \ ret__; \ }) -#ifndef __this_cpu_cmpxchg -# ifndef __this_cpu_cmpxchg_1 -# define __this_cpu_cmpxchg_1(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +#ifndef raw_cpu_cmpxchg +# ifndef raw_cpu_cmpxchg_1 +# define raw_cpu_cmpxchg_1(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# ifndef __this_cpu_cmpxchg_2 -# define __this_cpu_cmpxchg_2(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# ifndef raw_cpu_cmpxchg_2 +# define raw_cpu_cmpxchg_2(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# ifndef __this_cpu_cmpxchg_4 -# define __this_cpu_cmpxchg_4(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# ifndef raw_cpu_cmpxchg_4 +# define raw_cpu_cmpxchg_4(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# ifndef __this_cpu_cmpxchg_8 -# define __this_cpu_cmpxchg_8(pcp, oval, nval) __this_cpu_generic_cmpxchg(pcp, oval, nval) +# ifndef raw_cpu_cmpxchg_8 +# define raw_cpu_cmpxchg_8(pcp, oval, nval) raw_cpu_generic_cmpxchg(pcp, oval, nval) # endif -# define __this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(__this_cpu_cmpxchg_, pcp, oval, nval) +# define raw_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) #endif -#define __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ +#define raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ ({ \ int __ret = 0; \ - if (__this_cpu_read(pcp1) == (oval1) && \ - __this_cpu_read(pcp2) == (oval2)) { \ - __this_cpu_write(pcp1, (nval1)); \ - __this_cpu_write(pcp2, (nval2)); \ + if (raw_cpu_read(pcp1) == (oval1) && \ + raw_cpu_read(pcp2) == (oval2)) { \ + raw_cpu_write(pcp1, (nval1)); \ + raw_cpu_write(pcp2, (nval2)); \ __ret = 1; \ } \ (__ret); \ }) -#ifndef __this_cpu_cmpxchg_double -# ifndef __this_cpu_cmpxchg_double_1 -# define __this_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +#ifndef raw_cpu_cmpxchg_double +# ifndef raw_cpu_cmpxchg_double_1 +# define raw_cpu_cmpxchg_double_1(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -# ifndef __this_cpu_cmpxchg_double_2 -# define __this_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# ifndef raw_cpu_cmpxchg_double_2 +# define raw_cpu_cmpxchg_double_2(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -# ifndef __this_cpu_cmpxchg_double_4 -# define __this_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# ifndef raw_cpu_cmpxchg_double_4 +# define raw_cpu_cmpxchg_double_4(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif -# ifndef __this_cpu_cmpxchg_double_8 -# define __this_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __this_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) +# ifndef raw_cpu_cmpxchg_double_8 +# define raw_cpu_cmpxchg_double_8(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + raw_cpu_generic_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) # endif +# define raw_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) +#endif + +/* + * Generic percpu operations for context that are safe from preemption/interrupts. + * Checks will be added here soon. + */ +#ifndef __this_cpu_read +# define __this_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) +#endif + +#ifndef __this_cpu_write +# define __this_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) +#endif + +#ifndef __this_cpu_add +# define __this_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) +#endif + +#ifndef __this_cpu_sub +# define __this_cpu_sub(pcp, val) __this_cpu_add((pcp), -(typeof(pcp))(val)) +#endif + +#ifndef __this_cpu_inc +# define __this_cpu_inc(pcp) __this_cpu_add((pcp), 1) +#endif + +#ifndef __this_cpu_dec +# define __this_cpu_dec(pcp) __this_cpu_sub((pcp), 1) +#endif + +#ifndef __this_cpu_and +# define __this_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) +#endif + +#ifndef __this_cpu_or +# define __this_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) +#endif + +#ifndef __this_cpu_add_return +# define __this_cpu_add_return(pcp, val) \ + __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) +#endif + +#define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) +#define __this_cpu_inc_return(pcp) __this_cpu_add_return(pcp, 1) +#define __this_cpu_dec_return(pcp) __this_cpu_add_return(pcp, -1) + +#ifndef __this_cpu_xchg +# define __this_cpu_xchg(pcp, nval) \ + __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) +#endif + +#ifndef __this_cpu_cmpxchg +# define __this_cpu_cmpxchg(pcp, oval, nval) \ + __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) +#endif + +#ifndef __this_cpu_cmpxchg_double # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(__this_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) #endif #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3 From dc322a99d31fff5d3f8acfa061ad033953efdebe Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:38 -0700 Subject: mm: use raw_cpu ops for determining current NUMA node With the preempt checking logic for __this_cpu_ops we will get false positives from locations in the code that use numa_node_id. Before the __this_cpu ops where introduced there were no checks for preemption present either. smp_raw_processor_id() was used. See http://www.spinics.net/lists/linux-numa/msg00641.html Therefore we need to use raw_cpu_read here to avoid false postives. Note that this issue has been discussed in prior years. If the process changes nodes after retrieving the current numa node then that is acceptable since most uses of numa_node etc are for optimization and not for correctness. There were suggestions to implement a raw_numa_node_id in order to do preempt checks for numa_node_id as well. But I think we better defer that to another patch since that would mean investigating how numa_node_id() is used throughout the kernel which would increase the scope of this patchset significantly. After all preemption was never checked before when numa_node_id() was used. Some sample traces: __this_cpu_read operation in preemptible [00000000] code: login/1456 caller is __this_cpu_preempt_check+0x2b/0x2d CPU: 0 PID: 1456 Comm: login Not tainted 3.12.0-rc4-cl-00062-g2fe80d3-dirty #185 Call Trace: dump_stack+0x4e/0x82 check_preemption_disabled+0xc5/0xe0 __this_cpu_preempt_check+0x2b/0x2d get_task_policy+0x1d/0x49 get_vma_policy+0x14/0x76 alloc_pages_vma+0x35/0xff handle_mm_fault+0x290/0x73b __do_page_fault+0x3fe/0x44d do_page_fault+0x9/0xc page_fault+0x22/0x30 generic_file_aio_read+0x38e/0x624 do_sync_read+0x54/0x73 vfs_read+0x9d/0x12a SyS_read+0x47/0x7e cstar_dispatch+0x7/0x23 caller is __this_cpu_preempt_check+0x2b/0x2d CPU: 0 PID: 1456 Comm: login Not tainted 3.12.0-rc4-cl-00062-g2fe80d3-dirty #185 Call Trace: dump_stack+0x4e/0x82 check_preemption_disabled+0xc5/0xe0 __this_cpu_preempt_check+0x2b/0x2d alloc_pages_current+0x8f/0xbc __page_cache_alloc+0xb/0xd __do_page_cache_readahead+0xf4/0x219 ra_submit+0x1c/0x20 ondemand_readahead+0x28c/0x2b4 page_cache_sync_readahead+0x38/0x3a generic_file_aio_read+0x261/0x624 do_sync_read+0x54/0x73 vfs_read+0x9d/0x12a SyS_read+0x47/0x7e cstar_dispatch+0x7/0x23 Signed-off-by: Christoph Lameter Acked-by: Ingo Molnar Cc: Alex Shi Cc: Tejun Heo Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/topology.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/topology.h b/include/linux/topology.h index 12ae6ce997d6..7062330a1329 100644 --- a/include/linux/topology.h +++ b/include/linux/topology.h @@ -188,7 +188,7 @@ DECLARE_PER_CPU(int, numa_node); /* Returns the number of the current Node. */ static inline int numa_node_id(void) { - return __this_cpu_read(numa_node); + return raw_cpu_read(numa_node); } #endif @@ -245,7 +245,7 @@ static inline void set_numa_mem(int node) /* Returns the number of the nearest Node with memory */ static inline int numa_mem_id(void) { - return __this_cpu_read(_numa_mem_); + return raw_cpu_read(_numa_mem_); } #endif -- cgit v1.2.3 From 293b6a4c875c3b49853bff7de99954f49f59aa75 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:43 -0700 Subject: vmstat: use raw_cpu_ops to avoid false positives on preemption checks vm counters are allowed to be racy. Use raw_cpu_ops to avoid the local_irq_disable overhead and to avoid preemption checks which will be added to the __this_cpu operations. [akpm@linux-foundation.org: Add comment. Again.] Signed-off-by: Christoph Lameter Reported-by: Sergey Senozhatsky Cc: Dave Chinner Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/vmstat.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index ea4476157e00..45c9cd1daf7a 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -27,9 +27,13 @@ struct vm_event_state { DECLARE_PER_CPU(struct vm_event_state, vm_event_states); +/* + * vm counters are allowed to be racy. Use raw_cpu_ops to avoid the + * local_irq_disable overhead. + */ static inline void __count_vm_event(enum vm_event_item item) { - __this_cpu_inc(vm_event_states.event[item]); + raw_cpu_inc(vm_event_states.event[item]); } static inline void count_vm_event(enum vm_event_item item) @@ -39,7 +43,7 @@ static inline void count_vm_event(enum vm_event_item item) static inline void __count_vm_events(enum vm_event_item item, long delta) { - __this_cpu_add(vm_event_states.event[item], delta); + raw_cpu_add(vm_event_states.event[item], delta); } static inline void count_vm_events(enum vm_event_item item, long delta) -- cgit v1.2.3 From 188a81409ff7de1c5aae947a96356ddd8ff4aaa3 Mon Sep 17 00:00:00 2001 From: Christoph Lameter Date: Mon, 7 Apr 2014 15:39:44 -0700 Subject: percpu: add preemption checks to __this_cpu ops We define a check function in order to avoid trouble with the include files. Then the higher level __this_cpu macros are modified to invoke the preemption check. [akpm@linux-foundation.org: coding-style fixes] Signed-off-by: Christoph Lameter Acked-by: Ingo Molnar Cc: Tejun Heo Tested-by: Grygorii Strashko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/percpu.h | 39 +++++++++++++++++++++++++++++---------- 1 file changed, 29 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/percpu.h b/include/linux/percpu.h index 4e4d2afcc0c7..e7a0b95ed527 100644 --- a/include/linux/percpu.h +++ b/include/linux/percpu.h @@ -173,6 +173,12 @@ extern phys_addr_t per_cpu_ptr_to_phys(void *addr); extern void __bad_size_call_parameter(void); +#ifdef CONFIG_DEBUG_PREEMPT +extern void __this_cpu_preempt_check(const char *op); +#else +static inline void __this_cpu_preempt_check(const char *op) { } +#endif + #define __pcpu_size_call_return(stem, variable) \ ({ typeof(variable) pscr_ret__; \ __verify_pcpu_ptr(&(variable)); \ @@ -725,18 +731,24 @@ do { \ /* * Generic percpu operations for context that are safe from preemption/interrupts. - * Checks will be added here soon. */ #ifndef __this_cpu_read -# define __this_cpu_read(pcp) __pcpu_size_call_return(raw_cpu_read_, (pcp)) +# define __this_cpu_read(pcp) \ + (__this_cpu_preempt_check("read"),__pcpu_size_call_return(raw_cpu_read_, (pcp))) #endif #ifndef __this_cpu_write -# define __this_cpu_write(pcp, val) __pcpu_size_call(raw_cpu_write_, (pcp), (val)) +# define __this_cpu_write(pcp, val) \ +do { __this_cpu_preempt_check("write"); \ + __pcpu_size_call(raw_cpu_write_, (pcp), (val)); \ +} while (0) #endif #ifndef __this_cpu_add -# define __this_cpu_add(pcp, val) __pcpu_size_call(raw_cpu_add_, (pcp), (val)) +# define __this_cpu_add(pcp, val) \ +do { __this_cpu_preempt_check("add"); \ + __pcpu_size_call(raw_cpu_add_, (pcp), (val)); \ +} while (0) #endif #ifndef __this_cpu_sub @@ -752,16 +764,23 @@ do { \ #endif #ifndef __this_cpu_and -# define __this_cpu_and(pcp, val) __pcpu_size_call(raw_cpu_and_, (pcp), (val)) +# define __this_cpu_and(pcp, val) \ +do { __this_cpu_preempt_check("and"); \ + __pcpu_size_call(raw_cpu_and_, (pcp), (val)); \ +} while (0) + #endif #ifndef __this_cpu_or -# define __this_cpu_or(pcp, val) __pcpu_size_call(raw_cpu_or_, (pcp), (val)) +# define __this_cpu_or(pcp, val) \ +do { __this_cpu_preempt_check("or"); \ + __pcpu_size_call(raw_cpu_or_, (pcp), (val)); \ +} while (0) #endif #ifndef __this_cpu_add_return # define __this_cpu_add_return(pcp, val) \ - __pcpu_size_call_return2(raw_cpu_add_return_, pcp, val) + (__this_cpu_preempt_check("add_return"),__pcpu_size_call_return2(raw_cpu_add_return_, pcp, val)) #endif #define __this_cpu_sub_return(pcp, val) __this_cpu_add_return(pcp, -(typeof(pcp))(val)) @@ -770,17 +789,17 @@ do { \ #ifndef __this_cpu_xchg # define __this_cpu_xchg(pcp, nval) \ - __pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval) + (__this_cpu_preempt_check("xchg"),__pcpu_size_call_return2(raw_cpu_xchg_, (pcp), nval)) #endif #ifndef __this_cpu_cmpxchg # define __this_cpu_cmpxchg(pcp, oval, nval) \ - __pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval) + (__this_cpu_preempt_check("cmpxchg"),__pcpu_size_call_return2(raw_cpu_cmpxchg_, pcp, oval, nval)) #endif #ifndef __this_cpu_cmpxchg_double # define __this_cpu_cmpxchg_double(pcp1, pcp2, oval1, oval2, nval1, nval2) \ - __pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2)) + (__this_cpu_preempt_check("cmpxchg_double"),__pcpu_double_call_return_bool(raw_cpu_cmpxchg_double_, (pcp1), (pcp2), (oval1), (oval2), (nval1), (nval2))) #endif #endif /* __LINUX_PERCPU_H */ -- cgit v1.2.3 From 64b47e8fdb40a0d46e8cf458dd3e24f8afa073f6 Mon Sep 17 00:00:00 2001 From: Josh Triplett Date: Mon, 7 Apr 2014 15:39:45 -0700 Subject: lglock: map to spinlock when !CONFIG_SMP When the system has only one CPU, lglock is effectively a spinlock; map it directly to spinlock to eliminate the indirection and duplicate code. In addition to removing overhead, this drops 1.6k of code with a defconfig modified to have !CONFIG_SMP, and 1.1k with a minimal config. Signed-off-by: Josh Triplett Cc: Rusty Russell Cc: Michal Marek Cc: Thomas Gleixner Cc: David Howells Cc: "H. Peter Anvin" Cc: Nick Piggin Cc: Peter Zijlstra Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/lglock.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lglock.h b/include/linux/lglock.h index 96549abe8842..0081f000e34b 100644 --- a/include/linux/lglock.h +++ b/include/linux/lglock.h @@ -25,6 +25,8 @@ #include #include +#ifdef CONFIG_SMP + #ifdef CONFIG_DEBUG_LOCK_ALLOC #define LOCKDEP_INIT_MAP lockdep_init_map #else @@ -57,4 +59,18 @@ void lg_local_unlock_cpu(struct lglock *lg, int cpu); void lg_global_lock(struct lglock *lg); void lg_global_unlock(struct lglock *lg); +#else +/* When !CONFIG_SMP, map lglock to spinlock */ +#define lglock spinlock +#define DEFINE_LGLOCK(name) DEFINE_SPINLOCK(name) +#define DEFINE_STATIC_LGLOCK(name) static DEFINE_SPINLOCK(name) +#define lg_lock_init(lg, name) spin_lock_init(lg) +#define lg_local_lock spin_lock +#define lg_local_unlock spin_unlock +#define lg_local_lock_cpu(lg, cpu) spin_lock(lg) +#define lg_local_unlock_cpu(lg, cpu) spin_unlock(lg) +#define lg_global_lock spin_lock +#define lg_global_unlock spin_unlock +#endif + #endif -- cgit v1.2.3 From 5fb6b953bb7aa86a9c8ea760934982cedc45c52b Mon Sep 17 00:00:00 2001 From: Heiko Carstens Date: Tue, 8 Apr 2014 12:55:46 +0200 Subject: include/linux/syscalls.h: add sys_renameat2() prototype Signed-off-by: Heiko Carstens Signed-off-by: Linus Torvalds --- include/linux/syscalls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 2aa8b749f13d..697ceb70a9a9 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -748,6 +748,9 @@ asmlinkage long sys_linkat(int olddfd, const char __user *oldname, int newdfd, const char __user *newname, int flags); asmlinkage long sys_renameat(int olddfd, const char __user * oldname, int newdfd, const char __user * newname); +asmlinkage long sys_renameat2(int olddfd, const char __user *oldname, + int newdfd, const char __user *newname, + unsigned int flags); asmlinkage long sys_futimesat(int dfd, const char __user *filename, struct timeval __user *utimes); asmlinkage long sys_faccessat(int dfd, const char __user *filename, int mode); -- cgit v1.2.3 From de7b2973903c6cc50b31ee5682a69b2219b9919d Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Tue, 8 Apr 2014 17:26:21 -0400 Subject: tracepoint: Use struct pointer instead of name hash for reg/unreg tracepoints Register/unregister tracepoint probes with struct tracepoint pointer rather than tracepoint name. This change, which vastly simplifies tracepoint.c, has been proposed by Steven Rostedt. It also removes 8.8kB (mostly of text) to the vmlinux size. From this point on, the tracers need to pass a struct tracepoint pointer to probe register/unregister. A probe can now only be connected to a tracepoint that exists. Moreover, tracers are responsible for unregistering the probe before the module containing its associated tracepoint is unloaded. text data bss dec hex filename 10443444 4282528 10391552 25117524 17f4354 vmlinux.orig 10434930 4282848 10391552 25109330 17f2352 vmlinux Link: http://lkml.kernel.org/r/1396992381-23785-2-git-send-email-mathieu.desnoyers@efficios.com CC: Ingo Molnar CC: Frederic Weisbecker CC: Andrew Morton CC: Frank Ch. Eigler CC: Johannes Berg Signed-off-by: Mathieu Desnoyers [ SDR - fixed return val in void func in tracepoint_module_going() ] Signed-off-by: Steven Rostedt --- include/linux/ftrace_event.h | 22 ++++++++++++++++++++-- include/linux/tracepoint.h | 41 +++++++++++++++++++++++++---------------- 2 files changed, 45 insertions(+), 18 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ftrace_event.h b/include/linux/ftrace_event.h index cdc30111d2f8..d16da3e53bc7 100644 --- a/include/linux/ftrace_event.h +++ b/include/linux/ftrace_event.h @@ -7,6 +7,7 @@ #include #include #include +#include struct trace_array; struct trace_buffer; @@ -232,6 +233,7 @@ enum { TRACE_EVENT_FL_IGNORE_ENABLE_BIT, TRACE_EVENT_FL_WAS_ENABLED_BIT, TRACE_EVENT_FL_USE_CALL_FILTER_BIT, + TRACE_EVENT_FL_TRACEPOINT_BIT, }; /* @@ -244,6 +246,7 @@ enum { * (used for module unloading, if a module event is enabled, * it is best to clear the buffers that used it). * USE_CALL_FILTER - For ftrace internal events, don't use file filter + * TRACEPOINT - Event is a tracepoint */ enum { TRACE_EVENT_FL_FILTERED = (1 << TRACE_EVENT_FL_FILTERED_BIT), @@ -252,12 +255,17 @@ enum { TRACE_EVENT_FL_IGNORE_ENABLE = (1 << TRACE_EVENT_FL_IGNORE_ENABLE_BIT), TRACE_EVENT_FL_WAS_ENABLED = (1 << TRACE_EVENT_FL_WAS_ENABLED_BIT), TRACE_EVENT_FL_USE_CALL_FILTER = (1 << TRACE_EVENT_FL_USE_CALL_FILTER_BIT), + TRACE_EVENT_FL_TRACEPOINT = (1 << TRACE_EVENT_FL_TRACEPOINT_BIT), }; struct ftrace_event_call { struct list_head list; struct ftrace_event_class *class; - char *name; + union { + char *name; + /* Set TRACE_EVENT_FL_TRACEPOINT flag when using "tp" */ + struct tracepoint *tp; + }; struct trace_event event; const char *print_fmt; struct event_filter *filter; @@ -271,6 +279,7 @@ struct ftrace_event_call { * bit 3: ftrace internal event (do not enable) * bit 4: Event was enabled by module * bit 5: use call filter rather than file filter + * bit 6: Event is a tracepoint */ int flags; /* static flags of different events */ @@ -283,6 +292,15 @@ struct ftrace_event_call { #endif }; +static inline const char * +ftrace_event_name(struct ftrace_event_call *call) +{ + if (call->flags & TRACE_EVENT_FL_TRACEPOINT) + return call->tp ? call->tp->name : NULL; + else + return call->name; +} + struct trace_array; struct ftrace_subsystem_dir; @@ -353,7 +371,7 @@ struct ftrace_event_file { #define __TRACE_EVENT_FLAGS(name, value) \ static int __init trace_init_flags_##name(void) \ { \ - event_##name.flags = value; \ + event_##name.flags |= value; \ return 0; \ } \ early_initcall(trace_init_flags_##name); diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 812b2553dfd8..08150e265761 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -6,7 +6,7 @@ * * See Documentation/trace/tracepoints.txt. * - * (C) Copyright 2008 Mathieu Desnoyers + * Copyright (C) 2008-2014 Mathieu Desnoyers * * Heavily inspired from the Linux Kernel Markers. * @@ -21,6 +21,7 @@ struct module; struct tracepoint; +struct notifier_block; struct tracepoint_func { void *func; @@ -35,18 +36,13 @@ struct tracepoint { struct tracepoint_func __rcu *funcs; }; -/* - * Connect a probe to a tracepoint. - * Internal API, should not be used directly. - */ -extern int tracepoint_probe_register(const char *name, void *probe, void *data); - -/* - * Disconnect a probe from a tracepoint. - * Internal API, should not be used directly. - */ extern int -tracepoint_probe_unregister(const char *name, void *probe, void *data); +tracepoint_probe_register(struct tracepoint *tp, void *probe, void *data); +extern int +tracepoint_probe_unregister(struct tracepoint *tp, void *probe, void *data); +extern void +for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), + void *priv); #ifdef CONFIG_MODULES struct tp_module { @@ -54,12 +50,25 @@ struct tp_module { unsigned int num_tracepoints; struct tracepoint * const *tracepoints_ptrs; }; + bool trace_module_has_bad_taint(struct module *mod); +extern int register_tracepoint_module_notifier(struct notifier_block *nb); +extern int unregister_tracepoint_module_notifier(struct notifier_block *nb); #else static inline bool trace_module_has_bad_taint(struct module *mod) { return false; } +static inline +int register_tracepoint_module_notifier(struct notifier_block *nb) +{ + return 0; +} +static inline +int unregister_tracepoint_module_notifier(struct notifier_block *nb) +{ + return 0; +} #endif /* CONFIG_MODULES */ /* @@ -160,14 +169,14 @@ static inline void tracepoint_synchronize_unregister(void) static inline int \ register_trace_##name(void (*probe)(data_proto), void *data) \ { \ - return tracepoint_probe_register(#name, (void *)probe, \ - data); \ + return tracepoint_probe_register(&__tracepoint_##name, \ + (void *)probe, data); \ } \ static inline int \ unregister_trace_##name(void (*probe)(data_proto), void *data) \ { \ - return tracepoint_probe_unregister(#name, (void *)probe, \ - data); \ + return tracepoint_probe_unregister(&__tracepoint_##name,\ + (void *)probe, data); \ } \ static inline void \ check_trace_callback_type_##name(void (*cb)(data_proto)) \ -- cgit v1.2.3 From eb7d035c59431bb12e1aa6e69ddd3940352faddb Mon Sep 17 00:00:00 2001 From: "Steven Rostedt (Red Hat)" Date: Tue, 8 Apr 2014 20:09:40 -0400 Subject: tracepoint: Simplify tracepoint module search Instead of copying the num_tracepoints and tracepoints_ptrs from the module structure to the tp_mod structure, which only uses it to find the module associated to tracepoints of modules that are coming and going, simply copy the pointer to the module struct to the tracepoint tp_module structure. Also removed un-needed brackets around an if statement. Link: http://lkml.kernel.org/r/20140408201705.4dad2c4a@gandalf.local.home Acked-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 08150e265761..69a298b07357 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -47,8 +47,7 @@ for_each_kernel_tracepoint(void (*fct)(struct tracepoint *tp, void *priv), #ifdef CONFIG_MODULES struct tp_module { struct list_head list; - unsigned int num_tracepoints; - struct tracepoint * const *tracepoints_ptrs; + struct module *mod; }; bool trace_module_has_bad_taint(struct module *mod); -- cgit v1.2.3 From b725dfea24b89de672c055b34b22398283a3f4bc Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 9 Apr 2014 09:24:43 -0400 Subject: tracepoint: Fix sparse warnings in tracepoint.c Fix the following sparse warnings: CHECK kernel/tracepoint.c kernel/tracepoint.c:184:18: warning: incorrect type in assignment (different address spaces) kernel/tracepoint.c:184:18: expected struct tracepoint_func *tp_funcs kernel/tracepoint.c:184:18: got struct tracepoint_func [noderef] *funcs kernel/tracepoint.c:216:18: warning: incorrect type in assignment (different address spaces) kernel/tracepoint.c:216:18: expected struct tracepoint_func *tp_funcs kernel/tracepoint.c:216:18: got struct tracepoint_func [noderef] *funcs kernel/tracepoint.c:392:24: error: return expression in void function CC kernel/tracepoint.o kernel/tracepoint.c: In function tracepoint_module_going: kernel/tracepoint.c:491:6: warning: symbol 'syscall_regfunc' was not declared. Should it be static? kernel/tracepoint.c:508:6: warning: symbol 'syscall_unregfunc' was not declared. Should it be static? Link: http://lkml.kernel.org/r/1397049883-28692-1-git-send-email-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/tracepoint.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/tracepoint.h b/include/linux/tracepoint.h index 69a298b07357..9d30ee469c2a 100644 --- a/include/linux/tracepoint.h +++ b/include/linux/tracepoint.h @@ -80,6 +80,11 @@ static inline void tracepoint_synchronize_unregister(void) synchronize_sched(); } +#ifdef CONFIG_HAVE_SYSCALL_TRACEPOINTS +extern void syscall_regfunc(void); +extern void syscall_unregfunc(void); +#endif /* CONFIG_HAVE_SYSCALL_TRACEPOINTS */ + #define PARAMS(args...) args #endif /* _LINUX_TRACEPOINT_H */ -- cgit v1.2.3 From 565cbdc2fec92ef2ae75995e06e69172ed2edecd Mon Sep 17 00:00:00 2001 From: Mark Charlebois Date: Tue, 20 Nov 2012 22:13:10 +0100 Subject: LLVMLinux: Add support for clang to compiler.h and new compiler-clang.h Add a compiler-clang.h file to add specific macros needed for compiling the kernel with clang. Initially the only override required is the macro for silencing the compiler for a purposefully uninintialized variable. Author: Mark Charlebois Signed-off-by: Mark Charlebois Signed-off-by: Behan Webster --- include/linux/compiler-clang.h | 12 ++++++++++++ include/linux/compiler.h | 7 +++++++ 2 files changed, 19 insertions(+) create mode 100644 include/linux/compiler-clang.h (limited to 'include/linux') diff --git a/include/linux/compiler-clang.h b/include/linux/compiler-clang.h new file mode 100644 index 000000000000..d1e49d52b640 --- /dev/null +++ b/include/linux/compiler-clang.h @@ -0,0 +1,12 @@ +#ifndef __LINUX_COMPILER_H +#error "Please don't include directly, include instead." +#endif + +/* Some compiler specific definitions are overwritten here + * for Clang compiler + */ + +#ifdef uninitialized_var +#undef uninitialized_var +#define uninitialized_var(x) x = *(&(x)) +#endif diff --git a/include/linux/compiler.h b/include/linux/compiler.h index 2472740d7ab2..ee7239ea1583 100644 --- a/include/linux/compiler.h +++ b/include/linux/compiler.h @@ -63,6 +63,13 @@ extern void __chk_io_ptr(const volatile void __iomem *); # include #endif +/* Clang compiler defines __GNUC__. So we will overwrite implementations + * coming from above header files here + */ +#ifdef __clang__ +#include +#endif + /* * Generic compiler-dependent macros required for kernel * build go below this comment. Actual compiler/compiler version -- cgit v1.2.3 From c4586256f0c440bc2bdb29d2cbb915f0ca785d26 Mon Sep 17 00:00:00 2001 From: Behan Webster Date: Thu, 13 Feb 2014 12:21:48 -0800 Subject: x86: LLVMLinux: Fix "incomplete type const struct x86cpu_device_id" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Similar to the fix in 40413dcb7b273bda681dca38e6ff0bbb3728ef11 MODULE_DEVICE_TABLE(x86cpu, ...) expects the struct to be called struct x86cpu_device_id, and not struct x86_cpu_id which is what is used in the rest of the kernel code. Although gcc seems to ignore this error, clang fails without this define to fix the name. Code from drivers/thermal/x86_pkg_temp_thermal.c static const struct x86_cpu_id __initconst pkg_temp_thermal_ids[] = { ... }; MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); Error from clang: drivers/thermal/x86_pkg_temp_thermal.c:577:1: error: variable has incomplete type 'const struct x86cpu_device_id' MODULE_DEVICE_TABLE(x86cpu, pkg_temp_thermal_ids); ^ include/linux/module.h:145:3: note: expanded from macro 'MODULE_DEVICE_TABLE' MODULE_GENERIC_TABLE(type##_device, name) ^ include/linux/module.h:87:32: note: expanded from macro 'MODULE_GENERIC_TABLE' extern const struct gtype##_id __mod_##gtype##_table \ ^ :143:1: note: expanded from here __mod_x86cpu_device_table ^ drivers/thermal/x86_pkg_temp_thermal.c:577:1: note: forward declaration of 'struct x86cpu_device_id' include/linux/module.h:145:3: note: expanded from macro 'MODULE_DEVICE_TABLE' MODULE_GENERIC_TABLE(type##_device, name) ^ include/linux/module.h:87:21: note: expanded from macro 'MODULE_GENERIC_TABLE' extern const struct gtype##_id __mod_##gtype##_table \ ^ :141:1: note: expanded from here x86cpu_device_id ^ 1 error generated. Signed-off-by: Behan Webster Signed-off-by: Jan-Simon Möller Acked-by: Greg Kroah-Hartman --- include/linux/mod_devicetable.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mod_devicetable.h b/include/linux/mod_devicetable.h index 9a165a213d93..44eeef0da186 100644 --- a/include/linux/mod_devicetable.h +++ b/include/linux/mod_devicetable.h @@ -556,6 +556,11 @@ struct amba_id { * See documentation of "x86_match_cpu" for details. */ +/* + * MODULE_DEVICE_TABLE expects this struct to be called x86cpu_device_id. + * Although gcc seems to ignore this error, clang fails without this define. + */ +#define x86cpu_device_id x86_cpu_id struct x86_cpu_id { __u16 vendor; __u16 family; -- cgit v1.2.3 From abb43f6998eb6466ea392d3757e673bbdb6ae171 Mon Sep 17 00:00:00 2001 From: Mathieu Desnoyers Date: Wed, 9 Apr 2014 17:06:08 -0400 Subject: tracing: Fix anonymous unions in struct ftrace_event_call gcc <= 4.5.x has significant limitations with respect to initialization of anonymous unions within structures. They need to be surrounded by brackets, _and_ they need to be initialized in the same order in which they appear in the structure declaration. Link: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=10676 Link: http://lkml.kernel.org/r/1397077568-3156-1-git-send-email-mathieu.desnoyers@efficios.com Signed-off-by: Mathieu Desnoyers Signed-off-by: Steven Rostedt --- include/linux/syscalls.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/syscalls.h b/include/linux/syscalls.h index 1e67b7a5968c..af94c98087c4 100644 --- a/include/linux/syscalls.h +++ b/include/linux/syscalls.h @@ -119,8 +119,10 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call __used \ event_enter_##sname = { \ - .name = "sys_enter"#sname, \ .class = &event_class_syscall_enter, \ + { \ + .name = "sys_enter"#sname, \ + }, \ .event.funcs = &enter_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ @@ -133,8 +135,10 @@ extern struct trace_event_functions exit_syscall_print_funcs; static struct syscall_metadata __syscall_meta_##sname; \ static struct ftrace_event_call __used \ event_exit_##sname = { \ - .name = "sys_exit"#sname, \ .class = &event_class_syscall_exit, \ + { \ + .name = "sys_exit"#sname, \ + }, \ .event.funcs = &exit_syscall_print_funcs, \ .data = (void *)&__syscall_meta_##sname,\ .flags = TRACE_EVENT_FL_CAP_ANY, \ -- cgit v1.2.3 From 360f92c2443073143467a0088daffec96a17910b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Apr 2014 20:27:01 -0600 Subject: block: fix regression with block enabled tagging Martin reported that his test system would not boot with current git, it oopsed with this: BUG: unable to handle kernel paging request at ffff88046c6c9e80 IP: [] blk_queue_start_tag+0x90/0x150 PGD 1ddf067 PUD 1de2067 PMD 47fc7d067 PTE 800000046c6c9060 Oops: 0002 [#1] SMP DEBUG_PAGEALLOC Modules linked in: sd_mod lpfc(+) scsi_transport_fc scsi_tgt oracleasm rpcsec_gss_krb5 ipv6 igb dca i2c_algo_bit i2c_core hwmon CPU: 3 PID: 87 Comm: kworker/u17:1 Not tainted 3.14.0+ #246 Hardware name: Supermicro X9DRX+-F/X9DRX+-F, BIOS 3.00 07/09/2013 Workqueue: events_unbound async_run_entry_fn task: ffff8802743c2150 ti: ffff880273d02000 task.ti: ffff880273d02000 RIP: 0010:[] [] blk_queue_start_tag+0x90/0x150 RSP: 0018:ffff880273d03a58 EFLAGS: 00010092 RAX: ffff88046c6c9e78 RBX: ffff880077208e78 RCX: 00000000fffc8da6 RDX: 00000000fffc186d RSI: 0000000000000009 RDI: 00000000fffc8d9d RBP: ffff880273d03a88 R08: 0000000000000001 R09: ffff8800021c2410 R10: 0000000000000005 R11: 0000000000015b30 R12: ffff88046c5bb8a0 R13: ffff88046c5c0890 R14: 000000000000001e R15: 000000000000001e FS: 0000000000000000(0000) GS:ffff880277b00000(0000) knlGS:0000000000000000 CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 CR2: ffff88046c6c9e80 CR3: 00000000018f6000 CR4: 00000000000407e0 Stack: ffff880273d03a98 ffff880474b18800 0000000000000000 ffff880474157000 ffff88046c5c0890 ffff880077208e78 ffff880273d03ae8 ffffffff813b9e62 ffff880200000010 ffff880474b18968 ffff880474b18848 ffff88046c5c0cd8 Call Trace: [] scsi_request_fn+0xf2/0x510 [] __blk_run_queue+0x37/0x50 [] blk_execute_rq_nowait+0xb3/0x130 [] blk_execute_rq+0x64/0xf0 [] ? bit_waitqueue+0xd0/0xd0 [] scsi_execute+0xe5/0x180 [] scsi_execute_req_flags+0x9a/0x110 [] sd_spinup_disk+0x94/0x460 [sd_mod] [] ? __unmap_hugepage_range+0x200/0x2f0 [] sd_revalidate_disk+0xaa/0x3f0 [sd_mod] [] sd_probe_async+0xd8/0x200 [sd_mod] [] async_run_entry_fn+0x3f/0x140 [] process_one_work+0x175/0x410 [] worker_thread+0x123/0x400 [] ? manage_workers+0x160/0x160 [] kthread+0xce/0xf0 [] ? kthread_freezable_should_stop+0x70/0x70 [] ret_from_fork+0x7c/0xb0 [] ? kthread_freezable_should_stop+0x70/0x70 Code: 48 0f ab 11 72 db 48 81 4b 40 00 00 10 00 89 83 08 01 00 00 48 89 df 49 8b 04 24 48 89 1c d0 e8 f7 a8 ff ff 49 8b 85 28 05 00 00 <48> 89 58 08 48 89 03 49 8d 85 28 05 00 00 48 89 43 08 49 89 9d RIP [] blk_queue_start_tag+0x90/0x150 RSP CR2: ffff88046c6c9e80 Martin bisected and found this to be the problem patch; commit 6d113398dcf4dfcd9787a4ead738b186f7b7ff0f Author: Jan Kara Date: Mon Feb 24 16:39:54 2014 +0100 block: Stop abusing rq->csd.list in blk-softirq and the problem was immediately apparent. The patch states that it is safe to reuse queuelist at completion time, since it is no longer used. However, that is not true if a device is using block enabled tagging. If that is the case, then the queuelist is reused to keep track of busy tags. If a device also ended up using softirq completions, we'd reuse ->queuelist for the IPI handling while block tagging was still using it. Boom. Fix this by adding a new ipi_list list head, and share the memory used with the request hash table. The hash table is never used after the request is moved to the dispatch list, which happens long before any potential completion of the request. Add a new request bit for this, so we don't have cases that check rq->hash while it could potentially have been reused for the IPI completion. Reported-by: Martin K. Petersen Tested-by: Benjamin Herrenschmidt Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 13 ++++++++++++- 2 files changed, 14 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bbc3a6c88fce..aa0eaa2d0bd8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -189,6 +189,7 @@ enum rq_flag_bits { __REQ_KERNEL, /* direct IO to kernel pages */ __REQ_PM, /* runtime pm request */ __REQ_END, /* last of chain of requests */ + __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_NR_BITS, /* stops here */ }; @@ -241,5 +242,6 @@ enum rq_flag_bits { #define REQ_KERNEL (1ULL << __REQ_KERNEL) #define REQ_PM (1ULL << __REQ_PM) #define REQ_END (1ULL << __REQ_END) +#define REQ_HASHED (1ULL << __REQ_HASHED) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1e1fa3f93d5f..99617cf7dd1a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -118,7 +118,18 @@ struct request { struct bio *bio; struct bio *biotail; - struct hlist_node hash; /* merge hash */ + /* + * The hash is used inside the scheduler, and killed once the + * request reaches the dispatch list. The ipi_list is only used + * to queue the request for softirq completion, which is long + * after the request has been unhashed (and even removed from + * the dispatch list). + */ + union { + struct hlist_node hash; /* merge hash */ + struct list_head ipi_list; + }; + /* * The rb_node is only used inside the io scheduler, requests * are pruned when moved to the dispatch queue. So let the -- cgit v1.2.3 From 42f614201e80ff4cfb8b285d7190149a8e1e6cec Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 24 Mar 2014 10:46:25 -0600 Subject: NVMe: per-cpu io queues The device's IO queues are associated with CPUs, so we can use a per-cpu variable to map the a qid to a cpu. This provides a convienient way to optimally assign queues to multiple cpus when the device supports fewer queues than the host has cpus. The previous implementation may have assigned these poorly in these situations. This patch addresses this by sharing queues among cpus that are "close" together and should have a lower lock contention penalty. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7c3f85bc10f1..f0f95c719685 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -74,12 +74,16 @@ enum { struct nvme_dev { struct list_head node; struct nvme_queue __rcu **queues; + unsigned short __percpu *io_queue; u32 __iomem *dbs; struct pci_dev *pci_dev; struct dma_pool *prp_page_pool; struct dma_pool *prp_small_pool; int instance; - int queue_count; + unsigned queue_count; + unsigned online_queues; + unsigned max_qid; + int q_depth; u32 db_stride; u32 ctrl_config; struct msix_entry *entry; -- cgit v1.2.3 From 33b1e95c90447ea73e37e837ea0268a894919f19 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 24 Mar 2014 10:46:26 -0600 Subject: NVMe: CPU hot plug notification Registers with hot cpu notification to rebalance, and potentially allocate additional, io queues. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index f0f95c719685..15d071eba8b8 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -92,6 +92,7 @@ struct nvme_dev { struct kref kref; struct miscdevice miscdev; struct work_struct reset_work; + struct notifier_block nb; char name[12]; char serial[20]; char model[40]; -- cgit v1.2.3 From b355084a891985d4cd0ca23b1a83366af2c4232d Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Fri, 4 Apr 2014 11:43:36 -0600 Subject: NVMe: Make I/O timeout a module parameter Increase the default timeout to 30 seconds to match SCSI. Signed-off-by: Keith Busch [use byte instead of ushort] Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 15d071eba8b8..1da0807c65bc 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -66,7 +66,8 @@ enum { #define NVME_VS(major, minor) (major << 16 | minor) -#define NVME_IO_TIMEOUT (5 * HZ) +extern unsigned char io_timeout; +#define NVME_IO_TIMEOUT (io_timeout * HZ) /* * Represents an NVM Express device. Each nvme_dev is a PCI function. -- cgit v1.2.3 From edd10d33283899fb15d99a290dcc9ceb3604ca78 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Thu, 3 Apr 2014 16:45:23 -0600 Subject: NVMe: Retry failed commands with non-fatal errors For commands returned with failed status, queue these for resubmission and continue retrying them until success or for a limited amount of time. The final timeout was arbitrarily chosen so requests can't be retried indefinitely. Since these are requeued on the nvmeq that submitted the command, the callbacks have to take an nvmeq instead of an nvme_dev as a parameter so that we can use the locked queue to append the iod to retry later. The nvme_iod conviently can be used to track how long we've been trying to successfully complete an iod request. The nvme_iod also provides the nvme prp dma mappings, so I had to move a few things around so we can keep those mappings. Signed-off-by: Keith Busch [fixed checkpatch issue with long line] Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1da0807c65bc..b95431d0338b 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -136,6 +136,7 @@ struct nvme_iod { int length; /* Of data, in bytes */ unsigned long start_time; dma_addr_t first_dma; + struct list_head node; struct scatterlist sg[0]; }; @@ -151,8 +152,7 @@ static inline u64 nvme_block_nr(struct nvme_ns *ns, sector_t sector) */ void nvme_free_iod(struct nvme_dev *dev, struct nvme_iod *iod); -int nvme_setup_prps(struct nvme_dev *dev, struct nvme_common_command *cmd, - struct nvme_iod *iod, int total_len, gfp_t gfp); +int nvme_setup_prps(struct nvme_dev *, struct nvme_iod *, int , gfp_t); struct nvme_iod *nvme_map_user_pages(struct nvme_dev *dev, int write, unsigned long addr, unsigned length); void nvme_unmap_user_pages(struct nvme_dev *dev, int write, -- cgit v1.2.3 From 312103d64d0fcadb332899a2c84b357ddb18f4e3 Mon Sep 17 00:00:00 2001 From: Chris Metcalf Date: Tue, 25 Mar 2014 09:25:41 -0400 Subject: AUDIT: make audit_is_compat depend on CONFIG_AUDIT_COMPAT_GENERIC On systems with CONFIG_COMPAT we introduced the new requirement that audit_classify_compat_syscall() exists. This wasn't true for everything (apparently not for "tilegx", which I know less that nothing about.) Instead of wrapping the preprocessor optomization with CONFIG_COMPAT we should have used the new CONFIG_AUDIT_COMPAT_GENERIC. This patch uses that config option to make sure only arches which intend to implement this have the requirement. This works fine for tilegx according to Chris Metcalf Signed-off-by: Eric Paris --- include/linux/audit.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/audit.h b/include/linux/audit.h index 611a59a56f1a..22cfddb75566 100644 --- a/include/linux/audit.h +++ b/include/linux/audit.h @@ -102,7 +102,7 @@ struct filename; extern void audit_log_session_info(struct audit_buffer *ab); -#ifdef CONFIG_COMPAT +#ifdef CONFIG_AUDIT_COMPAT_GENERIC #define audit_is_compat(arch) (!((arch) & __AUDIT_ARCH_64BIT)) #else #define audit_is_compat(arch) false -- cgit v1.2.3 From 34bf6ef94a835a8f1d8abd3e7d38c6c08d205867 Mon Sep 17 00:00:00 2001 From: Dave Hansen Date: Tue, 8 Apr 2014 13:44:27 -0700 Subject: mm: slab/slub: use page->list consistently instead of page->lru 'struct page' has two list_head fields: 'lru' and 'list'. Conveniently, they are unioned together. This means that code can use them interchangably, which gets horribly confusing like with this nugget from slab.c: > list_del(&page->lru); > if (page->active == cachep->num) > list_add(&page->list, &n->slabs_full); This patch makes the slab and slub code use page->lru universally instead of mixing ->list and ->lru. So, the new rule is: page->lru is what the you use if you want to keep your page on a list. Don't like the fact that it's not called ->list? Too bad. Signed-off-by: Dave Hansen Acked-by: Christoph Lameter Acked-by: David Rientjes Cc: Pekka Enberg Signed-off-by: Andrew Morton Signed-off-by: Pekka Enberg --- include/linux/mm_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 290901a8c1de..84b74080beb7 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -124,6 +124,8 @@ struct page { union { struct list_head lru; /* Pageout list, eg. active_list * protected by zone->lru_lock ! + * Can be used as a generic list + * by the page owner. */ struct { /* slub per cpu partial pages */ struct page *next; /* Next partial slab */ @@ -136,7 +138,6 @@ struct page { #endif }; - struct list_head list; /* slobs list of pages */ struct slab *slab_page; /* slab fields */ struct rcu_head rcu_head; /* Used by SLAB * when destroying via RCU -- cgit v1.2.3 From 676d23690fb62b5d51ba5d659935e9f7d9da9f8e Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Fri, 11 Apr 2014 16:15:36 -0400 Subject: net: Fix use after free by removing length arg from sk_data_ready callbacks. Several spots in the kernel perform a sequence like: skb_queue_tail(&sk->s_receive_queue, skb); sk->sk_data_ready(sk, skb->len); But at the moment we place the SKB onto the socket receive queue it can be consumed and freed up. So this skb->len access is potentially to freed up memory. Furthermore, the skb->len can be modified by the consumer so it is possible that the value isn't accurate. And finally, no actual implementation of this callback actually uses the length argument. And since nobody actually cared about it's value, lots of call sites pass arbitrary values in such as '0' and even '1'. So just remove the length argument from the callback, that way there is no confusion whatsoever and all of these use-after-free cases get fixed as a side effect. Based upon a patch by Eric Dumazet and his suggestion to audit this issue tree-wide. Signed-off-by: David S. Miller --- include/linux/sunrpc/svcsock.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sunrpc/svcsock.h b/include/linux/sunrpc/svcsock.h index 62fd1b756e99..88f7e1a477fe 100644 --- a/include/linux/sunrpc/svcsock.h +++ b/include/linux/sunrpc/svcsock.h @@ -22,7 +22,7 @@ struct svc_sock { /* We keep the old state_change and data_ready CB's here */ void (*sk_ostate)(struct sock *); - void (*sk_odata)(struct sock *, int bytes); + void (*sk_odata)(struct sock *); void (*sk_owspace)(struct sock *); /* private TCP part */ -- cgit v1.2.3