From b80d5ccca3a012e91ca64a2a0b13049163a6a698 Mon Sep 17 00:00:00 2001 From: Haiyan Hu Date: Tue, 10 Sep 2013 11:25:37 +0800 Subject: NVMe: Avoid shift operation when writing cq head doorbell Changes the type of dev->db_stride to unsigned and changes the value stored there to be 1 << the current value. Then there is less calculation to be done at completion time. Signed-off-by: Haiyan Hu Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 26ebcf41c213..8119a476cb29 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -80,7 +80,7 @@ struct nvme_dev { struct dma_pool *prp_small_pool; int instance; int queue_count; - int db_stride; + u32 db_stride; u32 ctrl_config; struct msix_entry *entry; struct nvme_bar __iomem *bar; -- cgit v1.2.3 From 320a382746e0ab1304476ea7e986a8d416ab99db Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Wed, 23 Oct 2013 13:07:34 -0600 Subject: NVMe: compat SG_IO ioctl For 32-bit versions of sg3-utils running on a 64-bit system. This is mostly a copy from the relevent portions of fs/compat_ioctl.c, with slight modifications for going through block_device_operations. Signed-off-by: Keith Busch Reviewed-by: Vishal Verma [fixed up CONFIG_COMPAT=n build problems] Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8119a476cb29..5bc29197d90e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -165,6 +165,7 @@ int nvme_set_features(struct nvme_dev *dev, unsigned fid, unsigned dword11, struct sg_io_hdr; int nvme_sg_io(struct nvme_ns *ns, struct sg_io_hdr __user *u_hdr); +int nvme_sg_io32(struct nvme_ns *ns, unsigned long arg); int nvme_sg_get_version_num(int __user *ip); #endif /* _LINUX_NVME_H */ -- cgit v1.2.3 From 9a6b94584de1a0467d85b435df9c744c5c45a270 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:36 -0700 Subject: NVMe: Device resume error handling Adds controller error handling on resume power management. If the device fails to initialize, the device is queued for a reset. If the reset fails, a thread is spawned to remove the pci device. If the device resumes as "busy", the device is responding to admin commands but will not create IO queues. In this case, we need to remove the gendisks and free the IO queues since they can't be used and may be holding bios in their lists. From testing, the dma pools require a pci device so this had to change the pci driver 'remove' to release the dma resources in line with that call instead of after all references to the device are released. Signed-off-by: Keith Busch Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 5bc29197d90e..eed81cc56d7e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -87,6 +87,7 @@ struct nvme_dev { struct list_head namespaces; struct kref kref; struct miscdevice miscdev; + struct work_struct reset_work; char name[12]; char serial[20]; char model[40]; -- cgit v1.2.3 From ec65993443736a5091b68e80ff1734548944a4b8 Mon Sep 17 00:00:00 2001 From: Mel Gorman Date: Tue, 21 Jan 2014 14:33:16 -0800 Subject: mm, x86: Account for TLB flushes only when debugging Bisection between 3.11 and 3.12 fingered commit 9824cf97 ("mm: vmstats: tlb flush counters") to cause overhead problems. The counters are undeniably useful but how often do we really need to debug TLB flush related issues? It does not justify taking the penalty everywhere so make it a debugging option. Signed-off-by: Mel Gorman Tested-by: Davidlohr Bueso Reviewed-by: Rik van Riel Signed-off-by: Andrew Morton Cc: Hugh Dickins Cc: Alex Shi Cc: Linus Torvalds Cc: Peter Zijlstra Link: http://lkml.kernel.org/n/tip-XzxjntugxuwpxXhcrxqqh53b@git.kernel.org Signed-off-by: Ingo Molnar --- include/linux/vm_event_item.h | 4 +++- include/linux/vmstat.h | 8 ++++++++ 2 files changed, 11 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index c557c6d096de..3a712e2e7d76 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -71,12 +71,14 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, THP_ZERO_PAGE_ALLOC, THP_ZERO_PAGE_ALLOC_FAILED, #endif +#ifdef CONFIG_DEBUG_TLBFLUSH #ifdef CONFIG_SMP NR_TLB_REMOTE_FLUSH, /* cpu tried to flush others' tlbs */ NR_TLB_REMOTE_FLUSH_RECEIVED,/* cpu received ipi for flush */ -#endif +#endif /* CONFIG_SMP */ NR_TLB_LOCAL_FLUSH_ALL, NR_TLB_LOCAL_FLUSH_ONE, +#endif /* CONFIG_DEBUG_TLBFLUSH */ NR_VM_EVENT_ITEMS }; diff --git a/include/linux/vmstat.h b/include/linux/vmstat.h index e4b948080d20..80ebba9c2e87 100644 --- a/include/linux/vmstat.h +++ b/include/linux/vmstat.h @@ -83,6 +83,14 @@ static inline void vm_events_fold_cpu(int cpu) #define count_vm_numa_events(x, y) do { (void)(y); } while (0) #endif /* CONFIG_NUMA_BALANCING */ +#ifdef CONFIG_DEBUG_TLBFLUSH +#define count_vm_tlb_event(x) count_vm_event(x) +#define count_vm_tlb_events(x, y) count_vm_events(x, y) +#else +#define count_vm_tlb_event(x) do {} while (0) +#define count_vm_tlb_events(x, y) do { (void)(y); } while (0) +#endif + #define __count_zone_vm_events(item, zone, delta) \ __count_vm_events(item##_NORMAL - ZONE_NORMAL + \ zone_idx(zone), delta) -- cgit v1.2.3 From 6e5f52674ff0756e61a8879f6232b9ac33735cba Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sat, 25 Jan 2014 22:36:13 +0200 Subject: spi: spi.h: clarify the documentation of transfer_one Explicitly note the transfer_one and transfer_one_message are mutually exclusive, to make the text a little more newcomers friendly. Signed-off-by: Baruch Siach Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index a1d4ca290862..ad050f0dd39d 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -287,7 +287,10 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * - return 1 if the transfer is still in progress. When * the driver is finished with this transfer it must * call spi_finalize_current_transfer() so the subsystem - * can issue the next transfer + * can issue the next transfer. Note: transfer_one and + * transfer_one_message are mutually exclusive; when both + * are set, the generic subsystem does not call your + * transfer_one callback. * @unprepare_message: undo any work done by prepare_message(). * @cs_gpios: Array of GPIOs to use as chip select lines; one per CS * number. Any individual value may be -ENOENT for CS lines that -- cgit v1.2.3 From e9305331f64ab9e5210baa4307355635c5e849f4 Mon Sep 17 00:00:00 2001 From: Baruch Siach Date: Sat, 25 Jan 2014 22:36:15 +0200 Subject: spi: correct the transfer_one_message documentation wording The transfer_one_message callback handles messages, not transfers. Signed-off-by: Baruch Siach Signed-off-by: Mark Brown --- include/linux/spi/spi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/spi/spi.h b/include/linux/spi/spi.h index ad050f0dd39d..4203c66d8803 100644 --- a/include/linux/spi/spi.h +++ b/include/linux/spi/spi.h @@ -273,7 +273,7 @@ static inline void spi_unregister_driver(struct spi_driver *sdrv) * message while queuing transfers that arrive in the meantime. When the * driver is finished with this message, it must call * spi_finalize_current_message() so the subsystem can issue the next - * transfer + * message * @unprepare_transfer_hardware: there are currently no more messages on the * queue so the subsystem notifies the driver that it may relax the * hardware by issuing this call -- cgit v1.2.3 From d4b4ff8e28b474fac0fbfa9cfc40f88b9e41e380 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:37 -0700 Subject: NVMe: Schedule reset for failed controllers Schedules a controller reset when it indicates it has a failed status. If the device does not become ready after a reset, the pci device will be scheduled for removal. Signed-off-by: Keith Busch [fixed checkpatch issue] Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index eed81cc56d7e..117d877e8be5 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -95,6 +95,7 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u8 initialized; }; /* -- cgit v1.2.3 From c30341dc3c436cf43508cd44cdfbb3810c38c195 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 10 Dec 2013 13:10:38 -0700 Subject: NVMe: Abort timed out commands Send nvme abort command to io requests that have timed out on an initialized device. If the command is not returned after another timeout, schedule the controller for reset. Signed-off-by: Keith Busch [fix endianness issues] Signed-off-by: Matthew Wilcox --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 117d877e8be5..69ae03f6eb15 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -95,6 +95,7 @@ struct nvme_dev { u32 max_hw_sectors; u32 stripe_size; u16 oncs; + u16 abort_limit; u8 initialized; }; -- cgit v1.2.3 From f0276924fa35a3607920a58cf5d878212824b951 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Tue, 31 Dec 2013 11:38:50 +0800 Subject: blk-mq: Don't reserve a tag for flush request Reserving a tag (request) for flush to avoid dead lock is a overkill. A tag is valuable resource. We can track the number of flush requests and disallow having too many pending flush requests allocated. With this patch, blk_mq_alloc_request_pinned() could do a busy nop (but not a dead loop) if too many pending requests are allocated and new flush request is allocated. But this should not be a problem, too many pending flush requests are very rare case. I verified this can fix the deadlock caused by too many pending flush requests. Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 161b23105b1e..1e8f16f65af4 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -36,12 +36,15 @@ struct blk_mq_hw_ctx { struct list_head page_list; struct blk_mq_tags *tags; + atomic_t pending_flush; + unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int queue_depth; + unsigned int reserved_tags; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ -- cgit v1.2.3 From 0ae89beb283a0db5980d1d4781c7d7be2f2810d6 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Thu, 30 Jan 2014 10:11:28 +0100 Subject: can: add destructor for self generated skbs Self generated skbuffs in net/can/bcm.c are setting a skb->sk reference but no explicit destructor which is enforced since Linux 3.11 with commit 376c7311bdb6 (net: add a temporary sanity check in skb_orphan()). This patch adds some helper functions to make sure that a destructor is properly defined when a sock reference is assigned to a CAN related skb. To create an unshared skb owned by the original sock a common helper function has been introduced to replace open coded functions to create CAN echo skbs. Signed-off-by: Oliver Hartkopp Tested-by: Andre Naujoks Reviewed-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/can/skb.h | 38 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 38 insertions(+) (limited to 'include/linux') diff --git a/include/linux/can/skb.h b/include/linux/can/skb.h index 2f0543f7510c..f9bbbb472663 100644 --- a/include/linux/can/skb.h +++ b/include/linux/can/skb.h @@ -11,7 +11,9 @@ #define CAN_SKB_H #include +#include #include +#include /* * The struct can_skb_priv is used to transport additional information along @@ -42,4 +44,40 @@ static inline void can_skb_reserve(struct sk_buff *skb) skb_reserve(skb, sizeof(struct can_skb_priv)); } +static inline void can_skb_destructor(struct sk_buff *skb) +{ + sock_put(skb->sk); +} + +static inline void can_skb_set_owner(struct sk_buff *skb, struct sock *sk) +{ + if (sk) { + sock_hold(sk); + skb->destructor = can_skb_destructor; + skb->sk = sk; + } +} + +/* + * returns an unshared skb owned by the original sock to be echo'ed back + */ +static inline struct sk_buff *can_create_echo_skb(struct sk_buff *skb) +{ + if (skb_shared(skb)) { + struct sk_buff *nskb = skb_clone(skb, GFP_ATOMIC); + + if (likely(nskb)) { + can_skb_set_owner(nskb, skb->sk); + consume_skb(skb); + return nskb; + } else { + kfree_skb(skb); + return NULL; + } + } + + /* we can assume to have an unshared skb with proper owner */ + return skb; +} + #endif /* CAN_SKB_H */ -- cgit v1.2.3 From 17ead6c85c3d0ef57a14d1373f1f1cee2ce60ea8 Mon Sep 17 00:00:00 2001 From: Trond Myklebust Date: Sat, 1 Feb 2014 14:53:23 -0500 Subject: NFSv4: Fix memory corruption in nfs4_proc_open_confirm nfs41_wake_and_assign_slot() relies on the task->tk_msg.rpc_argp and task->tk_msg.rpc_resp always pointing to the session sequence arguments. nfs4_proc_open_confirm tries to pull a fast one by reusing the open sequence structure, thus causing corruption of the NFSv4 slot table. Cc: stable@vger.kernel.org # 3.12+ Signed-off-by: Trond Myklebust --- include/linux/nfs_xdr.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nfs_xdr.h b/include/linux/nfs_xdr.h index 3ccfcecf8999..b2fb167b2e6d 100644 --- a/include/linux/nfs_xdr.h +++ b/include/linux/nfs_xdr.h @@ -379,12 +379,14 @@ struct nfs_openres { * Arguments to the open_confirm call. */ struct nfs_open_confirmargs { + struct nfs4_sequence_args seq_args; const struct nfs_fh * fh; nfs4_stateid * stateid; struct nfs_seqid * seqid; }; struct nfs_open_confirmres { + struct nfs4_sequence_res seq_res; nfs4_stateid stateid; struct nfs_seqid * seqid; }; -- cgit v1.2.3 From 788a4d56ff378bff0b8e685d03a962b36903a149 Mon Sep 17 00:00:00 2001 From: Andrew Lunn Date: Tue, 4 Feb 2014 18:33:12 +0100 Subject: drivers: phy: Add support for optional phys Add devm_phy_optional_get and phy_optional_get, which should be used when the phy is optional. They does not return an error when the phy does not exist, rather they returns NULL, which is considered as a valid phy, but results in NOPs when used with the consumer API. Signed-off-by: Andrew Lunn Tested-by: Gregory CLEMENT Acked-by: Kishon Vijay Abraham I Signed-off-by: Jason Cooper --- include/linux/phy/phy.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/phy/phy.h b/include/linux/phy/phy.h index e273e5ac19c9..3f83459dbb20 100644 --- a/include/linux/phy/phy.h +++ b/include/linux/phy/phy.h @@ -146,7 +146,9 @@ static inline void phy_set_bus_width(struct phy *phy, int bus_width) phy->attrs.bus_width = bus_width; } struct phy *phy_get(struct device *dev, const char *string); +struct phy *phy_optional_get(struct device *dev, const char *string); struct phy *devm_phy_get(struct device *dev, const char *string); +struct phy *devm_phy_optional_get(struct device *dev, const char *string); void phy_put(struct phy *phy); void devm_phy_put(struct device *dev, struct phy *phy); struct phy *of_phy_simple_xlate(struct device *dev, @@ -232,11 +234,23 @@ static inline struct phy *phy_get(struct device *dev, const char *string) return ERR_PTR(-ENOSYS); } +static inline struct phy *phy_optional_get(struct device *dev, + const char *string) +{ + return ERR_PTR(-ENOSYS); +} + static inline struct phy *devm_phy_get(struct device *dev, const char *string) { return ERR_PTR(-ENOSYS); } +static inline struct phy *devm_phy_optional_get(struct device *dev, + const char *string) +{ + return ERR_PTR(-ENOSYS); +} + static inline void phy_put(struct phy *phy) { } -- cgit v1.2.3 From 662372e42e46d9bbfcb83e1cce81f6b33cebaddd Mon Sep 17 00:00:00 2001 From: Rob Herring Date: Mon, 3 Feb 2014 08:53:44 -0600 Subject: of: restructure for_each macros to fix compile warnings Commit 00b2c76a6a "include/linux/of.h: make for_each_child_of_node() reference its args when CONFIG_OF=n" fixed warnings for unused variables, but introduced variable "used uninitialized" warnings. Simply initializing the variables would result in "set but not used" warnings with W=1. Fix both types of warnings by making all the for_each macros unconditional and rely on the dummy static inline functions to initialize and reference any variables. Reported-by: Geert Uytterhoeven Cc: David Howells Signed-off-by: Rob Herring Acked-by: Grant Likely --- include/linux/of.h | 153 +++++++++++++++++++++++++++++------------------------ 1 file changed, 84 insertions(+), 69 deletions(-) (limited to 'include/linux') diff --git a/include/linux/of.h b/include/linux/of.h index 70c64ba17fa5..435cb995904d 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -169,35 +169,15 @@ static inline const char *of_node_full_name(const struct device_node *np) extern struct device_node *of_find_node_by_name(struct device_node *from, const char *name); -#define for_each_node_by_name(dn, name) \ - for (dn = of_find_node_by_name(NULL, name); dn; \ - dn = of_find_node_by_name(dn, name)) extern struct device_node *of_find_node_by_type(struct device_node *from, const char *type); -#define for_each_node_by_type(dn, type) \ - for (dn = of_find_node_by_type(NULL, type); dn; \ - dn = of_find_node_by_type(dn, type)) extern struct device_node *of_find_compatible_node(struct device_node *from, const char *type, const char *compat); -#define for_each_compatible_node(dn, type, compatible) \ - for (dn = of_find_compatible_node(NULL, type, compatible); dn; \ - dn = of_find_compatible_node(dn, type, compatible)) extern struct device_node *of_find_matching_node_and_match( struct device_node *from, const struct of_device_id *matches, const struct of_device_id **match); -static inline struct device_node *of_find_matching_node( - struct device_node *from, - const struct of_device_id *matches) -{ - return of_find_matching_node_and_match(from, matches, NULL); -} -#define for_each_matching_node(dn, matches) \ - for (dn = of_find_matching_node(NULL, matches); dn; \ - dn = of_find_matching_node(dn, matches)) -#define for_each_matching_node_and_match(dn, matches, match) \ - for (dn = of_find_matching_node_and_match(NULL, matches, match); \ - dn; dn = of_find_matching_node_and_match(dn, matches, match)) + extern struct device_node *of_find_node_by_path(const char *path); extern struct device_node *of_find_node_by_phandle(phandle handle); extern struct device_node *of_get_parent(const struct device_node *node); @@ -209,43 +189,11 @@ extern struct device_node *of_get_next_available_child( extern struct device_node *of_get_child_by_name(const struct device_node *node, const char *name); -#define for_each_child_of_node(parent, child) \ - for (child = of_get_next_child(parent, NULL); child != NULL; \ - child = of_get_next_child(parent, child)) - -#define for_each_available_child_of_node(parent, child) \ - for (child = of_get_next_available_child(parent, NULL); child != NULL; \ - child = of_get_next_available_child(parent, child)) - -static inline int of_get_child_count(const struct device_node *np) -{ - struct device_node *child; - int num = 0; - - for_each_child_of_node(np, child) - num++; - - return num; -} - -static inline int of_get_available_child_count(const struct device_node *np) -{ - struct device_node *child; - int num = 0; - - for_each_available_child_of_node(np, child) - num++; - - return num; -} /* cache lookup */ extern struct device_node *of_find_next_cache_node(const struct device_node *); extern struct device_node *of_find_node_with_property( struct device_node *from, const char *prop_name); -#define for_each_node_with_property(dn, prop_name) \ - for (dn = of_find_node_with_property(NULL, prop_name); dn; \ - dn = of_find_node_with_property(dn, prop_name)) extern struct property *of_find_property(const struct device_node *np, const char *name, @@ -367,42 +315,53 @@ static inline struct device_node *of_find_node_by_name(struct device_node *from, return NULL; } -static inline struct device_node *of_get_parent(const struct device_node *node) +static inline struct device_node *of_find_node_by_type(struct device_node *from, + const char *type) { return NULL; } -static inline bool of_have_populated_dt(void) +static inline struct device_node *of_find_matching_node_and_match( + struct device_node *from, + const struct of_device_id *matches, + const struct of_device_id **match) { - return false; + return NULL; } -/* Kill an unused variable warning on a device_node pointer */ -static inline void __of_use_dn(const struct device_node *np) +static inline struct device_node *of_get_parent(const struct device_node *node) { + return NULL; } -#define for_each_child_of_node(parent, child) \ - while (__of_use_dn(parent), __of_use_dn(child), 0) +static inline struct device_node *of_get_next_child( + const struct device_node *node, struct device_node *prev) +{ + return NULL; +} -#define for_each_available_child_of_node(parent, child) \ - while (0) +static inline struct device_node *of_get_next_available_child( + const struct device_node *node, struct device_node *prev) +{ + return NULL; +} -static inline struct device_node *of_get_child_by_name( - const struct device_node *node, - const char *name) +static inline struct device_node *of_find_node_with_property( + struct device_node *from, const char *prop_name) { return NULL; } -static inline int of_get_child_count(const struct device_node *np) +static inline bool of_have_populated_dt(void) { - return 0; + return false; } -static inline int of_get_available_child_count(const struct device_node *np) +static inline struct device_node *of_get_child_by_name( + const struct device_node *node, + const char *name) { - return 0; + return NULL; } static inline int of_device_is_compatible(const struct device_node *device, @@ -569,6 +528,13 @@ extern int of_node_to_nid(struct device_node *np); static inline int of_node_to_nid(struct device_node *device) { return 0; } #endif +static inline struct device_node *of_find_matching_node( + struct device_node *from, + const struct of_device_id *matches) +{ + return of_find_matching_node_and_match(from, matches, NULL); +} + /** * of_property_read_bool - Findfrom a property * @np: device node from which the property value is to be read. @@ -618,6 +584,55 @@ static inline int of_property_read_u32(const struct device_node *np, s; \ s = of_prop_next_string(prop, s)) +#define for_each_node_by_name(dn, name) \ + for (dn = of_find_node_by_name(NULL, name); dn; \ + dn = of_find_node_by_name(dn, name)) +#define for_each_node_by_type(dn, type) \ + for (dn = of_find_node_by_type(NULL, type); dn; \ + dn = of_find_node_by_type(dn, type)) +#define for_each_compatible_node(dn, type, compatible) \ + for (dn = of_find_compatible_node(NULL, type, compatible); dn; \ + dn = of_find_compatible_node(dn, type, compatible)) +#define for_each_matching_node(dn, matches) \ + for (dn = of_find_matching_node(NULL, matches); dn; \ + dn = of_find_matching_node(dn, matches)) +#define for_each_matching_node_and_match(dn, matches, match) \ + for (dn = of_find_matching_node_and_match(NULL, matches, match); \ + dn; dn = of_find_matching_node_and_match(dn, matches, match)) + +#define for_each_child_of_node(parent, child) \ + for (child = of_get_next_child(parent, NULL); child != NULL; \ + child = of_get_next_child(parent, child)) +#define for_each_available_child_of_node(parent, child) \ + for (child = of_get_next_available_child(parent, NULL); child != NULL; \ + child = of_get_next_available_child(parent, child)) + +#define for_each_node_with_property(dn, prop_name) \ + for (dn = of_find_node_with_property(NULL, prop_name); dn; \ + dn = of_find_node_with_property(dn, prop_name)) + +static inline int of_get_child_count(const struct device_node *np) +{ + struct device_node *child; + int num = 0; + + for_each_child_of_node(np, child) + num++; + + return num; +} + +static inline int of_get_available_child_count(const struct device_node *np) +{ + struct device_node *child; + int num = 0; + + for_each_available_child_of_node(np, child) + num++; + + return num; +} + #if defined(CONFIG_PROC_FS) && defined(CONFIG_PROC_DEVICETREE) extern void proc_device_tree_add_node(struct device_node *, struct proc_dir_entry *); extern void proc_device_tree_add_prop(struct proc_dir_entry *pde, struct property *prop); -- cgit v1.2.3 From 1db73ae39a97797b7d929eca2af469f82fbb337a Mon Sep 17 00:00:00 2001 From: Geert Uytterhoeven Date: Mon, 3 Feb 2014 09:27:40 +0100 Subject: of/device: Nullify match table in of_match_device() for CONFIG_OF=n If the of_device_id table inside a device driver is protected by #ifdef CONFIG_OF, the driver still has to provide a dummy declaration of the table, or wrap it inside of_match_ptr(), when calling of_match_device() in the CONFIG_OF=n case, else the driver fails to compile with e.g. drivers/spi/spi-rspi.c: In function 'rspi_probe': drivers/spi/spi-rspi.c:1203:26: error: 'rspi_of_match' undeclared (first use in this function) drivers/spi/spi-rspi.c:1203:26: note: each undeclared identifier is reported only once for each function it appears in Make of_match_device() nullify the table pointer if CONFIG_OF=n to fix this. Reported-by: Yoshihiro Shimoda Signed-off-by: Geert Uytterhoeven Signed-off-by: Rob Herring --- include/linux/of_device.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/of_device.h b/include/linux/of_device.h index 8d7dd6768cb7..ef370210ffb2 100644 --- a/include/linux/of_device.h +++ b/include/linux/of_device.h @@ -78,11 +78,13 @@ static inline int of_device_uevent_modalias(struct device *dev, static inline void of_device_node_put(struct device *dev) { } -static inline const struct of_device_id *of_match_device( +static inline const struct of_device_id *__of_match_device( const struct of_device_id *matches, const struct device *dev) { return NULL; } +#define of_match_device(matches, dev) \ + __of_match_device(of_match_ptr(matches), (dev)) static inline struct device_node *of_cpu_device_node_get(int cpu) { -- cgit v1.2.3 From c4ad8f98bef77c7356aa6a9ad9188a6acc6b849d Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 5 Feb 2014 12:54:53 -0800 Subject: execve: use 'struct filename *' for executable name passing This changes 'do_execve()' to get the executable name as a 'struct filename', and to free it when it is done. This is what the normal users want, and it simplifies and streamlines their error handling. The controlled lifetime of the executable name also fixes a use-after-free problem with the trace_sched_process_exec tracepoint: the lifetime of the passed-in string for kernel users was not at all obvious, and the user-mode helper code used UMH_WAIT_EXEC to serialize the pathname allocation lifetime with the execve() having finished, which in turn meant that the trace point that happened after mm_release() of the old process VM ended up using already free'd memory. To solve the kernel string lifetime issue, this simply introduces "getname_kernel()" that works like the normal user-space getname() function, except with the source coming from kernel memory. As Oleg points out, this also means that we could drop the tcomm[] array from 'struct linux_binprm', since the pathname lifetime now covers setup_new_exec(). That would be a separate cleanup. Reported-by: Igor Zhbanov Tested-by: Steven Rostedt Cc: Oleg Nesterov Cc: Al Viro Signed-off-by: Linus Torvalds --- include/linux/binfmts.h | 1 - include/linux/fs.h | 1 + include/linux/sched.h | 3 ++- 3 files changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/binfmts.h b/include/linux/binfmts.h index fd8bf3219ef7..b4a745d7d9a9 100644 --- a/include/linux/binfmts.h +++ b/include/linux/binfmts.h @@ -115,7 +115,6 @@ extern int copy_strings_kernel(int argc, const char *const *argv, extern int prepare_bprm_creds(struct linux_binprm *bprm); extern void install_exec_creds(struct linux_binprm *bprm); extern void set_binfmt(struct linux_binfmt *new); -extern void free_bprm(struct linux_binprm *); extern ssize_t read_code(struct file *, unsigned long, loff_t, size_t); #endif /* _LINUX_BINFMTS_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c59813..d79678c188ad 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2079,6 +2079,7 @@ extern struct file * dentry_open(const struct path *, int, const struct cred *); extern int filp_close(struct file *, fl_owner_t id); extern struct filename *getname(const char __user *); +extern struct filename *getname_kernel(const char *); enum { FILE_CREATED = 1, diff --git a/include/linux/sched.h b/include/linux/sched.h index 68a0e84463a0..a781dec1cd0b 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -128,6 +128,7 @@ struct bio_list; struct fs_struct; struct perf_event_context; struct blk_plug; +struct filename; /* * List of flags we want to share for kernel threads, @@ -2311,7 +2312,7 @@ extern void do_group_exit(int); extern int allow_signal(int); extern int disallow_signal(int); -extern int do_execve(const char *, +extern int do_execve(struct filename *, const char __user * const __user *, const char __user * const __user *); extern long do_fork(unsigned long, unsigned long, unsigned long, int __user *, int __user *); -- cgit v1.2.3 From a3485d088516be4c2ae2890e8ad64321481f2857 Mon Sep 17 00:00:00 2001 From: Lars-Peter Clausen Date: Tue, 4 Feb 2014 13:42:10 +0100 Subject: gpio: consumer.h: Move forward declarations outside #ifdef Make sure that the forward declared structs in gpio/consumer.h are also visible on the else branch of the CONFIG_GPIOLIB #ifdef. Fixes the following warnings and their associated errors when CONFIG_GPIOLIB is not selected: include/linux/gpio/consumer.h:67:14: warning: 'struct device' declared inside parameter list include/linux/gpio/consumer.h:67:14: warning: its scope is only this definition or declaration, which is probably not what you want [...] Signed-off-by: Lars-Peter Clausen Reviewed-by: Alexandre Courbot Signed-off-by: Linus Walleij --- include/linux/gpio/consumer.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/gpio/consumer.h b/include/linux/gpio/consumer.h index 4d34dbbbad4d..7a8144fef406 100644 --- a/include/linux/gpio/consumer.h +++ b/include/linux/gpio/consumer.h @@ -4,8 +4,6 @@ #include #include -#ifdef CONFIG_GPIOLIB - struct device; struct gpio_chip; @@ -18,6 +16,8 @@ struct gpio_chip; */ struct gpio_desc; +#ifdef CONFIG_GPIOLIB + /* Acquire and dispose GPIOs */ struct gpio_desc *__must_check gpiod_get(struct device *dev, const char *con_id); -- cgit v1.2.3 From 579f82901f6f41256642936d7e632f3979ad76d4 Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Thu, 6 Feb 2014 12:04:21 -0800 Subject: swap: add a simple detector for inappropriate swapin readahead This is a patch to improve swap readahead algorithm. It's from Hugh and I slightly changed it. Hugh's original changelog: swapin readahead does a blind readahead, whether or not the swapin is sequential. This may be ok on harddisk, because large reads have relatively small costs, and if the readahead pages are unneeded they can be reclaimed easily - though, what if their allocation forced reclaim of useful pages? But on SSD devices large reads are more expensive than small ones: if the readahead pages are unneeded, reading them in caused significant overhead. This patch adds very simplistic random read detection. Stealing the PageReadahead technique from Konstantin Khlebnikov's patch, avoiding the vma/anon_vma sophistications of Shaohua Li's patch, swapin_nr_pages() simply looks at readahead's current success rate, and narrows or widens its readahead window accordingly. There is little science to its heuristic: it's about as stupid as can be whilst remaining effective. The table below shows elapsed times (in centiseconds) when running a single repetitive swapping load across a 1000MB mapping in 900MB ram with 1GB swap (the harddisk tests had taken painfully too long when I used mem=500M, but SSD shows similar results for that). Vanilla is the 3.6-rc7 kernel on which I started; Shaohua denotes his Sep 3 patch in mmotm and linux-next; HughOld denotes my Oct 1 patch which Shaohua showed to be defective; HughNew this Nov 14 patch, with page_cluster as usual at default of 3 (8-page reads); HughPC4 this same patch with page_cluster 4 (16-page reads); HughPC0 with page_cluster 0 (1-page reads: no readahead). HDD for swapping to harddisk, SSD for swapping to VertexII SSD. Seq for sequential access to the mapping, cycling five times around; Rand for the same number of random touches. Anon for a MAP_PRIVATE anon mapping; Shmem for a MAP_SHARED anon mapping, equivalent to tmpfs. One weakness of Shaohua's vma/anon_vma approach was that it did not optimize Shmem: seen below. Konstantin's approach was perhaps mistuned, 50% slower on Seq: did not compete and is not shown below. HDD Vanilla Shaohua HughOld HughNew HughPC4 HughPC0 Seq Anon 73921 76210 75611 76904 78191 121542 Seq Shmem 73601 73176 73855 72947 74543 118322 Rand Anon 895392 831243 871569 845197 846496 841680 Rand Shmem 1058375 1053486 827935 764955 764376 756489 SSD Vanilla Shaohua HughOld HughNew HughPC4 HughPC0 Seq Anon 24634 24198 24673 25107 21614 70018 Seq Shmem 24959 24932 25052 25703 22030 69678 Rand Anon 43014 26146 28075 25989 26935 25901 Rand Shmem 45349 45215 28249 24268 24138 24332 These tests are, of course, two extremes of a very simple case: under heavier mixed loads I've not yet observed any consistent improvement or degradation, and wider testing would be welcome. Shaohua Li: Test shows Vanilla is slightly better in sequential workload than Hugh's patch. I observed with Hugh's patch sometimes the readahead size is shrinked too fast (from 8 to 1 immediately) in sequential workload if there is no hit. And in such case, continuing doing readahead is good actually. I don't prepare a sophisticated algorithm for the sequential workload because so far we can't guarantee sequential accessed pages are swap out sequentially. So I slightly change Hugh's heuristic - don't shrink readahead size too fast. Here is my test result (unit second, 3 runs average): Vanilla Hugh New Seq 356 370 360 Random 4525 2447 2444 Attached graph is the swapin/swapout throughput I collected with 'vmstat 2'. The first part is running a random workload (till around 1200 of the x-axis) and the second part is running a sequential workload. swapin and swapout throughput are almost identical in steady state in both workloads. These are expected behavior. while in Vanilla, swapin is much bigger than swapout especially in random workload (because wrong readahead). Original patches by: Shaohua Li and Konstantin Khlebnikov. [fengguang.wu@intel.com: swapin_nr_pages() can be static] Signed-off-by: Hugh Dickins Signed-off-by: Shaohua Li Signed-off-by: Fengguang Wu Cc: Rik van Riel Cc: Wu Fengguang Cc: Minchan Kim Cc: Konstantin Khlebnikov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index e464b4e987e8..d1fe1a761047 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -228,9 +228,9 @@ PAGEFLAG(OwnerPriv1, owner_priv_1) TESTCLEARFLAG(OwnerPriv1, owner_priv_1) TESTPAGEFLAG(Writeback, writeback) TESTSCFLAG(Writeback, writeback) PAGEFLAG(MappedToDisk, mappedtodisk) -/* PG_readahead is only used for file reads; PG_reclaim is only for writes */ +/* PG_readahead is only used for reads; PG_reclaim is only for writes */ PAGEFLAG(Reclaim, reclaim) TESTCLEARFLAG(Reclaim, reclaim) -PAGEFLAG(Readahead, reclaim) /* Reminder to do async read-ahead */ +PAGEFLAG(Readahead, reclaim) TESTCLEARFLAG(Readahead, reclaim) #ifdef CONFIG_HIGHMEM /* -- cgit v1.2.3 From 78c0f98cc9dd46824fa66f35f14ea24ba733d145 Mon Sep 17 00:00:00 2001 From: Eli Cohen Date: Thu, 30 Jan 2014 13:49:48 +0200 Subject: IB/mlx5: Fix binary compatibility with libmlx5 Commit c1be5232d21d ("Fix micro UAR allocator") broke binary compatibility between libmlx5 and mlx5_ib since it defines a different value to the number of micro UARs per page, leading to wrong calculation in libmlx5. This patch defines struct mlx5_ib_alloc_ucontext_req_v2 as an extension to struct mlx5_ib_alloc_ucontext_req. The extended size is determined in mlx5_ib_alloc_ucontext() and in case of old library we use uuarn 0 which works fine -- this is acheived due to create_user_qp() falling back from high to medium then to low class where low class will return 0. For new libraries we use the more sophisticated allocation algorithm. Signed-off-by: Eli Cohen Reviewed-by: Yann Droneaud Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 554548cd3dd4..32cb18c399c2 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -227,6 +227,7 @@ struct mlx5_uuar_info { * protect uuar allocation data structs */ struct mutex lock; + u32 ver; }; struct mlx5_bf { -- cgit v1.2.3 From e28bab4828354583bb66ac09021ca69b341a7db4 Mon Sep 17 00:00:00 2001 From: "K. Y. Srinivasan" Date: Wed, 15 Jan 2014 17:12:58 -0800 Subject: Drivers: hv: vmbus: Specify the target CPU that should receive notification During the initial VMBUS connect phase, starting with WS2012 R2, we should specify the VPCU in the guest that should receive the notification. Fix this issue. This fix is required to properly connect to the host in the kexeced kernel. Signed-off-by: K. Y. Srinivasan Cc: [3.9+] Signed-off-by: Greg Kroah-Hartman --- include/linux/hyperv.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/hyperv.h b/include/linux/hyperv.h index 15da677478dd..344883dce584 100644 --- a/include/linux/hyperv.h +++ b/include/linux/hyperv.h @@ -875,7 +875,7 @@ struct vmbus_channel_relid_released { struct vmbus_channel_initiate_contact { struct vmbus_channel_message_header header; u32 vmbus_version_requested; - u32 padding2; + u32 target_vcpu; /* The VCPU the host should respond to */ u64 interrupt_page; u64 monitor_page1; u64 monitor_page2; -- cgit v1.2.3 From 72a0a36e2854a6eadb4cf2561858f613f9cd4639 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 7 Feb 2014 10:22:36 -0800 Subject: blk-mq: support at_head inserations for blk_execute_rq This is neede for proper SG_IO operation as well as various uses of blk_execute_rq from the SCSI midlayer. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1e8f16f65af4..b7638be58599 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -122,7 +122,8 @@ void blk_mq_init_commands(struct request_queue *, void (*init)(void *data, struc void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); -void blk_mq_insert_request(struct request_queue *, struct request *, bool); +void blk_mq_insert_request(struct request_queue *, struct request *, + bool, bool); void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -- cgit v1.2.3 From 3d4b81eda2211f32886e2978daf6f39885042fc4 Mon Sep 17 00:00:00 2001 From: Sarah Sharp Date: Fri, 31 Jan 2014 11:52:57 -0800 Subject: Revert "usb: xhci: Link TRB must not occur within a USB payload burst" This reverts commit 35773dac5f862cb1c82ea151eba3e2f6de51ec3e. It's a hack that caused regressions in the usb-storage and userspace USB drivers that use usbfs and libusb. Commit 70cabb7d992f "xhci 1.0: Limit arbitrarily-aligned scatter gather." should fix the issues seen with the ax88179_178a driver on xHCI 1.0 hosts, without causing regressions. Signed-off-by: Sarah Sharp Cc: stable@vger.kernel.org # 3.12 --- include/linux/usb.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/usb.h b/include/linux/usb.h index c716da18c668..7f6eb859873e 100644 --- a/include/linux/usb.h +++ b/include/linux/usb.h @@ -1265,8 +1265,6 @@ typedef void (*usb_complete_t)(struct urb *); * @sg: scatter gather buffer list, the buffer size of each element in * the list (except the last) must be divisible by the endpoint's * max packet size if no_sg_constraint isn't set in 'struct usb_bus' - * (FIXME: scatter-gather under xHCI is broken for periodic transfers. - * Do not use urb->sg for interrupt endpoints for now, only bulk.) * @num_mapped_sgs: (internal) number of mapped sg entries * @num_sgs: number of entries in the sg list * @transfer_buffer_length: How big is transfer_buffer. The transfer may -- cgit v1.2.3 From 0668d3065128d39449c097e62dbdb5707820137d Mon Sep 17 00:00:00 2001 From: Stephen Boyd Date: Thu, 2 Jan 2014 16:37:32 -0800 Subject: genirq: Add devm_request_any_context_irq() Some drivers use request_any_context_irq() but there isn't a devm_* function for it. Add one so that these drivers don't need to explicitly free the irq on driver detach. Signed-off-by: Stephen Boyd Cc: linux-arm-kernel@lists.infradead.org Cc: Dmitry Torokhov Link: http://lkml.kernel.org/r/1388709460-19222-3-git-send-email-sboyd@codeaurora.org Signed-off-by: Thomas Gleixner --- include/linux/interrupt.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/interrupt.h b/include/linux/interrupt.h index 0053adde0ed9..a2678d35b5a2 100644 --- a/include/linux/interrupt.h +++ b/include/linux/interrupt.h @@ -158,6 +158,11 @@ devm_request_irq(struct device *dev, unsigned int irq, irq_handler_t handler, devname, dev_id); } +extern int __must_check +devm_request_any_context_irq(struct device *dev, unsigned int irq, + irq_handler_t handler, unsigned long irqflags, + const char *devname, void *dev_id); + extern void devm_free_irq(struct device *dev, unsigned int irq, void *dev_id); /* -- cgit v1.2.3 From d311d79de305f1ada47cadd672e6ed1b28a949eb Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 9 Feb 2014 15:18:09 -0500 Subject: fix O_SYNC|O_APPEND syncing the wrong range on write() It actually goes back to 2004 ([PATCH] Concurrent O_SYNC write support) when sync_page_range() had been introduced; generic_file_write{,v}() correctly synced pos_after_write - written .. pos_after_write - 1 but generic_file_aio_write() synced pos_before_write .. pos_before_write + written - 1 instead. Which is not the same thing with O_APPEND, obviously. A couple of years later correct variant had been killed off when everything switched to use of generic_file_aio_write(). All users of generic_file_aio_write() are affected, and the same bug has been copied into other instances of ->aio_write(). The fix is trivial; the only subtle point is that generic_write_sync() ought to be inlined to avoid calculations useless for the majority of calls. Signed-off-by: Al Viro --- include/linux/fs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 09f553c59813..75ff961be051 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2273,7 +2273,13 @@ extern int filemap_fdatawrite_range(struct address_space *mapping, extern int vfs_fsync_range(struct file *file, loff_t start, loff_t end, int datasync); extern int vfs_fsync(struct file *file, int datasync); -extern int generic_write_sync(struct file *file, loff_t pos, loff_t count); +static inline int generic_write_sync(struct file *file, loff_t pos, loff_t count) +{ + if (!(file->f_flags & O_DSYNC) && !IS_SYNC(file->f_mapping->host)) + return 0; + return vfs_fsync_range(file, pos, pos + count - 1, + (file->f_flags & __O_SYNC) ? 0 : 1); +} extern void emergency_sync(void); extern void emergency_remount(void); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From c4540a7d8c1e595560e53acedf88901daf15a2b5 Mon Sep 17 00:00:00 2001 From: Rashika Kheria Date: Sun, 9 Feb 2014 18:30:39 +0530 Subject: fs: Add prototype declaration to appropriate header file include/linux/bio.h MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Add prototype declaration to header file include/linux/bio.h because it is used by more than one file. This eliminates the following warning in bio-integrity.c: fs/bio-integrity.c:214:14: warning: no previous prototype for ‘bio_integrity_tag_size’ [-Wmissing-prototypes] Signed-off-by: Rashika Kheria Reviewed-by: Josh Triplett Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 70654521dab6..d6791bba8264 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -332,6 +332,7 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio_set *fs_bio_set; +unsigned int bio_integrity_tag_size(struct bio *bio); static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { -- cgit v1.2.3 From 30a91cb4ef385fe1b260df204ef314d86fff2850 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 03:24:38 -0800 Subject: blk-mq: rework I/O completions Rework I/O completions to work more like the old code path. blk_mq_end_io now stays out of the business of deferring completions to others CPUs and calling blk_mark_rq_complete. The latter is very important to allow completing requests that have timed out and thus are already marked completed, the former allows using the IPI callout even for driver specific completions instead of having to reimplement them. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b7638be58599..468be242db90 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -86,6 +86,8 @@ struct blk_mq_ops { */ rq_timed_out_fn *timeout; + softirq_done_fn *complete; + /* * Override for hctx allocations (should probably go) */ @@ -137,6 +139,8 @@ void blk_mq_free_single_hw_queue(struct blk_mq_hw_ctx *, unsigned int); void blk_mq_end_io(struct request *rq, int error); +void blk_mq_complete_request(struct request *rq); + void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_stop_hw_queues(struct request_queue *q); -- cgit v1.2.3 From 18741986a4b1dc4b1f171634c4191abc3b0fa023 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 10 Feb 2014 09:29:00 -0700 Subject: blk-mq: rework flush sequencing logic Witch to using a preallocated flush_rq for blk-mq similar to what's done with the old request path. This allows us to set up the request properly with a tag from the actually allowed range and ->rq_disk as needed by some drivers. To make life easier we also switch to dynamic allocation of ->flush_rq for the old path. This effectively reverts most of "blk-mq: fix for flush deadlock" and "blk-mq: Don't reserve a tag for flush request" Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 5 +---- include/linux/blkdev.h | 11 +++-------- 2 files changed, 4 insertions(+), 12 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 468be242db90..18ba8a627f46 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -36,15 +36,12 @@ struct blk_mq_hw_ctx { struct list_head page_list; struct blk_mq_tags *tags; - atomic_t pending_flush; - unsigned long queued; unsigned long run; #define BLK_MQ_MAX_DISPATCH_ORDER 10 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int queue_depth; - unsigned int reserved_tags; unsigned int numa_node; unsigned int cmd_size; /* per-request extra data */ @@ -129,7 +126,7 @@ void blk_mq_insert_request(struct request_queue *, struct request *, void blk_mq_run_queues(struct request_queue *q, bool async); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp, bool reserved); +struct request *blk_mq_alloc_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_alloc_reserved_request(struct request_queue *q, int rw, gfp_t gfp); struct request *blk_mq_rq_from_tag(struct request_queue *q, unsigned int tag); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0375654adb28..b2d25ecbcbc1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -101,7 +101,7 @@ struct request { }; union { struct call_single_data csd; - struct work_struct mq_flush_data; + struct work_struct mq_flush_work; }; struct request_queue *q; @@ -451,13 +451,8 @@ struct request_queue { unsigned long flush_pending_since; struct list_head flush_queue[2]; struct list_head flush_data_in_flight; - union { - struct request flush_rq; - struct { - spinlock_t mq_flush_lock; - struct work_struct mq_flush_work; - }; - }; + struct request *flush_rq; + spinlock_t mq_flush_lock; struct mutex sysfs_lock; -- cgit v1.2.3 From fb37bb04d6c8d6c44e61d9da189dcfa6aa0f135e Mon Sep 17 00:00:00 2001 From: Paul Gortmaker Date: Mon, 10 Feb 2014 14:25:49 -0800 Subject: smp.h: fix x86+cpu.c sparse warnings about arch nonboot CPU calls Use what we already do for arch_disable_smp_support() to fix these: arch/x86/kernel/smpboot.c:1155:6: warning: symbol 'arch_enable_nonboot_cpus_begin' was not declared. Should it be static? arch/x86/kernel/smpboot.c:1160:6: warning: symbol 'arch_enable_nonboot_cpus_end' was not declared. Should it be static? kernel/cpu.c:512:13: warning: symbol 'arch_enable_nonboot_cpus_begin' was not declared. Should it be static? kernel/cpu.c:516:13: warning: symbol 'arch_enable_nonboot_cpus_end' was not declared. Should it be static? Signed-off-by: Paul Gortmaker Cc: Thomas Gleixner Cc: Ingo Molnar Cc: "H. Peter Anvin" Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/smp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/smp.h b/include/linux/smp.h index 3834f43f9993..6ae004e437ea 100644 --- a/include/linux/smp.h +++ b/include/linux/smp.h @@ -188,6 +188,9 @@ static inline void kick_all_cpus_sync(void) { } */ extern void arch_disable_smp_support(void); +extern void arch_enable_nonboot_cpus_begin(void); +extern void arch_enable_nonboot_cpus_end(void); + void smp_setup_processor_id(void); #endif /* __LINUX_SMP_H */ -- cgit v1.2.3 From 8423ae3d7a3cfe084865262cfaeba1359d405182 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Mon, 10 Feb 2014 17:45:50 -0800 Subject: block: Fix cloning of discard/write same bios Immutable biovecs changed the way bio segments are treated in such a way that bio_for_each_segment() cannot now do what we want for discard/write same bios, since bi_size means something completely different for them. Fortunately discard and write same bios never have more than a single biovec, so bio_for_each_segment() is unnecessary and not terribly meaningful for them, but we still have to special case them in a few places. Signed-off-by: Kent Overstreet Tested-by: Richard W.M. Jones Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index d6791bba8264..5a4d39b4686b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -250,6 +250,17 @@ static inline unsigned bio_segments(struct bio *bio) struct bio_vec bv; struct bvec_iter iter; + /* + * We special case discard/write same, because they interpret bi_size + * differently: + */ + + if (bio->bi_rw & REQ_DISCARD) + return 1; + + if (bio->bi_rw & REQ_WRITE_SAME) + return 1; + bio_for_each_segment(bv, bio, iter) segs++; -- cgit v1.2.3 From a9f180345f5378ac87d80ed0bea55ba421d83859 Mon Sep 17 00:00:00 2001 From: Steven Noonan Date: Wed, 12 Feb 2014 23:01:07 -0800 Subject: compiler/gcc4: Make quirk for asm_volatile_goto() unconditional I started noticing problems with KVM guest destruction on Linux 3.12+, where guest memory wasn't being cleaned up. I bisected it down to the commit introducing the new 'asm goto'-based atomics, and found this quirk was later applied to those. Unfortunately, even with GCC 4.8.2 (which ostensibly fixed the known 'asm goto' bug) I am still getting some kind of miscompilation. If I enable the asm_volatile_goto quirk for my compiler, KVM guests are destroyed correctly and the memory is cleaned up. So make the quirk unconditional for now, until bug is found and fixed. Suggested-by: Linus Torvalds Signed-off-by: Steven Noonan Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Jakub Jelinek Cc: Richard Henderson Cc: Andrew Morton Cc: Oleg Nesterov Cc: Link: http://lkml.kernel.org/r/1392274867-15236-1-git-send-email-steven@uplinklabs.net Link: http://gcc.gnu.org/bugzilla/show_bug.cgi?id=58670 Signed-off-by: Ingo Molnar --- include/linux/compiler-gcc4.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc4.h b/include/linux/compiler-gcc4.h index ded429966c1f..2507fd2a1eb4 100644 --- a/include/linux/compiler-gcc4.h +++ b/include/linux/compiler-gcc4.h @@ -75,11 +75,7 @@ * * (asm goto is automatically volatile - the naming reflects this.) */ -#if GCC_VERSION <= 40801 -# define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) -#else -# define asm_volatile_goto(x...) do { asm goto(x); } while (0) -#endif +#define asm_volatile_goto(x...) do { asm goto(x); asm (""); } while (0) #ifdef CONFIG_ARCH_USE_BUILTIN_BSWAP #if GCC_VERSION >= 40400 -- cgit v1.2.3 From 6ecde51dd7894ffe2f959cca1fea3ea2b9ee2394 Mon Sep 17 00:00:00 2001 From: Roland Dreier Date: Thu, 13 Feb 2014 20:45:17 -0800 Subject: mlx5: Add include of because of kzalloc()/kfree() use On some architectures (for example, arm), we don't end up indirectly pulling in the declaration of kzalloc() and kfree(), and so building anything that includes breaks. Fix this by adding an explicit include to get the declaration. Reported-by: kbuild test robot Signed-off-by: Roland Dreier --- include/linux/mlx5/driver.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 32cb18c399c2..130bc8d77fa5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -38,8 +38,10 @@ #include #include #include +#include #include #include + #include #include -- cgit v1.2.3