From 6f3b0e8bcf3cbb87a7459b3ed018d31d918df3f8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 26 Nov 2015 09:13:05 +0100 Subject: blk-mq: add a flags parameter to blk_mq_alloc_request We already have the reserved flag, and a nowait flag awkwardly encoded as a gfp_t. Add a real flags argument to make the scheme more extensible and allow for a nicer calling convention. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c0d2b7927c1f..e711f294934c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -794,7 +794,7 @@ extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, struct scsi_ioctl_command __user *); -extern int blk_queue_enter(struct request_queue *q, gfp_t gfp); +extern int blk_queue_enter(struct request_queue *q, bool nowait); extern void blk_queue_exit(struct request_queue *q); extern void blk_start_queue(struct request_queue *q); extern void blk_stop_queue(struct request_queue *q); -- cgit v1.2.3 From 287922eb0b186e2a5bf54fdd04b734c25c90035c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 30 Oct 2015 20:57:30 +0800 Subject: block: defer timeouts to a workqueue Timer context is not very useful for drivers to perform any meaningful abort action from. So instead of calling the driver from this useless context defer it to a workqueue as soon as possible. Note that while a delayed_work item would seem the right thing here I didn't dare to use it due to the magic in blk_add_timer that pokes deep into timer internals. But maybe this encourages Tejun to add a sensible API for that to the workqueue API and we'll all be fine in the end :) Contains a major update from Keith Bush: "This patch removes synchronizing the timeout work so that the timer can start a freeze on its own queue. The timer enters the queue, so timer context can only start a freeze, but not wait for frozen." Signed-off-by: Christoph Hellwig Acked-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e711f294934c..221dc3bac49f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -407,6 +407,7 @@ struct request_queue { unsigned int rq_timeout; struct timer_list timeout; + struct work_struct timeout_work; struct list_head timeout_list; struct list_head icq_list; -- cgit v1.2.3 From b2e0d1625e193b40cbbd45b799f82d54d34e015c Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:55:59 -0800 Subject: dax: fix lifetime of in-kernel dax mappings with dax_map_atomic() The DAX implementation needs to protect new calls to ->direct_access() and usage of its return value against the driver for the underlying block device being disabled. Use blk_queue_enter()/blk_queue_exit() to hold off blk_cleanup_queue() from proceeding, or otherwise fail new mapping requests if the request_queue is being torn down. This also introduces blk_dax_ctl to simplify the interface from fs/dax.c through dax_map_atomic() to bdev_direct_access(). [willy@linux.intel.com: fix read() of a hole] Signed-off-by: Dan Williams Reviewed-by: Jeff Moyer Cc: Jan Kara Cc: Jens Axboe Cc: Dave Chinner Cc: Ross Zwisler Cc: Matthew Wilcox Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 17 +++++++++++++++-- 1 file changed, 15 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c70e3588a48c..88821fa26f19 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1617,6 +1617,20 @@ static inline bool integrity_req_gap_front_merge(struct request *req, #endif /* CONFIG_BLK_DEV_INTEGRITY */ +/** + * struct blk_dax_ctl - control and output parameters for ->direct_access + * @sector: (input) offset relative to a block_device + * @addr: (output) kernel virtual address for @sector populated by driver + * @pfn: (output) page frame number for @addr populated by driver + * @size: (input) number of bytes requested + */ +struct blk_dax_ctl { + sector_t sector; + void __pmem *addr; + long size; + unsigned long pfn; +}; + struct block_device_operations { int (*open) (struct block_device *, fmode_t); void (*release) (struct gendisk *, fmode_t); @@ -1643,8 +1657,7 @@ extern int __blkdev_driver_ioctl(struct block_device *, fmode_t, unsigned int, extern int bdev_read_page(struct block_device *, sector_t, struct page *); extern int bdev_write_page(struct block_device *, sector_t, struct page *, struct writeback_control *); -extern long bdev_direct_access(struct block_device *, sector_t, - void __pmem **addr, unsigned long *pfn, long size); +extern long bdev_direct_access(struct block_device *, struct blk_dax_ctl *); #else /* CONFIG_BLOCK */ struct block_device; -- cgit v1.2.3 From 34c0fd540e79fb49ef9ce864dae1058cca265780 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Fri, 15 Jan 2016 16:56:14 -0800 Subject: mm, dax, pmem: introduce pfn_t For the purpose of communicating the optional presence of a 'struct page' for the pfn returned from ->direct_access(), introduce a type that encapsulates a page-frame-number plus flags. These flags contain the historical "page_link" encoding for a scatterlist entry, but can also denote "device memory". Where "device memory" is a set of pfns that are not part of the kernel's linear mapping by default, but are accessed via the same memory controller as ram. The motivation for this new type is large capacity persistent memory that needs struct page entries in the 'memmap' to support 3rd party DMA (i.e. O_DIRECT I/O with a persistent memory source/target). However, we also need it in support of maintaining a list of mapped inodes which need to be unmapped at driver teardown or freeze_bdev() time. Signed-off-by: Dan Williams Cc: Christoph Hellwig Cc: Dave Hansen Cc: Ross Zwisler Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 88821fa26f19..bfb64d672e19 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -15,6 +15,7 @@ #include #include #include +#include #include #include #include @@ -1628,7 +1629,7 @@ struct blk_dax_ctl { sector_t sector; void __pmem *addr; long size; - unsigned long pfn; + pfn_t pfn; }; struct block_device_operations { @@ -1638,7 +1639,7 @@ struct block_device_operations { int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); long (*direct_access)(struct block_device *, sector_t, void __pmem **, - unsigned long *pfn); + pfn_t *); unsigned int (*check_events) (struct gendisk *disk, unsigned int clearing); /* ->media_changed() is DEPRECATED, use ->check_events() instead */ -- cgit v1.2.3