From ff9ea323816dc1c8ac7144afd4eab3ac97704430 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:56 +0900 Subject: block, bdi: an active gendisk always has a request_queue associated with it bdev_get_queue() returns the request_queue associated with the specified block_device. blk_get_backing_dev_info() makes use of bdev_get_queue() to determine the associated bdi given a block_device. All the callers of bdev_get_queue() including blk_get_backing_dev_info() assume that bdev_get_queue() may return NULL and implement NULL handling; however, bdev_get_queue() requires the passed in block_device is opened and attached to its gendisk. Because an active gendisk always has a valid request_queue associated with it, bdev_get_queue() can never return NULL and neither can blk_get_backing_dev_info(). Make it clear that neither of the two functions can return NULL and remove NULL handling from all the callers. Signed-off-by: Tejun Heo Cc: Chris Mason Cc: Dave Chinner Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 518b46555b80..e267bf0db559 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,7 +865,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { - return bdev->bd_disk->queue; + return bdev->bd_disk->queue; /* this is never NULL */ } /* -- cgit v1.2.3 From e36f1dfce0b45d347927568efe1088821758cc3c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:03:57 +0900 Subject: bdi: remove unused stuff Two flags and one bdi_writeback field are no longer used. Remove them. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index e488e9459a93..2103a7fa3fd8 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -28,12 +28,10 @@ struct dentry; * Bits in backing_dev_info.state */ enum bdi_state { - BDI_wb_alloc, /* Default embedded wb allocated */ BDI_async_congested, /* The async (write) queue is getting full */ BDI_sync_congested, /* The sync queue is getting full */ BDI_registered, /* bdi_register() was done */ BDI_writeback_running, /* Writeback is in progress */ - BDI_unused, /* Available bits start here */ }; typedef int (congested_fn)(void *, int); @@ -50,7 +48,6 @@ enum bdi_stat_item { struct bdi_writeback { struct backing_dev_info *bdi; /* our parent bdi */ - unsigned int nr; unsigned long last_old_flush; /* last old data flush */ -- cgit v1.2.3 From 018a17bdc8658ad448497c84d4ba21b6985820ec Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 8 Sep 2014 08:04:01 +0900 Subject: bdi: reimplement bdev_inode_switch_bdi() A block_device may be attached to different gendisks and thus different bdis over time. bdev_inode_switch_bdi() is used to switch the associated bdi. The function assumes that the inode could be dirty and transfers it between bdis if so. This is a bit nasty in that it reaches into bdi internals. This patch reimplements the function so that it writes out the inode if dirty. This is a lot simpler and can be implemented without exposing bdi internals. Signed-off-by: Tejun Heo Cc: Alexander Viro Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 2103a7fa3fd8..5da6012b7a14 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -121,7 +121,6 @@ void bdi_start_background_writeback(struct backing_dev_info *bdi); void bdi_writeback_workfn(struct work_struct *work); int bdi_has_dirty_io(struct backing_dev_info *bdi); void bdi_wakeup_thread_delayed(struct backing_dev_info *bdi); -void bdi_lock_two(struct bdi_writeback *wb1, struct bdi_writeback *wb2); extern spinlock_t bdi_lock; extern struct list_head bdi_list; -- cgit v1.2.3 From bf57229745f849e500ba69ff91e35bc8160a7373 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 13 Sep 2014 16:40:08 -0700 Subject: blk-mq: remove REQ_END Pass an explicit parameter for the last request in a batch to ->queue_rq instead of using a request flag. Besides being a cleaner and non-stateful interface this is also required for the next patch, which fixes the blk-mq I/O submission code to not start a time too early. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- include/linux/blk_types.h | 2 -- 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index a1e31f274fcd..9c4e306a9217 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -77,7 +77,7 @@ struct blk_mq_tag_set { struct list_head tag_list; }; -typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *); +typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 66c2167f04a9..bb7d66460e7a 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -188,7 +188,6 @@ enum rq_flag_bits { __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_KERNEL, /* direct IO to kernel pages */ __REQ_PM, /* runtime pm request */ - __REQ_END, /* last of chain of requests */ __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ __REQ_NR_BITS, /* stops here */ @@ -242,7 +241,6 @@ enum rq_flag_bits { #define REQ_SECURE (1ULL << __REQ_SECURE) #define REQ_KERNEL (1ULL << __REQ_KERNEL) #define REQ_PM (1ULL << __REQ_PM) -#define REQ_END (1ULL << __REQ_END) #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) -- cgit v1.2.3 From e2490073cd7c3d6f6ef6e029a208edd4d38efac4 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 13 Sep 2014 16:40:09 -0700 Subject: blk-mq: call blk_mq_start_request from ->queue_rq When we call blk_mq_start_request from the core blk-mq code before calling into ->queue_rq there is a racy window where the timeout handler can hit before we've fully set up the driver specific part of the command. Move the call to blk_mq_start_request into the driver so the driver can start the request only once it is fully set up. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9c4e306a9217..878b6f71da48 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -159,6 +159,7 @@ struct request *blk_mq_tag_to_rq(struct blk_mq_tags *tags, unsigned int tag); struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); +void blk_mq_start_request(struct request *rq); void blk_mq_end_io(struct request *rq, int error); void __blk_mq_end_io(struct request *rq, int error); -- cgit v1.2.3 From c8a446ad695ada43a885ec12b38411dbd190a11b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 13 Sep 2014 16:40:10 -0700 Subject: blk-mq: rename blk_mq_end_io to blk_mq_end_request Now that we've changed the driver API on the submission side use the opportunity to fix up the name on the completion side to fit into the general scheme. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 878b6f71da48..cb217c16990d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -160,8 +160,8 @@ struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_ind struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); void blk_mq_start_request(struct request *rq); -void blk_mq_end_io(struct request *rq, int error); -void __blk_mq_end_io(struct request *rq, int error); +void blk_mq_end_request(struct request *rq, int error); +void __blk_mq_end_request(struct request *rq, int error); void blk_mq_requeue_request(struct request *rq); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); -- cgit v1.2.3 From 81481eb423c295c5480a3fab9bb961cf286c91e7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 13 Sep 2014 16:40:11 -0700 Subject: blk-mq: fix and simplify tag iteration for the timeout handler Don't do a kmalloc from timer to handle timeouts, chances are we could be under heavy load or similar and thus just miss out on the timeouts. Fortunately it is very easy to just iterate over all in use tags, and doing this properly actually cleans up the blk_mq_busy_iter API as well, and prepares us for the next patch by passing a reserved argument to the iterator. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index cb217c16990d..0eb0f642be4b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -86,6 +86,9 @@ typedef int (init_request_fn)(void *, struct request *, unsigned int, typedef void (exit_request_fn)(void *, struct request *, unsigned int, unsigned int); +typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, + bool); + struct blk_mq_ops { /* * Queue request @@ -174,7 +177,8 @@ void blk_mq_stop_hw_queues(struct request_queue *q); void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_delay_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); -void blk_mq_tag_busy_iter(struct blk_mq_tags *tags, void (*fn)(void *data, unsigned long *), void *data); +void blk_mq_tag_busy_iter(struct blk_mq_hw_ctx *hctx, busy_iter_fn *fn, + void *priv); /* * Driver command data is immediately after the request. So subtract request -- cgit v1.2.3 From 0152fb6b57c4fae769ee75ea2ae670f4ff39fba9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 13 Sep 2014 16:40:13 -0700 Subject: blk-mq: pass a reserved argument to the timeout handler Allow blk-mq to pass an argument to the timeout handler to indicate if we're timing out a reserved or regular command. For many drivers those need to be handled different. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 0eb0f642be4b..325349559fb0 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -79,6 +79,7 @@ struct blk_mq_tag_set { typedef int (queue_rq_fn)(struct blk_mq_hw_ctx *, struct request *, bool); typedef struct blk_mq_hw_ctx *(map_queue_fn)(struct request_queue *, const int); +typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); typedef int (init_hctx_fn)(struct blk_mq_hw_ctx *, void *, unsigned int); typedef void (exit_hctx_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (init_request_fn)(void *, struct request *, unsigned int, @@ -103,7 +104,7 @@ struct blk_mq_ops { /* * Called on request timeout */ - rq_timed_out_fn *timeout; + timeout_fn *timeout; softirq_done_fn *complete; -- cgit v1.2.3 From 7c94e1c157a227837b04f02f5edeff8301410ba2 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 25 Sep 2014 23:23:43 +0800 Subject: block: introduce blk_flush_queue to drive flush machinery This patch introduces 'struct blk_flush_queue' and puts all flush machinery related fields into this structure, so that - flush implementation details aren't exposed to driver - it is easy to convert to per dispatch-queue flush machinery This patch is basically a mechanical replacement. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e267bf0db559..49f3461e4272 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -36,6 +36,7 @@ struct request; struct sg_io_hdr; struct bsg_job; struct blkcg_gq; +struct blk_flush_queue; #define BLKDEV_MIN_RQ 4 #define BLKDEV_MAX_RQ 128 /* Default maximum */ @@ -455,14 +456,7 @@ struct request_queue { */ unsigned int flush_flags; unsigned int flush_not_queueable:1; - unsigned int flush_queue_delayed:1; - unsigned int flush_pending_idx:1; - unsigned int flush_running_idx:1; - unsigned long flush_pending_since; - struct list_head flush_queue[2]; - struct list_head flush_data_in_flight; - struct request *flush_rq; - spinlock_t mq_flush_lock; + struct blk_flush_queue *fq; struct list_head requeue_list; spinlock_t requeue_lock; -- cgit v1.2.3 From f70ced09170761acb69840cafaace4abc72cba4b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 25 Sep 2014 23:23:47 +0800 Subject: blk-mq: support per-distpatch_queue flush machinery This patch supports to run one single flush machinery for each blk-mq dispatch queue, so that: - current init_request and exit_request callbacks can cover flush request too, then the buggy copying way of initializing flush request's pdu can be fixed - flushing performance gets improved in case of multi hw-queue In fio sync write test over virtio-blk(4 hw queues, ioengine=sync, iodepth=64, numjobs=4, bs=4K), it is observed that througput gets increased a lot over my test environment: - throughput: +70% in case of virtio-blk over null_blk - throughput: +30% in case of virtio-blk over SSD image The multi virtqueue feature isn't merged to QEMU yet, and patches for the feature can be found in below tree: git://kernel.ubuntu.com/ming/qemu.git v2.1.0-mq.4 And simply passing 'num_queues=4 vectors=5' should be enough to enable multi queue(quad queue) feature for QEMU virtio-blk. Suggested-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 325349559fb0..02c5d950f444 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -4,6 +4,7 @@ #include struct blk_mq_tags; +struct blk_flush_queue; struct blk_mq_cpu_notifier { struct list_head list; @@ -34,6 +35,7 @@ struct blk_mq_hw_ctx { struct request_queue *queue; unsigned int queue_num; + struct blk_flush_queue *fq; void *driver_data; @@ -119,6 +121,10 @@ struct blk_mq_ops { /* * Called for every command allocated by the block layer to allow * the driver to set up driver specific data. + * + * Tag greater than or equal to queue_depth is for setting up + * flush request. + * * Ditto for exit/teardown. */ init_request_fn *init_request; -- cgit v1.2.3 From e7258c1a269e0967856c81d182c286a78f5ecf15 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:19:55 -0400 Subject: block: Get rid of bdev_integrity_enabled() bdev_integrity_enabled() is only used by bio_integrity_enabled(). Combine these two functions. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b39e5000ff58..63e399b4fde5 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -666,7 +666,7 @@ struct biovec_slab { extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); -extern int bio_integrity_enabled(struct bio *bio); +extern bool bio_integrity_enabled(struct bio *bio); extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); extern int bio_integrity_prep(struct bio *); @@ -685,9 +685,9 @@ static inline int bio_integrity(struct bio *bio) return 0; } -static inline int bio_integrity_enabled(struct bio *bio) +static inline bool bio_integrity_enabled(struct bio *bio) { - return 0; + return false; } static inline int bioset_integrity_create(struct bio_set *bs, int pool_size) -- cgit v1.2.3 From 180b2f95dd331010a9930a65c8a18d6d81b94dc1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:19:56 -0400 Subject: block: Replace bi_integrity with bi_special For commands like REQ_COPY we need a way to pass extra information along with each bio. Like integrity metadata this information must be available at the bottom of the stack so bi_private does not suffice. Rename the existing bi_integrity field to bi_special and make it a union so we can have different bio extensions for each class of command. We previously used bi_integrity != NULL as a way to identify whether a bio had integrity metadata or not. Introduce a REQ_INTEGRITY to be the indicator now that bi_special can contain different things. In addition, bio_integrity(bio) will now return a pointer to the integrity payload (when applicable). Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 +++++++++-- include/linux/blk_types.h | 8 ++++++-- include/linux/blkdev.h | 7 ++----- 3 files changed, 17 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 63e399b4fde5..a810a74071b2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -293,6 +293,15 @@ static inline unsigned bio_segments(struct bio *bio) #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) #if defined(CONFIG_BLK_DEV_INTEGRITY) + +static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) +{ + if (bio->bi_rw & REQ_INTEGRITY) + return bio->bi_integrity; + + return NULL; +} + /* * bio integrity payload */ @@ -661,8 +670,6 @@ struct biovec_slab { for_each_bio(_bio) \ bip_for_each_vec(_bvl, _bio->bi_integrity, _iter) -#define bio_integrity(bio) (bio->bi_integrity != NULL) - extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, unsigned int); extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index bb7d66460e7a..6a5d2f2de1b9 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -78,9 +78,11 @@ struct bio { struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; #endif + union { #if defined(CONFIG_BLK_DEV_INTEGRITY) - struct bio_integrity_payload *bi_integrity; /* data integrity */ + struct bio_integrity_payload *bi_integrity; /* data integrity */ #endif + }; unsigned short bi_vcnt; /* how many bio_vec's */ @@ -162,6 +164,7 @@ enum rq_flag_bits { __REQ_WRITE_SAME, /* write same block many times */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ + __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ __REQ_FLUSH, /* request for cache flush */ @@ -203,13 +206,14 @@ enum rq_flag_bits { #define REQ_DISCARD (1ULL << __REQ_DISCARD) #define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME) #define REQ_NOIDLE (1ULL << __REQ_NOIDLE) +#define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ - REQ_SECURE) + REQ_SECURE | REQ_INTEGRITY) #define REQ_CLONE_MASK REQ_COMMON_MASK #define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49f3461e4272..7fcb2caef559 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1514,12 +1514,9 @@ static inline struct blk_integrity *blk_get_integrity(struct gendisk *disk) return disk->integrity; } -static inline int blk_integrity_rq(struct request *rq) +static inline bool blk_integrity_rq(struct request *rq) { - if (rq->bio == NULL) - return 0; - - return bio_integrity(rq->bio); + return rq->cmd_flags & REQ_INTEGRITY; } static inline void blk_queue_max_integrity_segments(struct request_queue *q, -- cgit v1.2.3 From 8492b68bc4025e7bce1d57761bd7c047efda2f81 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:19:57 -0400 Subject: block: Remove integrity tagging functions None of the filesystems appear interested in using the integrity tagging feature. Potentially because very few storage devices actually permit using the application tag space. Remove the tagging functions. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 --- include/linux/blkdev.h | 4 ---- 2 files changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index a810a74071b2..63a0e53e238c 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -362,7 +362,6 @@ extern struct bio *bio_clone_fast(struct bio *, gfp_t, struct bio_set *); extern struct bio *bio_clone_bioset(struct bio *, gfp_t, struct bio_set *bs); extern struct bio_set *fs_bio_set; -unsigned int bio_integrity_tag_size(struct bio *bio); static inline struct bio *bio_alloc(gfp_t gfp_mask, unsigned int nr_iovecs) { @@ -674,8 +673,6 @@ extern struct bio_integrity_payload *bio_integrity_alloc(struct bio *, gfp_t, un extern void bio_integrity_free(struct bio *); extern int bio_integrity_add_page(struct bio *, struct page *, unsigned int, unsigned int); extern bool bio_integrity_enabled(struct bio *bio); -extern int bio_integrity_set_tag(struct bio *, void *, unsigned int); -extern int bio_integrity_get_tag(struct bio *, void *, unsigned int); extern int bio_integrity_prep(struct bio *); extern void bio_integrity_endio(struct bio *, int); extern void bio_integrity_advance(struct bio *, unsigned int); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7fcb2caef559..0bf5d79d9ba0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1472,14 +1472,10 @@ struct blk_integrity_exchg { typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); -typedef void (integrity_set_tag_fn) (void *, void *, unsigned int); -typedef void (integrity_get_tag_fn) (void *, void *, unsigned int); struct blk_integrity { integrity_gen_fn *generate_fn; integrity_vrfy_fn *verify_fn; - integrity_set_tag_fn *set_tag_fn; - integrity_get_tag_fn *get_tag_fn; unsigned short flags; unsigned short tuple_size; -- cgit v1.2.3 From 5f9378fa9ca214977b5bfc12197c67eea450fc40 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:19:58 -0400 Subject: block: Remove bip_buf bip_buf is not really needed so we can remove it. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 63a0e53e238c..448d8c052cb7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -310,9 +310,6 @@ struct bio_integrity_payload { struct bvec_iter bip_iter; - /* kill - should just use bip_vec */ - void *bip_buf; /* generated integrity data */ - bio_end_io_t *bip_end_io; /* saved I/O completion fn */ unsigned short bip_slab; /* slab the bip came from */ -- cgit v1.2.3 From 3be91c4a3d090bd700bd6ee5bf457c1bbf189a4f Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:19:59 -0400 Subject: block: Deprecate the use of the term sector in the context of block integrity The protection interval is not necessarily tied to the logical block size of a block device. Stop using the terms "sector" and "sectors". Going forward we will use the term "seed" to describe the initial reference tag value for a given I/O. "Interval" will be used to describe the portion of the data buffer that a given piece of protection information is associated with. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0bf5d79d9ba0..d364c42dbf17 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1464,9 +1464,9 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) struct blk_integrity_exchg { void *prot_buf; void *data_buf; - sector_t sector; + sector_t seed; unsigned int data_size; - unsigned short sector_size; + unsigned short interval; const char *disk_name; }; @@ -1479,7 +1479,7 @@ struct blk_integrity { unsigned short flags; unsigned short tuple_size; - unsigned short sector_size; + unsigned short interval; unsigned short tag_size; const char *name; -- cgit v1.2.3 From 1859308853b19c4daf4afaab910d3d52ac1ec2ff Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:01 -0400 Subject: block: Clean up the code used to generate and verify integrity metadata Instead of the "operate" parameter we pass in a seed value and a pointer to a function that can be used to process the integrity metadata. The generation function is changed to have a return value to fit into this scheme. Signed-off-by: Martin K. Petersen Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 12 ++++++++++++ include/linux/blkdev.h | 9 ++++----- 2 files changed, 16 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 448d8c052cb7..3fd36660fd10 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -322,6 +322,18 @@ struct bio_integrity_payload { struct bio_vec *bip_vec; struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; + +static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) +{ + return bip->bip_iter.bi_sector; +} + +static inline void bip_set_seed(struct bio_integrity_payload *bip, + sector_t seed) +{ + bip->bip_iter.bi_sector = seed; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ extern void bio_trim(struct bio *bio, int offset, int size); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d364c42dbf17..24c1e055b8a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1461,7 +1461,7 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) #define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ #define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ -struct blk_integrity_exchg { +struct blk_integrity_iter { void *prot_buf; void *data_buf; sector_t seed; @@ -1470,12 +1470,11 @@ struct blk_integrity_exchg { const char *disk_name; }; -typedef void (integrity_gen_fn) (struct blk_integrity_exchg *); -typedef int (integrity_vrfy_fn) (struct blk_integrity_exchg *); +typedef int (integrity_processing_fn) (struct blk_integrity_iter *); struct blk_integrity { - integrity_gen_fn *generate_fn; - integrity_vrfy_fn *verify_fn; + integrity_processing_fn *generate_fn; + integrity_processing_fn *verify_fn; unsigned short flags; unsigned short tuple_size; -- cgit v1.2.3 From 8288f496eb1b1905c425e92eaf1abbb29119217b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:02 -0400 Subject: block: Add prefix to block integrity profile flags Add a BLK_ prefix to the integrity profile flags. Also rename the flags to be more consistent with the generate/verify terminology in the rest of the integrity code. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 24c1e055b8a7..cf92eb031ae9 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1458,8 +1458,10 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) #if defined(CONFIG_BLK_DEV_INTEGRITY) -#define INTEGRITY_FLAG_READ 2 /* verify data integrity on read */ -#define INTEGRITY_FLAG_WRITE 4 /* generate data integrity on write */ +enum blk_integrity_flags { + BLK_INTEGRITY_VERIFY = 1 << 0, + BLK_INTEGRITY_GENERATE = 1 << 1, +}; struct blk_integrity_iter { void *prot_buf; -- cgit v1.2.3 From 3aec2f41a8baeb70aa77556a4e4dcec7d9d70b4d Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:03 -0400 Subject: block: Add a disk flag to block integrity profile So far we have relied on the app tag size to determine whether a disk has been formatted with T10 protection information or not. However, not all target devices provide application tag storage. Add a flag to the block integrity profile that indicates whether the disk has been formatted with protection information. Signed-off-by: Martin K. Petersen Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cf92eb031ae9..4600fc63e3fc 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1461,6 +1461,7 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) enum blk_integrity_flags { BLK_INTEGRITY_VERIFY = 1 << 0, BLK_INTEGRITY_GENERATE = 1 << 1, + BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, }; struct blk_integrity_iter { -- cgit v1.2.3 From b1f01388574c9329922f760fc2a7335c2d14b08b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:04 -0400 Subject: block: Relocate bio integrity flags Move flags affecting the integrity code out of the bio bi_flags and into the block integrity payload. Signed-off-by: Martin K. Petersen Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 9 ++++++++- include/linux/blk_types.h | 6 ++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 3fd36660fd10..b508cf69206d 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -315,7 +315,7 @@ struct bio_integrity_payload { unsigned short bip_slab; /* slab the bip came from */ unsigned short bip_vcnt; /* # of integrity bio_vecs */ unsigned short bip_max_vcnt; /* integrity bio_vec slots */ - unsigned bip_owns_buf:1; /* should free bip_buf */ + unsigned short bip_flags; /* control flags */ struct work_struct bip_work; /* I/O completion */ @@ -323,6 +323,13 @@ struct bio_integrity_payload { struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; +enum bip_flags { + BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ + BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ + BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ + BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ +}; + static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) { return bip->bip_iter.bi_sector; diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6a5d2f2de1b9..38bc008e4503 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -120,10 +120,8 @@ struct bio { #define BIO_USER_MAPPED 6 /* contains user pages */ #define BIO_EOPNOTSUPP 7 /* not supported */ #define BIO_NULL_MAPPED 8 /* contains invalid user pages */ -#define BIO_FS_INTEGRITY 9 /* fs owns integrity data, not block layer */ -#define BIO_QUIET 10 /* Make BIO Quiet */ -#define BIO_MAPPED_INTEGRITY 11/* integrity metadata has been remapped */ -#define BIO_SNAP_STABLE 12 /* bio data must be snapshotted during write */ +#define BIO_QUIET 9 /* Make BIO Quiet */ +#define BIO_SNAP_STABLE 10 /* bio data must be snapshotted during write */ /* * Flags starting here get preserved by bio_reset() - this includes -- cgit v1.2.3 From aae7df50190a640e51bfe11c93f94741ac82ff0b Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:05 -0400 Subject: block: Integrity checksum flag Make the choice of checksum a per-I/O property by introducing a flag that can be inspected by the SCSI layer. There are several reasons for this: 1. It allows us to switch choice of checksum without unloading and reloading the HBA driver. 2. During error recovery we need to be able to tell the HBA that checksums read from disk should not be verified and converted to IP checksums. 3. For error injection purposes we need to be able to write a bad guard tag to storage. Since the storage device only supports T10 CRC we need to be able to disable IP checksum conversion on the HBA. Signed-off-by: Martin K. Petersen Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + include/linux/blkdev.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b508cf69206d..14bff3fe56d4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -328,6 +328,7 @@ enum bip_flags { BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ }; static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4600fc63e3fc..773df190a4ee 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1462,6 +1462,7 @@ enum blk_integrity_flags { BLK_INTEGRITY_VERIFY = 1 << 0, BLK_INTEGRITY_GENERATE = 1 << 1, BLK_INTEGRITY_DEVICE_CAPABLE = 1 << 2, + BLK_INTEGRITY_IP_CHECKSUM = 1 << 3, }; struct blk_integrity_iter { -- cgit v1.2.3 From 4eaf99beadcefbf126fa05e66fb40fca999e09fd Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:06 -0400 Subject: block: Don't merge requests if integrity flags differ We'd occasionally merge requests with conflicting integrity flags. Introduce a merge helper which checks that the requests have compatible integrity payloads. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 773df190a4ee..038b40f84c7a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1497,10 +1497,10 @@ extern int blk_integrity_compare(struct gendisk *, struct gendisk *); extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, struct scatterlist *); extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); -extern int blk_integrity_merge_rq(struct request_queue *, struct request *, - struct request *); -extern int blk_integrity_merge_bio(struct request_queue *, struct request *, - struct bio *); +extern bool blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern bool blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1580,15 +1580,15 @@ static inline unsigned short queue_max_integrity_segments(struct request_queue * { return 0; } -static inline int blk_integrity_merge_rq(struct request_queue *rq, - struct request *r1, - struct request *r2) +static inline bool blk_integrity_merge_rq(struct request_queue *rq, + struct request *r1, + struct request *r2) { return 0; } -static inline int blk_integrity_merge_bio(struct request_queue *rq, - struct request *r, - struct bio *b) +static inline bool blk_integrity_merge_bio(struct request_queue *rq, + struct request *r, + struct bio *b) { return 0; } -- cgit v1.2.3 From 2341c2f8c33196d02cf5a721746eea4e3c06674a Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:07 -0400 Subject: block: Add T10 Protection Information functions The T10 Protection Information format is also used by some devices that do not go through the SCSI layer (virtual block devices, NVMe). Relocate the relevant functions to a block layer library that can be used without involving SCSI. Signed-off-by: Martin K. Petersen Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/crc-t10dif.h | 5 +++-- include/linux/t10-pi.h | 22 ++++++++++++++++++++++ 2 files changed, 25 insertions(+), 2 deletions(-) create mode 100644 include/linux/t10-pi.h (limited to 'include/linux') diff --git a/include/linux/crc-t10dif.h b/include/linux/crc-t10dif.h index b3cb71f0d3b0..cf53d0773ce3 100644 --- a/include/linux/crc-t10dif.h +++ b/include/linux/crc-t10dif.h @@ -6,7 +6,8 @@ #define CRC_T10DIF_DIGEST_SIZE 2 #define CRC_T10DIF_BLOCK_SIZE 1 -__u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, size_t len); -__u16 crc_t10dif(unsigned char const *, size_t); +extern __u16 crc_t10dif_generic(__u16 crc, const unsigned char *buffer, + size_t len); +extern __u16 crc_t10dif(unsigned char const *, size_t); #endif diff --git a/include/linux/t10-pi.h b/include/linux/t10-pi.h new file mode 100644 index 000000000000..6a8b9942632d --- /dev/null +++ b/include/linux/t10-pi.h @@ -0,0 +1,22 @@ +#ifndef _LINUX_T10_PI_H +#define _LINUX_T10_PI_H + +#include +#include + +/* + * T10 Protection Information tuple. + */ +struct t10_pi_tuple { + __be16 guard_tag; /* Checksum */ + __be16 app_tag; /* Opaque storage */ + __be32 ref_tag; /* Target LBA or indirect LBA */ +}; + + +extern struct blk_integrity t10_pi_type1_crc; +extern struct blk_integrity t10_pi_type1_ip; +extern struct blk_integrity t10_pi_type3_crc; +extern struct blk_integrity t10_pi_type3_ip; + +#endif -- cgit v1.2.3 From c611529e7cd3465ec0eada0f44200e8420c38908 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 26 Sep 2014 19:20:08 -0400 Subject: sd: Honor block layer integrity handling flags A set of flags introduced in the block layer enable better control over how protection information is handled. These flags are useful for both error injection and data recovery purposes. Checking can be enabled and disabled for controller and disk, and the guard tag format is now a per-I/O property. Update sd_protect_op to communicate the relevant information to the low-level device driver via a set of flags in scsi_cmnd. Signed-off-by: Martin K. Petersen Reviewed-by: Sagi Grimberg Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 33 ++++++++++++++++++++++++--------- 1 file changed, 24 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 14bff3fe56d4..ce6b75964b71 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -292,6 +292,14 @@ static inline unsigned bio_segments(struct bio *bio) */ #define bio_get(bio) atomic_inc(&(bio)->bi_cnt) +enum bip_flags { + BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ + BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ + BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ + BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ + BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ +}; + #if defined(CONFIG_BLK_DEV_INTEGRITY) static inline struct bio_integrity_payload *bio_integrity(struct bio *bio) @@ -323,13 +331,15 @@ struct bio_integrity_payload { struct bio_vec bip_inline_vecs[0];/* embedded bvec array */ }; -enum bip_flags { - BIP_BLOCK_INTEGRITY = 1 << 0, /* block layer owns integrity data */ - BIP_MAPPED_INTEGRITY = 1 << 1, /* ref tag has been remapped */ - BIP_CTRL_NOCHECK = 1 << 2, /* disable HBA integrity checking */ - BIP_DISK_NOCHECK = 1 << 3, /* disable disk integrity checking */ - BIP_IP_CHECKSUM = 1 << 4, /* IP checksum */ -}; +static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) +{ + struct bio_integrity_payload *bip = bio_integrity(bio); + + if (bip) + return bip->bip_flags & flag; + + return false; +} static inline sector_t bip_get_seed(struct bio_integrity_payload *bip) { @@ -701,9 +711,9 @@ extern void bio_integrity_init(void); #else /* CONFIG_BLK_DEV_INTEGRITY */ -static inline int bio_integrity(struct bio *bio) +static inline void *bio_integrity(struct bio *bio) { - return 0; + return NULL; } static inline bool bio_integrity_enabled(struct bio *bio) @@ -754,6 +764,11 @@ static inline void bio_integrity_init(void) return; } +static inline bool bio_integrity_flagged(struct bio *bio, enum bip_flags flag) +{ + return false; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ #endif /* CONFIG_BLOCK */ -- cgit v1.2.3 From d8f429e1669b9709f5b669aac9d734dbe0640891 Mon Sep 17 00:00:00 2001 From: Junichi Nomura Date: Fri, 3 Oct 2014 17:27:12 -0400 Subject: block: add bioset_create_nobvec() Users of bio_clone_fast() do not want bios with their own bvecs. Allocating a bvec mempool as part of the bioset intended for such users is a waste of memory. bioset_create_nobvec() creates a bioset that doesn't have the bvec mempool. Signed-off-by: Jun'ichi Nomura Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index ce6b75964b71..7347f486ceca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -378,6 +378,7 @@ static inline struct bio *bio_next_split(struct bio *bio, int sectors, } extern struct bio_set *bioset_create(unsigned int, unsigned int); +extern struct bio_set *bioset_create_nobvec(unsigned int, unsigned int); extern void bioset_free(struct bio_set *); extern mempool_t *biovec_create_pool(int pool_entries); -- cgit v1.2.3 From b8839b8c55f3fdd60dc36abcda7e0266aff7985c Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 8 Oct 2014 18:26:13 -0400 Subject: block: fix alignment_offset math that assumes io_min is a power-of-2 The math in both blk_stack_limits() and queue_limit_alignment_offset() assume that a block device's io_min (aka minimum_io_size) is always a power-of-2. Fix the math such that it works for non-power-of-2 io_min. This issue (of alignment_offset != 0) became apparent when testing dm-thinp with a thinp blocksize that matches a RAID6 stripesize of 1280K. Commit fdfb4c8c1 ("dm thin: set minimum_io_size to pool's data block size") unlocked the potential for alignment_offset != 0 due to the dm-thin-pool's io_min possibly being a non-power-of-2. Signed-off-by: Mike Snitzer Cc: stable@vger.kernel.org Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 038b40f84c7a..554639249fd6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1279,10 +1279,9 @@ static inline int queue_alignment_offset(struct request_queue *q) static inline int queue_limit_alignment_offset(struct queue_limits *lim, sector_t sector) { unsigned int granularity = max(lim->physical_block_size, lim->io_min); - unsigned int alignment = (sector << 9) & (granularity - 1); + unsigned int alignment = sector_div(sector, granularity >> 9) << 9; - return (granularity + lim->alignment_offset - alignment) - & (granularity - 1); + return (granularity + lim->alignment_offset - alignment) % granularity; } static inline int bdev_alignment_offset(struct block_device *bdev) -- cgit v1.2.3 From e19a8a0ad2d255316830ead05b59c5a704434cbb Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 14 Oct 2014 09:00:44 -0600 Subject: block: Remove REQ_KERNEL REQ_KERNEL is no longer used. Remove it and drop the redundant uio argument to nfs_file_direct_{read,write}. Signed-off-by: Martin K. Petersen Cc: Christoph Hellwig Reported-by: Dan Carpenter Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- include/linux/fs.h | 2 -- include/linux/nfs_fs.h | 4 ++-- 3 files changed, 2 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 38bc008e4503..445d59231bc4 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -187,7 +187,6 @@ enum rq_flag_bits { __REQ_FLUSH_SEQ, /* request for flush sequence */ __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_KERNEL, /* direct IO to kernel pages */ __REQ_PM, /* runtime pm request */ __REQ_HASHED, /* on IO scheduler merge hash */ __REQ_MQ_INFLIGHT, /* track inflight for MQ */ @@ -241,7 +240,6 @@ enum rq_flag_bits { #define REQ_IO_STAT (1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) #define REQ_SECURE (1ULL << __REQ_SECURE) -#define REQ_KERNEL (1ULL << __REQ_KERNEL) #define REQ_PM (1ULL << __REQ_PM) #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) diff --git a/include/linux/fs.h b/include/linux/fs.h index 94187721ad41..9b5bc1cacb8e 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -192,8 +192,6 @@ typedef void (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ 0 #define WRITE RW_MASK #define READA RWA_MASK -#define KERNEL_READ (READ|REQ_KERNEL) -#define KERNEL_WRITE (WRITE|REQ_KERNEL) #define READ_SYNC (READ | REQ_SYNC) #define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) diff --git a/include/linux/nfs_fs.h b/include/linux/nfs_fs.h index 5180a7ededec..e6e1c4e0d7f3 100644 --- a/include/linux/nfs_fs.h +++ b/include/linux/nfs_fs.h @@ -464,10 +464,10 @@ extern int nfs3_removexattr (struct dentry *, const char *name); extern ssize_t nfs_direct_IO(int, struct kiocb *, struct iov_iter *, loff_t); extern ssize_t nfs_file_direct_read(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos, bool uio); + loff_t pos); extern ssize_t nfs_file_direct_write(struct kiocb *iocb, struct iov_iter *iter, - loff_t pos, bool uio); + loff_t pos); /* * linux/fs/nfs/dir.c -- cgit v1.2.3