From 4e49ea4a3d276365bf7396c9b77b4d1d5923835a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:41 -0500 Subject: block/fs/drivers: remove rw argument from submit_bio This has callers of submit_bio/submit_bio_wait set the bio->bi_rw instead of passing it in. This makes that use the same as generic_make_request and how we set the other bio fields. Signed-off-by: Mike Christie Fixed up fs/ext4/crypto.c Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- include/linux/fs.h | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9faebf7f9a33..3bde94234eea 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -473,7 +473,7 @@ static inline void bio_io_error(struct bio *bio) struct request_queue; extern int bio_phys_segments(struct request_queue *, struct bio *); -extern int submit_bio_wait(int rw, struct bio *bio); +extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); extern void bio_init(struct bio *); diff --git a/include/linux/fs.h b/include/linux/fs.h index dd288148a6b1..65e4c51ecb3d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2747,7 +2747,7 @@ static inline void remove_inode_hash(struct inode *inode) extern void inode_sb_list_add(struct inode *inode); #ifdef CONFIG_BLOCK -extern blk_qc_t submit_bio(int, struct bio *); +extern blk_qc_t submit_bio(struct bio *); extern int bdev_read_only(struct block_device *); #endif extern int set_blocksize(struct block_device *, int); -- cgit v1.2.3 From f21508211d2b16e65821abd171378fa6ece126fe Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:42 -0500 Subject: block: add REQ_OP definitions and helpers The following patches separate the operation (WRITE, READ, DISCARD, etc) from the rq_flag_bits flags. This patch adds definitions for request/bio operations (REQ_OPs) and adds request/bio accessors to get/set the op. In this patch the REQ_OPs match the REQ rq_flag_bits ones for compat reasons while all the code is converted to use the op accessors in the set. In the last patches the op will become a number and the accessors and helpers in this patch will be dropped or updated. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 +++ include/linux/blk_types.h | 24 ++++++++++++++++++++++++ include/linux/blkdev.h | 10 +++++++++- include/linux/fs.h | 26 ++++++++++++++++++++++++-- 4 files changed, 60 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 3bde94234eea..09c5308494a6 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -44,6 +44,9 @@ #define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) +#define bio_op(bio) (op_from_rq_bits((bio)->bi_rw)) +#define bio_set_op_attrs(bio, op, flags) ((bio)->bi_rw |= (op | flags)) + /* * upper 16 bits of bi_rw define the io priority of this bio */ diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 77e5d81f07aa..6e60baa583da 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -242,6 +242,30 @@ enum rq_flag_bits { #define REQ_HASHED (1ULL << __REQ_HASHED) #define REQ_MQ_INFLIGHT (1ULL << __REQ_MQ_INFLIGHT) +enum req_op { + REQ_OP_READ, + REQ_OP_WRITE = REQ_WRITE, + REQ_OP_DISCARD = REQ_DISCARD, + REQ_OP_WRITE_SAME = REQ_WRITE_SAME, +}; + +/* + * tmp cpmpat. Users used to set the write bit for all non reads, but + * we will be dropping the bitmap use for ops. Support both until + * the end of the patchset. + */ +static inline int op_from_rq_bits(u64 flags) +{ + if (flags & REQ_OP_DISCARD) + return REQ_OP_DISCARD; + else if (flags & REQ_OP_WRITE_SAME) + return REQ_OP_WRITE_SAME; + else if (flags & REQ_OP_WRITE) + return REQ_OP_WRITE; + else + return REQ_OP_READ; +} + typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U #define BLK_QC_T_SHIFT 16 diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3d9cf326574f..49c2dbcad583 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -200,6 +200,13 @@ struct request { struct request *next_rq; }; +#define req_op(req) (op_from_rq_bits((req)->cmd_flags)) +#define req_set_op(req, op) ((req)->cmd_flags |= op) +#define req_set_op_attrs(req, op, flags) do { \ + req_set_op(req, op); \ + (req)->cmd_flags |= flags; \ +} while (0) + static inline unsigned short req_get_ioprio(struct request *req) { return req->ioprio; @@ -597,7 +604,8 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) ((int)((rq)->cmd_flags & 1)) +#define rq_data_dir(rq) \ + (op_is_write(op_from_rq_bits(rq->cmd_flags)) ? WRITE : READ) /* * Driver can handle struct request, if it either has an old style diff --git a/include/linux/fs.h b/include/linux/fs.h index 65e4c51ecb3d..62ca2f9cad95 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2464,15 +2464,37 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK +/* + * tmp cpmpat. Users used to set the write bit for all non reads, but + * we will be dropping the bitmap use for ops. Support both until + * the end of the patchset. + */ +static inline bool op_is_write(unsigned long flags) +{ + if (flags & (REQ_OP_WRITE | REQ_OP_WRITE_SAME | REQ_OP_DISCARD)) + return true; + else + return false; +} + /* * return READ, READA, or WRITE */ -#define bio_rw(bio) ((bio)->bi_rw & (RW_MASK | RWA_MASK)) +static inline int bio_rw(struct bio *bio) +{ + if (op_is_write(op_from_rq_bits(bio->bi_rw))) + return WRITE; + + return bio->bi_rw & RWA_MASK; +} /* * return data direction, READ or WRITE */ -#define bio_data_dir(bio) ((bio)->bi_rw & 1) +static inline int bio_data_dir(struct bio *bio) +{ + return op_is_write(op_from_rq_bits(bio->bi_rw)) ? WRITE : READ; +} extern void check_disk_size_change(struct gendisk *disk, struct block_device *bdev); -- cgit v1.2.3 From 2a222ca992c35aee1e83af428f3dd26a3f5d5d94 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:43 -0500 Subject: fs: have submit_bh users pass in op and flags separately This has submit_bh users pass in the operation and flags separately, so submit_bh_wbc can setup the bio op and bi_rw flags on the bio that is submitted. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index d48daa3f6f20..bc9a45d1e3af 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -189,10 +189,11 @@ void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); void ll_rw_block(int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); -int __sync_dirty_buffer(struct buffer_head *bh, int rw); -void write_dirty_buffer(struct buffer_head *bh, int rw); -int _submit_bh(int rw, struct buffer_head *bh, unsigned long bio_flags); -int submit_bh(int, struct buffer_head *); +int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); +void write_dirty_buffer(struct buffer_head *bh, int op_flags); +int _submit_bh(int op, int op_flags, struct buffer_head *bh, + unsigned long bio_flags); +int submit_bh(int, int, struct buffer_head *); void write_boundary_block(struct block_device *bdev, sector_t bblock, unsigned blocksize); int bh_uptodate_or_lock(struct buffer_head *bh); -- cgit v1.2.3 From dfec8a14fc9043039e3c04807caf39dc71102816 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:44 -0500 Subject: fs: have ll_rw_block users pass in op and flags separately This has ll_rw_block users pass in the operation and flags separately, so ll_rw_block can setup the bio op and bi_rw flags on the bio that is submitted. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/buffer_head.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index bc9a45d1e3af..d1dd29338ba0 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -187,7 +187,7 @@ struct buffer_head *alloc_buffer_head(gfp_t gfp_flags); void free_buffer_head(struct buffer_head * bh); void unlock_buffer(struct buffer_head *bh); void __lock_buffer(struct buffer_head *bh); -void ll_rw_block(int, int, struct buffer_head * bh[]); +void ll_rw_block(int, int, int, struct buffer_head * bh[]); int sync_dirty_buffer(struct buffer_head *bh); int __sync_dirty_buffer(struct buffer_head *bh, int op_flags); void write_dirty_buffer(struct buffer_head *bh, int op_flags); -- cgit v1.2.3 From a8ebb056a8aeb58aafef0af241a6b3ac34ac86bd Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:45 -0500 Subject: block, drivers, cgroup: use op_is_write helper instead of checking for REQ_WRITE We currently set REQ_WRITE/WRITE for all non READ IOs like discard, flush, writesame, etc. In the next patches where we no longer set up the op as a bitmap, we will not be able to detect a operation direction like writesame by testing if REQ_WRITE is set. This patch converts the drivers and cgroup to use the op_is_write helper. This should just cover the simple cases. I did dm, md and bcache in their own patches because they were more involved. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c02e669945e9..04f9c8d64060 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -601,7 +601,7 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, { struct percpu_counter *cnt; - if (rw & REQ_WRITE) + if (op_is_write(rw)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; -- cgit v1.2.3 From 95fe6c1a209ef89d9f94dd04a0ad72be1487d5d5 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:48 -0500 Subject: block, fs, mm, drivers: use bio set/get op accessors This patch converts the simple bi_rw use cases in the block, drivers, mm and fs code to set/get the bio operation using bio_set_op_attrs/bio_op These should be simple one or two liner cases, so I just did them in one patch. The next patches handle the more complicated cases in a module per patch. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bio.h | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 09c5308494a6..4568647269a7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -109,18 +109,23 @@ static inline bool bio_has_data(struct bio *bio) { if (bio && bio->bi_iter.bi_size && - !(bio->bi_rw & REQ_DISCARD)) + bio_op(bio) != REQ_OP_DISCARD) return true; return false; } +static inline bool bio_no_advance_iter(struct bio *bio) +{ + return bio_op(bio) == REQ_OP_DISCARD || bio_op(bio) == REQ_OP_WRITE_SAME; +} + static inline bool bio_is_rw(struct bio *bio) { if (!bio_has_data(bio)) return false; - if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + if (bio_no_advance_iter(bio)) return false; return true; @@ -228,7 +233,7 @@ static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, { iter->bi_sector += bytes >> 9; - if (bio->bi_rw & BIO_NO_ADVANCE_ITER_MASK) + if (bio_no_advance_iter(bio)) iter->bi_size -= bytes; else bvec_iter_advance(bio->bi_io_vec, iter, bytes); @@ -256,10 +261,10 @@ static inline unsigned bio_segments(struct bio *bio) * differently: */ - if (bio->bi_rw & REQ_DISCARD) + if (bio_op(bio) == REQ_OP_DISCARD) return 1; - if (bio->bi_rw & REQ_WRITE_SAME) + if (bio_op(bio) == REQ_OP_WRITE_SAME) return 1; bio_for_each_segment(bv, bio, iter) -- cgit v1.2.3 From 469e3216e20a3946a292ff0414ab86de408d465e Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:49 -0500 Subject: block discard: use bio set op accessor This converts the block issue discard helper and users to use the bio_set_op_attrs accessor and only pass in the operation flags like REQ_SEQURE. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 49c2dbcad583..8c78aca080af 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1149,7 +1149,8 @@ extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int __blkdev_issue_discard(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, int type, struct bio **biop); + sector_t nr_sects, gfp_t gfp_mask, int op_flags, + struct bio **biop); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, -- cgit v1.2.3 From 8a4c1e42e0eb7189296ac140761370e2549cffe7 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:31:50 -0500 Subject: direct-io: use bio set/get op accessors This patch has the dio code use a REQ_OP for the op and rq_flag_bits for bi_rw flags. To set/get the op it uses the bio_set_op_attrs/bio_op accssors. It also begins to convert btrfs's dio_submit_t because of the dio submit_io callout use. The next patches will completely convert this code and the reset of the btrfs code paths. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 62ca2f9cad95..af6f3c7e4822 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2824,7 +2824,7 @@ extern int generic_file_open(struct inode * inode, struct file * filp); extern int nonseekable_open(struct inode * inode, struct file * filp); #ifdef CONFIG_BLOCK -typedef void (dio_submit_t)(int rw, struct bio *bio, struct inode *inode, +typedef void (dio_submit_t)(struct bio *bio, struct inode *inode, loff_t file_offset); enum { -- cgit v1.2.3 From e6047149db702374f240dc18bab665479e25a8cc Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:04 -0500 Subject: dm: use bio op accessors Separate the op from the rq_flag_bits and have dm set/get the bio using bio_set_op_attrs/bio_op. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/dm-io.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/dm-io.h b/include/linux/dm-io.h index a68cbe59e6ad..b91b023deffb 100644 --- a/include/linux/dm-io.h +++ b/include/linux/dm-io.h @@ -57,7 +57,8 @@ struct dm_io_notify { */ struct dm_io_client; struct dm_io_request { - int bi_rw; /* READ|WRITE - not READA */ + int bi_op; /* REQ_OP */ + int bi_op_flags; /* rq_flag_bits */ struct dm_io_memory mem; /* Memory to use for io */ struct dm_io_notify notify; /* Synchronous if notify.fn is NULL */ struct dm_io_client *client; /* Client memory handler */ -- cgit v1.2.3 From ba568ea0a2ef9a193ca24874228474ec7ae2ff98 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:13 -0500 Subject: block: prepare elevator to use REQ_OPs. This patch converts the elevator code to use separate variables for the operation and flags, and to check req_op for the REQ_OP. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/elevator.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 638b324f0291..953d28647435 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -26,7 +26,7 @@ typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, int); +typedef int (elevator_may_queue_fn) (struct request_queue *, int, int); typedef void (elevator_init_icq_fn) (struct io_cq *); typedef void (elevator_exit_icq_fn) (struct io_cq *); @@ -134,7 +134,7 @@ extern struct request *elv_former_request(struct request_queue *, struct request extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); extern void elv_unregister_queue(struct request_queue *q); -extern int elv_may_queue(struct request_queue *, int); +extern int elv_may_queue(struct request_queue *, int, int); extern void elv_completed_request(struct request_queue *, struct request *); extern int elv_set_request(struct request_queue *q, struct request *rq, struct bio *bio, gfp_t gfp_mask); -- cgit v1.2.3 From 63a4cc24867de73626e16767ce616c50dc5438d3 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:14 -0500 Subject: blkg_rwstat: separate op from flags The bio and request operation and flags are going to be separate definitions, so we cannot pass them in as a bitmap. This patch converts the blkg_rwstat code and its caller, cfq, to pass in the values separately. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 04f9c8d64060..f77150a4a96a 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -590,25 +590,26 @@ static inline void blkg_rwstat_exit(struct blkg_rwstat *rwstat) /** * blkg_rwstat_add - add a value to a blkg_rwstat * @rwstat: target blkg_rwstat - * @rw: mask of REQ_{WRITE|SYNC} + * @op: REQ_OP + * @op_flags: rq_flag_bits * @val: value to add * * Add @val to @rwstat. The counters are chosen according to @rw. The * caller is responsible for synchronizing calls to this function. */ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, - int rw, uint64_t val) + int op, int op_flags, uint64_t val) { struct percpu_counter *cnt; - if (op_is_write(rw)) + if (op_is_write(op)) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_WRITE]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_READ]; __percpu_counter_add(cnt, val, BLKG_STAT_CPU_BATCH); - if (rw & REQ_SYNC) + if (op_flags & REQ_SYNC) cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_SYNC]; else cnt = &rwstat->cpu_cnt[BLKG_RWSTAT_ASYNC]; @@ -713,9 +714,9 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, if (!throtl) { blkg = blkg ?: q->root_blkg; - blkg_rwstat_add(&blkg->stat_bytes, bio->bi_rw, + blkg_rwstat_add(&blkg->stat_bytes, bio_op(bio), bio->bi_rw, bio->bi_iter.bi_size); - blkg_rwstat_add(&blkg->stat_ios, bio->bi_rw, 1); + blkg_rwstat_add(&blkg->stat_ios, bio_op(bio), bio->bi_rw, 1); } rcu_read_unlock(); -- cgit v1.2.3 From 8fe0d473f5477e9916d3ac581a226acfe83142be Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:15 -0500 Subject: block: convert merge/insert code to check for REQ_OPs. This patch converts the block layer merging code to use separate variables for the operation and flags, and to check req_op for the REQ_OP. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 20 ++++++++++---------- 1 file changed, 10 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8c78aca080af..25f01ff19780 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -666,16 +666,16 @@ static inline bool rq_mergeable(struct request *rq) return true; } -static inline bool blk_check_merge_flags(unsigned int flags1, - unsigned int flags2) +static inline bool blk_check_merge_flags(unsigned int flags1, unsigned int op1, + unsigned int flags2, unsigned int op2) { - if ((flags1 & REQ_DISCARD) != (flags2 & REQ_DISCARD)) + if ((op1 == REQ_OP_DISCARD) != (op2 == REQ_OP_DISCARD)) return false; if ((flags1 & REQ_SECURE) != (flags2 & REQ_SECURE)) return false; - if ((flags1 & REQ_WRITE_SAME) != (flags2 & REQ_WRITE_SAME)) + if ((op1 == REQ_OP_WRITE_SAME) != (op2 == REQ_OP_WRITE_SAME)) return false; return true; @@ -887,12 +887,12 @@ static inline unsigned int blk_rq_cur_sectors(const struct request *rq) } static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, - unsigned int cmd_flags) + int op) { - if (unlikely(cmd_flags & REQ_DISCARD)) + if (unlikely(op == REQ_OP_DISCARD)) return min(q->limits.max_discard_sectors, UINT_MAX >> 9); - if (unlikely(cmd_flags & REQ_WRITE_SAME)) + if (unlikely(op == REQ_OP_WRITE_SAME)) return q->limits.max_write_same_sectors; return q->limits.max_sectors; @@ -919,11 +919,11 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) if (unlikely(rq->cmd_type != REQ_TYPE_FS)) return q->limits.max_hw_sectors; - if (!q->limits.chunk_sectors || (rq->cmd_flags & REQ_DISCARD)) - return blk_queue_get_max_sectors(q, rq->cmd_flags); + if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD)) + return blk_queue_get_max_sectors(q, req_op(rq)); return min(blk_max_size_offset(q, blk_rq_pos(rq)), - blk_queue_get_max_sectors(q, rq->cmd_flags)); + blk_queue_get_max_sectors(q, req_op(rq))); } static inline unsigned int blk_rq_count_bios(struct request *rq) -- cgit v1.2.3 From d9d8c5c489f4969667a05727e9c2c4f78cffef1a Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:16 -0500 Subject: block: convert is_sync helpers to use REQ_OPs. This patch converts the is_sync helpers to use separate variables for the operation and flags. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 25f01ff19780..4937c056fcbd 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -624,14 +624,14 @@ static inline unsigned int blk_queue_cluster(struct request_queue *q) /* * We regard a request as sync, if either a read or a sync write */ -static inline bool rw_is_sync(unsigned int rw_flags) +static inline bool rw_is_sync(int op, unsigned int rw_flags) { - return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); + return op == REQ_OP_READ || (rw_flags & REQ_SYNC); } static inline bool rq_is_sync(struct request *rq) { - return rw_is_sync(rq->cmd_flags); + return rw_is_sync(req_op(rq), rq->cmd_flags); } static inline bool blk_rl_full(struct request_list *rl, bool sync) -- cgit v1.2.3 From 1b9a9ab78b0ab79dc1f0ddd5fbed7833ec7de3a4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:18 -0500 Subject: blktrace: use op accessors Have blktrace use the req/bio op accessor to get the REQ_OP. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blktrace_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h index 0f3172b8b225..cceb72f9e29f 100644 --- a/include/linux/blktrace_api.h +++ b/include/linux/blktrace_api.h @@ -118,7 +118,7 @@ static inline int blk_cmd_buf_len(struct request *rq) } extern void blk_dump_cmd(char *buf, struct request *rq); -extern void blk_fill_rwbs(char *rwbs, u32 rw, int bytes); +extern void blk_fill_rwbs(char *rwbs, int op, u32 rw, int bytes); #endif /* CONFIG_EVENT_TRACING && CONFIG_BLOCK */ -- cgit v1.2.3 From 43b62ce3ff0ac1f13b732cb1bd130f522af1dba4 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:20 -0500 Subject: block: move bio io prio to a new field In the next patch, we move drop the compat code and make the op a separate value that is hidden in bi_rw. To give the op and rq bits flags room to grow this moves prio to its own field. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bio.h | 14 ++------------ include/linux/blk_types.h | 5 ++--- 2 files changed, 4 insertions(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 4568647269a7..35108c2f8ea9 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -47,18 +47,8 @@ #define bio_op(bio) (op_from_rq_bits((bio)->bi_rw)) #define bio_set_op_attrs(bio, op, flags) ((bio)->bi_rw |= (op | flags)) -/* - * upper 16 bits of bi_rw define the io priority of this bio - */ -#define BIO_PRIO_SHIFT (8 * sizeof(unsigned long) - IOPRIO_BITS) -#define bio_prio(bio) ((bio)->bi_rw >> BIO_PRIO_SHIFT) -#define bio_prio_valid(bio) ioprio_valid(bio_prio(bio)) - -#define bio_set_prio(bio, prio) do { \ - WARN_ON(prio >= (1 << IOPRIO_BITS)); \ - (bio)->bi_rw &= ((1UL << BIO_PRIO_SHIFT) - 1); \ - (bio)->bi_rw |= ((unsigned long) (prio) << BIO_PRIO_SHIFT); \ -} while (0) +#define bio_prio(bio) (bio)->bi_ioprio +#define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) /* * various member access, note that bio_data should of course not be used diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 6e60baa583da..27384134f8a0 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -48,9 +48,8 @@ struct bio { struct block_device *bi_bdev; unsigned int bi_flags; /* status, command, etc */ int bi_error; - unsigned long bi_rw; /* bottom bits READ/WRITE, - * top bits priority - */ + unsigned long bi_rw; /* READ/WRITE */ + unsigned short bi_ioprio; struct bvec_iter bi_iter; -- cgit v1.2.3 From 6296b9604fcebc2dd8d6ec396de80b2da84d9700 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:21 -0500 Subject: block, drivers, fs: shrink bi_rw from long to int We don't need bi_rw to be so large on 64 bit archs, so reduce it to unsigned int. Signed-off-by: Mike Christie Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 27384134f8a0..5efb6f12d2f5 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -48,7 +48,7 @@ struct bio { struct block_device *bi_bdev; unsigned int bi_flags; /* status, command, etc */ int bi_error; - unsigned long bi_rw; /* READ/WRITE */ + unsigned int bi_rw; /* READ/WRITE */ unsigned short bi_ioprio; struct bvec_iter bi_iter; -- cgit v1.2.3 From 4e1b2d52a80d79296a5d899d73249748dea71a53 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:22 -0500 Subject: block, fs, drivers: remove REQ_OP compat defs and related code This patch drops the compat definition of req_op where it matches the rq_flag_bits definitions, and drops the related old and compat code that allowed users to set either the op or flags for the operation. We also then store the operation in the bi_rw/cmd_flags field similar to how we used to store the bio ioprio where it sat in the upper bits of the field. Signed-off-by: Mike Christie Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 --- include/linux/blk_types.h | 52 ++++++++++++++++++----------------------------- include/linux/blkdev.h | 14 +++++++++---- include/linux/fs.h | 37 ++++++++++++++------------------- 4 files changed, 45 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 35108c2f8ea9..0bbb2e332410 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -44,9 +44,6 @@ #define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) -#define bio_op(bio) (op_from_rq_bits((bio)->bi_rw)) -#define bio_set_op_attrs(bio, op, flags) ((bio)->bi_rw |= (op | flags)) - #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 5efb6f12d2f5..23c1ab2a9475 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -48,7 +48,9 @@ struct bio { struct block_device *bi_bdev; unsigned int bi_flags; /* status, command, etc */ int bi_error; - unsigned int bi_rw; /* READ/WRITE */ + unsigned int bi_rw; /* bottom bits req flags, + * top bits REQ_OP + */ unsigned short bi_ioprio; struct bvec_iter bi_iter; @@ -106,6 +108,16 @@ struct bio { struct bio_vec bi_inline_vecs[0]; }; +#define BIO_OP_SHIFT (8 * sizeof(unsigned int) - REQ_OP_BITS) +#define bio_op(bio) ((bio)->bi_rw >> BIO_OP_SHIFT) + +#define bio_set_op_attrs(bio, op, op_flags) do { \ + WARN_ON(op >= (1 << REQ_OP_BITS)); \ + (bio)->bi_rw &= ((1 << BIO_OP_SHIFT) - 1); \ + (bio)->bi_rw |= ((unsigned int) (op) << BIO_OP_SHIFT); \ + (bio)->bi_rw |= op_flags; \ +} while (0) + #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) /* @@ -144,7 +156,6 @@ struct bio { */ enum rq_flag_bits { /* common flags */ - __REQ_WRITE, /* not set, read. set, write */ __REQ_FAILFAST_DEV, /* no driver retries of device errors */ __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ @@ -152,9 +163,7 @@ enum rq_flag_bits { __REQ_SYNC, /* request is sync (sync write or read) */ __REQ_META, /* metadata io request */ __REQ_PRIO, /* boost priority in cfq */ - __REQ_DISCARD, /* request to discard sectors */ - __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ - __REQ_WRITE_SAME, /* write same block many times */ + __REQ_SECURE, /* secure discard (used with REQ_OP_DISCARD) */ __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ @@ -190,28 +199,22 @@ enum rq_flag_bits { __REQ_NR_BITS, /* stops here */ }; -#define REQ_WRITE (1ULL << __REQ_WRITE) #define REQ_FAILFAST_DEV (1ULL << __REQ_FAILFAST_DEV) #define REQ_FAILFAST_TRANSPORT (1ULL << __REQ_FAILFAST_TRANSPORT) #define REQ_FAILFAST_DRIVER (1ULL << __REQ_FAILFAST_DRIVER) #define REQ_SYNC (1ULL << __REQ_SYNC) #define REQ_META (1ULL << __REQ_META) #define REQ_PRIO (1ULL << __REQ_PRIO) -#define REQ_DISCARD (1ULL << __REQ_DISCARD) -#define REQ_WRITE_SAME (1ULL << __REQ_WRITE_SAME) #define REQ_NOIDLE (1ULL << __REQ_NOIDLE) #define REQ_INTEGRITY (1ULL << __REQ_INTEGRITY) #define REQ_FAILFAST_MASK \ (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ - (REQ_WRITE | REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | \ - REQ_DISCARD | REQ_WRITE_SAME | REQ_NOIDLE | REQ_FLUSH | REQ_FUA | \ - REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) + (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ + REQ_FLUSH | REQ_FUA | REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) #define REQ_CLONE_MASK REQ_COMMON_MASK -#define BIO_NO_ADVANCE_ITER_MASK (REQ_DISCARD|REQ_WRITE_SAME) - /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ) @@ -243,27 +246,12 @@ enum rq_flag_bits { enum req_op { REQ_OP_READ, - REQ_OP_WRITE = REQ_WRITE, - REQ_OP_DISCARD = REQ_DISCARD, - REQ_OP_WRITE_SAME = REQ_WRITE_SAME, + REQ_OP_WRITE, + REQ_OP_DISCARD, /* request to discard sectors */ + REQ_OP_WRITE_SAME, /* write same block many times */ }; -/* - * tmp cpmpat. Users used to set the write bit for all non reads, but - * we will be dropping the bitmap use for ops. Support both until - * the end of the patchset. - */ -static inline int op_from_rq_bits(u64 flags) -{ - if (flags & REQ_OP_DISCARD) - return REQ_OP_DISCARD; - else if (flags & REQ_OP_WRITE_SAME) - return REQ_OP_WRITE_SAME; - else if (flags & REQ_OP_WRITE) - return REQ_OP_WRITE; - else - return REQ_OP_READ; -} +#define REQ_OP_BITS 2 typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4937c056fcbd..78ae3dbf2de1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -200,8 +200,15 @@ struct request { struct request *next_rq; }; -#define req_op(req) (op_from_rq_bits((req)->cmd_flags)) -#define req_set_op(req, op) ((req)->cmd_flags |= op) +#define REQ_OP_SHIFT (8 * sizeof(u64) - REQ_OP_BITS) +#define req_op(req) ((req)->cmd_flags >> REQ_OP_SHIFT) + +#define req_set_op(req, op) do { \ + WARN_ON(op >= (1 << REQ_OP_BITS)); \ + (req)->cmd_flags &= ((1ULL << REQ_OP_SHIFT) - 1); \ + (req)->cmd_flags |= ((u64) (op) << REQ_OP_SHIFT); \ +} while (0) + #define req_set_op_attrs(req, op, flags) do { \ req_set_op(req, op); \ (req)->cmd_flags |= flags; \ @@ -604,8 +611,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -#define rq_data_dir(rq) \ - (op_is_write(op_from_rq_bits(rq->cmd_flags)) ? WRITE : READ) +#define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) /* * Driver can handle struct request, if it either has an old style diff --git a/include/linux/fs.h b/include/linux/fs.h index af6f3c7e4822..ccd166477487 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -152,9 +152,10 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define CHECK_IOVEC_ONLY -1 /* - * The below are the various read and write types that we support. Some of + * The below are the various read and write flags that we support. Some of * them include behavioral modifiers that send information down to the - * block layer and IO scheduler. Terminology: + * block layer and IO scheduler. They should be used along with a req_op. + * Terminology: * * The block layer uses device plugging to defer IO a little bit, in * the hope that we will see more IO very shortly. This increases @@ -193,19 +194,19 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, * non-volatile media on completion. * */ -#define RW_MASK REQ_WRITE +#define RW_MASK REQ_OP_WRITE #define RWA_MASK REQ_RAHEAD -#define READ 0 +#define READ REQ_OP_READ #define WRITE RW_MASK #define READA RWA_MASK -#define READ_SYNC (READ | REQ_SYNC) -#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) -#define WRITE_ODIRECT (WRITE | REQ_SYNC) -#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) -#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define READ_SYNC REQ_SYNC +#define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) +#define WRITE_ODIRECT REQ_SYNC +#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) +#define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what @@ -2464,17 +2465,9 @@ extern void make_bad_inode(struct inode *); extern bool is_bad_inode(struct inode *); #ifdef CONFIG_BLOCK -/* - * tmp cpmpat. Users used to set the write bit for all non reads, but - * we will be dropping the bitmap use for ops. Support both until - * the end of the patchset. - */ -static inline bool op_is_write(unsigned long flags) +static inline bool op_is_write(unsigned int op) { - if (flags & (REQ_OP_WRITE | REQ_OP_WRITE_SAME | REQ_OP_DISCARD)) - return true; - else - return false; + return op == REQ_OP_READ ? false : true; } /* @@ -2482,7 +2475,7 @@ static inline bool op_is_write(unsigned long flags) */ static inline int bio_rw(struct bio *bio) { - if (op_is_write(op_from_rq_bits(bio->bi_rw))) + if (op_is_write(bio_op(bio))) return WRITE; return bio->bi_rw & RWA_MASK; @@ -2493,7 +2486,7 @@ static inline int bio_rw(struct bio *bio) */ static inline int bio_data_dir(struct bio *bio) { - return op_is_write(op_from_rq_bits(bio->bi_rw)) ? WRITE : READ; + return op_is_write(bio_op(bio)) ? WRITE : READ; } extern void check_disk_size_change(struct gendisk *disk, -- cgit v1.2.3 From 3a5e02ced11e22ecd9da3d6710afe15bcfee1d10 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:23 -0500 Subject: block, drivers: add REQ_OP_FLUSH operation This adds a REQ_OP_FLUSH operation that is sent to request_fn based drivers by the block layer's flush code, instead of sending requests with the request->cmd_flags REQ_FLUSH bit set. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 ++- include/linux/blkdev.h | 3 +++ 2 files changed, 5 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 23c1ab2a9475..32d87522f349 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -249,9 +249,10 @@ enum req_op { REQ_OP_WRITE, REQ_OP_DISCARD, /* request to discard sectors */ REQ_OP_WRITE_SAME, /* write same block many times */ + REQ_OP_FLUSH, /* request for cache flush */ }; -#define REQ_OP_BITS 2 +#define REQ_OP_BITS 3 typedef unsigned int blk_qc_t; #define BLK_QC_T_NONE -1U diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 78ae3dbf2de1..0c9f8793c87e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -666,6 +666,9 @@ static inline bool rq_mergeable(struct request *rq) if (rq->cmd_type != REQ_TYPE_FS) return false; + if (req_op(rq) == REQ_OP_FLUSH) + return false; + if (rq->cmd_flags & REQ_NOMERGE_FLAGS) return false; -- cgit v1.2.3 From 28a8f0d317bf225ff15008f5dd66ae16242dd843 Mon Sep 17 00:00:00 2001 From: Mike Christie Date: Sun, 5 Jun 2016 14:32:25 -0500 Subject: block, drivers, fs: rename REQ_FLUSH to REQ_PREFLUSH To avoid confusion between REQ_OP_FLUSH, which is handled by request_fn drivers, and upper layers requesting the block layer perform a flush sequence along with possibly a WRITE, this patch renames REQ_FLUSH to REQ_PREFLUSH. Signed-off-by: Mike Christie Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 8 ++++---- include/linux/fs.h | 4 ++-- 2 files changed, 6 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 32d87522f349..562ab8301217 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -168,7 +168,7 @@ enum rq_flag_bits { __REQ_NOIDLE, /* don't anticipate more IO after this one */ __REQ_INTEGRITY, /* I/O includes block integrity payload */ __REQ_FUA, /* forced unit access */ - __REQ_FLUSH, /* request for cache flush */ + __REQ_PREFLUSH, /* request for cache flush */ /* bio only flags */ __REQ_RAHEAD, /* read ahead, can fail anytime */ @@ -212,12 +212,12 @@ enum rq_flag_bits { (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | REQ_FAILFAST_DRIVER) #define REQ_COMMON_MASK \ (REQ_FAILFAST_MASK | REQ_SYNC | REQ_META | REQ_PRIO | REQ_NOIDLE | \ - REQ_FLUSH | REQ_FUA | REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) + REQ_PREFLUSH | REQ_FUA | REQ_SECURE | REQ_INTEGRITY | REQ_NOMERGE) #define REQ_CLONE_MASK REQ_COMMON_MASK /* This mask is used for both bio and request merge checking */ #define REQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA | REQ_FLUSH_SEQ) + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_PREFLUSH | REQ_FUA | REQ_FLUSH_SEQ) #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_THROTTLED (1ULL << __REQ_THROTTLED) @@ -235,7 +235,7 @@ enum rq_flag_bits { #define REQ_PREEMPT (1ULL << __REQ_PREEMPT) #define REQ_ALLOCED (1ULL << __REQ_ALLOCED) #define REQ_COPY_USER (1ULL << __REQ_COPY_USER) -#define REQ_FLUSH (1ULL << __REQ_FLUSH) +#define REQ_PREFLUSH (1ULL << __REQ_PREFLUSH) #define REQ_FLUSH_SEQ (1ULL << __REQ_FLUSH_SEQ) #define REQ_IO_STAT (1ULL << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1ULL << __REQ_MIXED_MERGE) diff --git a/include/linux/fs.h b/include/linux/fs.h index ccd166477487..183024525d40 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -204,9 +204,9 @@ typedef int (dio_iodone_t)(struct kiocb *iocb, loff_t offset, #define READ_SYNC REQ_SYNC #define WRITE_SYNC (REQ_SYNC | REQ_NOIDLE) #define WRITE_ODIRECT REQ_SYNC -#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) +#define WRITE_FLUSH (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH) #define WRITE_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FUA) -#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) +#define WRITE_FLUSH_FUA (REQ_SYNC | REQ_NOIDLE | REQ_PREFLUSH | REQ_FUA) /* * Attribute flags. These should be or-ed together to figure out what -- cgit v1.2.3 From ca93e45347e61cd85e5d71961f6c94fad143593d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 9 Jun 2016 16:00:35 +0200 Subject: block: better packing for struct request Keep the 32-bit CPU and cmd_type flags together to avoid holes on 64-bit architectures. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0c9f8793c87e..9746d223494c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -96,12 +96,11 @@ struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; - u64 cmd_flags; + int cpu; unsigned cmd_type; + u64 cmd_flags; unsigned long atomic_flags; - int cpu; - /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ sector_t __sector; /* sector cursor */ -- cgit v1.2.3 From 8fc554552c9d89e7bf76cd4cbc0085648bf3125b Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 9 Jun 2016 10:00:58 -0600 Subject: block: move bvec iterator into include/linux/bvec.h bvec iterator helpers should be used to implement by iterate_bvec():lib/iov_iter.c too, and move them into one header, so that we can keep bvec iterator header out of CONFIG_BLOCK. Then we can remove the reinventing of wheel in iterate_bvec(). Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Tested-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bio.h | 51 +----------------------------------- include/linux/bvec.h | 74 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 75 insertions(+), 50 deletions(-) create mode 100644 include/linux/bvec.h (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 0bbb2e332410..8e0f677c26d2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -31,6 +31,7 @@ /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include +#include #define BIO_DEBUG @@ -47,29 +48,6 @@ #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) -/* - * various member access, note that bio_data should of course not be used - * on highmem page vectors - */ -#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) - -#define bvec_iter_page(bvec, iter) \ - (__bvec_iter_bvec((bvec), (iter))->bv_page) - -#define bvec_iter_len(bvec, iter) \ - min((iter).bi_size, \ - __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) - -#define bvec_iter_offset(bvec, iter) \ - (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) - -#define bvec_iter_bvec(bvec, iter) \ -((struct bio_vec) { \ - .bv_page = bvec_iter_page((bvec), (iter)), \ - .bv_len = bvec_iter_len((bvec), (iter)), \ - .bv_offset = bvec_iter_offset((bvec), (iter)), \ -}) - #define bio_iter_iovec(bio, iter) \ bvec_iter_bvec((bio)->bi_io_vec, (iter)) @@ -188,33 +166,6 @@ static inline void *bio_data(struct bio *bio) #define bio_for_each_segment_all(bvl, bio, i) \ for (i = 0, bvl = (bio)->bi_io_vec; i < (bio)->bi_vcnt; i++, bvl++) -static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter, - unsigned bytes) -{ - WARN_ONCE(bytes > iter->bi_size, - "Attempted to advance past end of bvec iter\n"); - - while (bytes) { - unsigned len = min(bytes, bvec_iter_len(bv, *iter)); - - bytes -= len; - iter->bi_size -= len; - iter->bi_bvec_done += len; - - if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { - iter->bi_bvec_done = 0; - iter->bi_idx++; - } - } -} - -#define for_each_bvec(bvl, bio_vec, iter, start) \ - for (iter = (start); \ - (iter).bi_size && \ - ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) - - static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, unsigned bytes) { diff --git a/include/linux/bvec.h b/include/linux/bvec.h new file mode 100644 index 000000000000..29c459da277e --- /dev/null +++ b/include/linux/bvec.h @@ -0,0 +1,74 @@ +/* + * bvec iterator + * + * Copyright (C) 2001 Ming Lei + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public Licens + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- + */ +#ifndef __LINUX_BVEC_ITER_H +#define __LINUX_BVEC_ITER_H + +#include + +/* + * various member access, note that bio_data should of course not be used + * on highmem page vectors + */ +#define __bvec_iter_bvec(bvec, iter) (&(bvec)[(iter).bi_idx]) + +#define bvec_iter_page(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_page) + +#define bvec_iter_len(bvec, iter) \ + min((iter).bi_size, \ + __bvec_iter_bvec((bvec), (iter))->bv_len - (iter).bi_bvec_done) + +#define bvec_iter_offset(bvec, iter) \ + (__bvec_iter_bvec((bvec), (iter))->bv_offset + (iter).bi_bvec_done) + +#define bvec_iter_bvec(bvec, iter) \ +((struct bio_vec) { \ + .bv_page = bvec_iter_page((bvec), (iter)), \ + .bv_len = bvec_iter_len((bvec), (iter)), \ + .bv_offset = bvec_iter_offset((bvec), (iter)), \ +}) + +static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter, + unsigned bytes) +{ + WARN_ONCE(bytes > iter->bi_size, + "Attempted to advance past end of bvec iter\n"); + + while (bytes) { + unsigned len = min(bytes, bvec_iter_len(bv, *iter)); + + bytes -= len; + iter->bi_size -= len; + iter->bi_bvec_done += len; + + if (iter->bi_bvec_done == __bvec_iter_bvec(bv, *iter)->bv_len) { + iter->bi_bvec_done = 0; + iter->bi_idx++; + } + } +} + +#define for_each_bvec(bvl, bio_vec, iter, start) \ + for (iter = (start); \ + (iter).bi_size && \ + ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ + bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) + +#endif /* __LINUX_BVEC_ITER_H */ -- cgit v1.2.3 From 0781e79eb206a62c30cc3eff75aaa889cd85b276 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 30 May 2016 21:34:30 +0800 Subject: block: move two bvec structure into bvec.h This patch moves 'struct bio_vec' and 'struct bvec_iter' into 'include/linux/bvec.h', then always include this header into 'include/linux/blk_types.h'. With this change, both 'struct bvec_iter' and bvec iterator helpers don't depend on CONFIG_BLOCK any more, then we can use bvec iterator to implement iterate_bvec(): lib/iov_iter.c. Reviewed-by: Christoph Hellwig Suggested-by: Christoph Hellwig Signed-off-by: Ming Lei Tested-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - include/linux/blk_types.h | 22 +--------------------- include/linux/bvec.h | 23 ++++++++++++++++++++++- 3 files changed, 23 insertions(+), 23 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 8e0f677c26d2..95500fee0544 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -31,7 +31,6 @@ /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ #include -#include #define BIO_DEBUG diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 562ab8301217..b588e968dc01 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -6,6 +6,7 @@ #define __LINUX_BLK_TYPES_H #include +#include struct bio_set; struct bio; @@ -17,28 +18,7 @@ struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); typedef void (bio_destructor_t) (struct bio *); -/* - * was unsigned short, but we might as well be ready for > 64kB I/O pages - */ -struct bio_vec { - struct page *bv_page; - unsigned int bv_len; - unsigned int bv_offset; -}; - #ifdef CONFIG_BLOCK - -struct bvec_iter { - sector_t bi_sector; /* device address in 512 byte - sectors */ - unsigned int bi_size; /* residual I/O count */ - - unsigned int bi_idx; /* current index into bvl_vec */ - - unsigned int bi_bvec_done; /* number of bytes completed in - current bvec */ -}; - /* * main unit of I/O for the block layer and lower layers (ie drivers and * stacking drivers) diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 29c459da277e..096efd2c83e5 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -20,7 +20,28 @@ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H -#include +#include +#include + +/* + * was unsigned short, but we might as well be ready for > 64kB I/O pages + */ +struct bio_vec { + struct page *bv_page; + unsigned int bv_len; + unsigned int bv_offset; +}; + +struct bvec_iter { + sector_t bi_sector; /* device address in 512 byte + sectors */ + unsigned int bi_size; /* residual I/O count */ + + unsigned int bi_idx; /* current index into bvl_vec */ + + unsigned int bi_bvec_done; /* number of bytes completed in + current bvec */ +}; /* * various member access, note that bio_data should of course not be used -- cgit v1.2.3 From 80f162ff061c9e280589bb1a1890c7fc21c932cd Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 30 May 2016 21:34:31 +0800 Subject: block: mark 1st parameter of bvec_iter_advance as const bvec_iter_advance() only writes the parameter of iterator, so the base address of bvec can be marked as const safely. Without the change, we can see compiling warning in the following patch for implementing iterate_bvec(): lib/iov_iter.c with bvec iterator. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Tested-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bvec.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 096efd2c83e5..701b64a3b7c5 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -66,7 +66,8 @@ struct bvec_iter { .bv_offset = bvec_iter_offset((bvec), (iter)), \ }) -static inline void bvec_iter_advance(struct bio_vec *bv, struct bvec_iter *iter, +static inline void bvec_iter_advance(const struct bio_vec *bv, + struct bvec_iter *iter, unsigned bytes) { WARN_ONCE(bytes > iter->bi_size, -- cgit v1.2.3 From 30ac4607d553277b927457678e48bb2e5de1717d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 9 Jun 2016 10:03:28 -0600 Subject: block: bio: remove BIO_MAX_SECTORS No one need this macro, so remove it. The motivation is for supporting multipage bvecs, in which we only know what the max count of bvecs is supported in the bio, instead of max size or max sectors. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Tested-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 95500fee0544..b64a3d9434d4 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -42,7 +42,6 @@ #define BIO_MAX_PAGES 256 #define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) -#define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) -- cgit v1.2.3 From 1a89694f7899d39aa58cc6f061e97a17089ac025 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Fri, 10 Jun 2016 11:27:12 +0800 Subject: block: bio: kill BIO_MAX_SIZE No one need this macro now, so remove it. Basically only how many bvecs in one bio matters instead of how many bytes in this bio. The motivation is for supporting multipage bvecs, in which we only know what the max count of bvecs is supported in the bio. Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index b64a3d9434d4..b7e1a00810f2 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -41,7 +41,6 @@ #endif #define BIO_MAX_PAGES 256 -#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) #define bio_prio(bio) (bio)->bi_ioprio #define bio_set_prio(bio, prio) ((bio)->bi_ioprio = prio) -- cgit v1.2.3 From 9828c2c6c1048c61034a8b94e6376aeff6d2284f Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Tue, 28 Jun 2016 09:03:59 +0200 Subject: block: Convert fifo_time from ulong to u64 Currently rq->fifo_time is unsigned long but CFQ stores nanosecond timestamp in it which would overflow on 32-bit archs. Convert it to u64 to avoid the overflow. Since the rq->fifo_time is unioned with struct call_single_data(), this does not change the size of struct request in any way. We have to slightly fixup block/deadline-iosched.c so that comparison happens in the right types. Fixes: 9a7f38c42c2b92391d9dabaf9f51df7cfe5608e4 Signed-off-by: Jan Kara Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9746d223494c..d116d3b52c73 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -90,7 +90,7 @@ struct request { struct list_head queuelist; union { struct call_single_data csd; - unsigned long fifo_time; + u64 fifo_time; }; struct request_queue *q; -- cgit v1.2.3 From 163d4baaebe39c0e56d9c08597eab7b3ae0bf334 Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Thu, 23 Jun 2016 17:05:50 -0400 Subject: block: add QUEUE_FLAG_DAX for devices to advertise their DAX support Currently, presence of direct_access() in block_device_operations indicates support of DAX on its block device. Because block_device_operations is instantiated with 'const', this DAX capablity may not be enabled conditinally. In preparation for supporting DAX to device-mapper devices, add QUEUE_FLAG_DAX to request_queue flags to advertise their DAX support. This will allow to set the DAX capability based on how mapped device is composed. Signed-off-by: Toshi Kani Acked-by: Dan Williams Signed-off-by: Mike Snitzer Cc: Jens Axboe Cc: Ross Zwisler Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d116d3b52c73..9d84c98b5c79 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -505,6 +505,7 @@ struct request_queue { #define QUEUE_FLAG_WC 23 /* Write back caching */ #define QUEUE_FLAG_FUA 24 /* device supports FUA writes */ #define QUEUE_FLAG_FLUSH_NQ 25 /* flush not queueuable */ +#define QUEUE_FLAG_DAX 26 /* device supports DAX */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -594,6 +595,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) #define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) +#define blk_queue_dax(q) test_bit(QUEUE_FLAG_DAX, &(q)->queue_flags) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ -- cgit v1.2.3 From 72ef799b3f14f4cb4c56ba3af6e6bdcbae6df368 Mon Sep 17 00:00:00 2001 From: Tahsin Erdogan Date: Thu, 7 Jul 2016 11:48:22 -0700 Subject: block: do not merge requests without consulting with io scheduler Before merging a bio into an existing request, io scheduler is called to get its approval first. However, the requests that come from a plug flush may get merged by block layer without consulting with io scheduler. In case of CFQ, this can cause fairness problems. For instance, if a request gets merged into a low weight cgroup's request, high weight cgroup now will depend on low weight cgroup to get scheduled. If high weigt cgroup needs that io request to complete before submitting more requests, then it will also lose its timeslice. Following script demonstrates the problem. Group g1 has a low weight, g2 and g3 have equal high weights but g2's requests are adjacent to g1's requests so they are subject to merging. Due to these merges, g2 gets poor disk time allocation. cat > cfq-merge-repro.sh << "EOF" #!/bin/bash set -e IO_ROOT=/mnt-cgroup/io mkdir -p $IO_ROOT if ! mount | grep -qw $IO_ROOT; then mount -t cgroup none -oblkio $IO_ROOT fi cd $IO_ROOT for i in g1 g2 g3; do if [ -d $i ]; then rmdir $i fi done mkdir g1 && echo 10 > g1/blkio.weight mkdir g2 && echo 495 > g2/blkio.weight mkdir g3 && echo 495 > g3/blkio.weight RUNTIME=10 (echo $BASHPID > g1/cgroup.procs && fio --readonly --name name1 --filename /dev/sdb \ --rw read --size 64k --bs 64k --time_based \ --runtime=$RUNTIME --offset=0k &> /dev/null)& (echo $BASHPID > g2/cgroup.procs && fio --readonly --name name1 --filename /dev/sdb \ --rw read --size 64k --bs 64k --time_based \ --runtime=$RUNTIME --offset=64k &> /dev/null)& (echo $BASHPID > g3/cgroup.procs && fio --readonly --name name1 --filename /dev/sdb \ --rw read --size 64k --bs 64k --time_based \ --runtime=$RUNTIME --offset=256k &> /dev/null)& sleep $((RUNTIME+1)) for i in g1 g2 g3; do echo ---- $i ---- cat $i/blkio.time done EOF # ./cfq-merge-repro.sh ---- g1 ---- 8:16 162 ---- g2 ---- 8:16 165 ---- g3 ---- 8:16 686 After applying the patch: # ./cfq-merge-repro.sh ---- g1 ---- 8:16 90 ---- g2 ---- 8:16 445 ---- g3 ---- 8:16 471 Signed-off-by: Tahsin Erdogan Signed-off-by: Jens Axboe --- include/linux/elevator.h | 11 ++++++++--- 1 file changed, 8 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 953d28647435..e7f358d2e5fc 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -16,7 +16,11 @@ typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, typedef void (elevator_merged_fn) (struct request_queue *, struct request *, int); -typedef int (elevator_allow_merge_fn) (struct request_queue *, struct request *, struct bio *); +typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, + struct request *, struct bio *); + +typedef int (elevator_allow_rq_merge_fn) (struct request_queue *, + struct request *, struct request *); typedef void (elevator_bio_merged_fn) (struct request_queue *, struct request *, struct bio *); @@ -46,7 +50,8 @@ struct elevator_ops elevator_merge_fn *elevator_merge_fn; elevator_merged_fn *elevator_merged_fn; elevator_merge_req_fn *elevator_merge_req_fn; - elevator_allow_merge_fn *elevator_allow_merge_fn; + elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn; + elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn; elevator_bio_merged_fn *elevator_bio_merged_fn; elevator_dispatch_fn *elevator_dispatch_fn; @@ -157,7 +162,7 @@ extern ssize_t elv_iosched_store(struct request_queue *, const char *, size_t); extern int elevator_init(struct request_queue *, char *); extern void elevator_exit(struct elevator_queue *); extern int elevator_change(struct request_queue *, const char *); -extern bool elv_rq_merge_ok(struct request *, struct bio *); +extern bool elv_bio_merge_ok(struct request *, struct bio *); extern struct elevator_queue *elevator_alloc(struct request_queue *, struct elevator_type *); -- cgit v1.2.3 From 17007f3994cdb4643355c73f54f0adad006cf59e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 20 Jul 2016 21:40:47 -0600 Subject: block: Fix front merge check For a front merge, the maximum number of sectors of the request must be checked against the front merge BIO sector, not the current sector of the request. Signed-off-by: Damien Le Moal Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9d84c98b5c79..48f05d768a53 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -922,7 +922,8 @@ static inline unsigned int blk_max_size_offset(struct request_queue *q, (offset & (q->limits.chunk_sectors - 1)); } -static inline unsigned int blk_rq_get_max_sectors(struct request *rq) +static inline unsigned int blk_rq_get_max_sectors(struct request *rq, + sector_t offset) { struct request_queue *q = rq->q; @@ -932,7 +933,7 @@ static inline unsigned int blk_rq_get_max_sectors(struct request *rq) if (!q->limits.chunk_sectors || (req_op(rq) == REQ_OP_DISCARD)) return blk_queue_get_max_sectors(q, req_op(rq)); - return min(blk_max_size_offset(q, blk_rq_pos(rq)), + return min(blk_max_size_offset(q, offset), blk_queue_get_max_sectors(q, req_op(rq))); } -- cgit v1.2.3