From 1e2a410ff71504a64d1af2e354287ac51aeac1b0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 15:34:56 -0700 Subject: block: Use bi_pool for bio_integrity_alloc() Now that bios keep track of where they were allocated from, bio_integrity_alloc_bioset() becomes redundant. Remove bio_integrity_alloc_bioset() and drop the bio_set argument from the related functions and make them use bio->bi_pool. Signed-off-by: Kent Overstreet CC: Jens Axboe CC: Martin K. Petersen Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 4b4dbdfbca89..95c493511be7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2788,7 +2788,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, __bio_clone(bio, bio_src); if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask, bs)) + bio_integrity_clone(bio, bio_src, gfp_mask)) goto free_and_out; if (bio_ctr && bio_ctr(bio, bio_src, data)) -- cgit v1.2.3 From 4254bba17d92d53822a56ebc2a0c1eb7e2a71155 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 15:35:00 -0700 Subject: block: Kill bi_destructor Now that we've got generic code for freeing bios allocated from bio pools, this isn't needed anymore. This patch also makes bio_free() static, since without bi_destructor there should be no need for it to be called anywhere else. bio_free() is now only called from bio_put, so we can refactor those a bit - move some code from bio_put() to bio_free() and kill the redundant bio->bi_next = NULL.
v5: Switch to BIO_KMALLOC_POOL ((void *)~0), per Boaz v6: BIO_KMALLOC_POOL now NULL, drop bio_free's EXPORT_SYMBOL v7: No #define BIO_KMALLOC_POOL anymore Signed-off-by: Kent Overstreet CC: Jens Axboe Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 95c493511be7..b776cc90a4e7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2807,7 +2807,7 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, free_and_out: if (bio) - bio_free(bio, bs); + bio_put(bio); blk_rq_unprep_clone(rq); return -ENOMEM; -- cgit v1.2.3 From bf800ef1816b4283a885e55ad38068aec9711e4d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 6 Sep 2012 15:35:02 -0700 Subject: block: Add bio_clone_bioset(), bio_clone_kmalloc() Previously, there was bio_clone() but it only allocated from the fs bio set; as a result various users were open coding it and using __bio_clone(). This changes bio_clone() to become bio_clone_bioset(), and then we add bio_clone() and bio_clone_kmalloc() as wrappers around it, making use of the functionality the last patch added. This will also help in a later patch changing how bio cloning works.
Signed-off-by: Kent Overstreet CC: Jens Axboe CC: NeilBrown CC: Alasdair Kergon CC: Boaz Harrosh CC: Jeff Garzik Acked-by: Jeff Garzik Signed-off-by: Jens Axboe --- block/blk-core.c | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index b776cc90a4e7..82aab2815858 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2781,16 +2781,10 @@ int blk_rq_prep_clone(struct request *rq, struct request *rq_src, blk_rq_init(NULL, rq); __rq_for_each_bio(bio_src, rq_src) { - bio = bio_alloc_bioset(gfp_mask, bio_src->bi_max_vecs, bs); + bio = bio_clone_bioset(bio_src, gfp_mask, bs); if (!bio) goto free_and_out; - __bio_clone(bio, bio_src); - - if (bio_integrity(bio_src) && - bio_integrity_clone(bio, bio_src, gfp_mask)) - goto free_and_out; - if (bio_ctr && bio_ctr(bio, bio_src, data)) goto free_and_out; -- cgit v1.2.3 From e32463b2f7801d6561887c01db37b34958504635 Mon Sep 17 00:00:00 2001 From: Jaehoon Chung Date: Fri, 31 Aug 2012 17:24:13 +0900 Subject: block: remove the duplicated setting for congestion_threshold Before this call to blk_queue_congestion_threshold(), blk_queue_congestion_threshold() has already been called in blk_queue_make_request(). Because this call is a duplicate, it has been removed. Signed-off-by: Jaehoon Chung Signed-off-by: Kyungmin Park Signed-off-by: Jens Axboe --- block/blk-core.c | 2 -- 1 file changed, 2 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 82aab2815858..2d739ca10923 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -713,8 +713,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, if (elevator_init(q, NULL)) return NULL; - blk_queue_congestion_threshold(q); - /* all done, end the initial bypass */ blk_queue_bypass_end(q); return q; -- cgit v1.2.3 From e2a60da74fc8215c68509a89e9a69c66363153db Mon Sep 17 00:00:00 2001 From: "Martin K.
Petersen" Date: Tue, 18 Sep 2012 12:19:25 -0400 Subject: block: Clean up special command handling logic Remove special-casing of non-rw fs style requests (discard). The nomerge flags are consolidated in blk_types.h, and rq_mergeable() and bio_mergeable() have been modified to use them. bio_is_rw() is used in place of bio_has_data() in a few places. This is done to distinguish true reads and writes from other fs type requests that carry a payload (e.g. write same). Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 13 ++++++------- 1 file changed, 6 insertions(+), 7 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 2d739ca10923..5cc29299f6ac 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1657,8 +1657,8 @@ generic_make_request_checks(struct bio *bio) goto end_io; } - if (unlikely(!(bio->bi_rw & REQ_DISCARD) && - nr_sectors > queue_max_hw_sectors(q))) { + if (likely(bio_is_rw(bio) && + nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), bio_sectors(bio), @@ -1699,8 +1699,7 @@ generic_make_request_checks(struct bio *bio) if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || - ((bio->bi_rw & REQ_SECURE) && - !blk_queue_secdiscard(q)))) { + ((bio->bi_rw & REQ_SECURE) && !blk_queue_secdiscard(q)))) { err = -EOPNOTSUPP; goto end_io; } @@ -1818,7 +1817,7 @@ void submit_bio(int rw, struct bio *bio) * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission.
*/ - if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { + if (bio_has_data(bio)) { if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { @@ -1864,7 +1863,7 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { - if (rq->cmd_flags & REQ_DISCARD) + if (!rq_mergeable(rq)) return 0; if (blk_rq_sectors(rq) > queue_max_sectors(q) || @@ -2338,7 +2337,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ - if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) + if (req->cmd_type == REQ_TYPE_FS) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ -- cgit v1.2.3 From f31dc1cd490539e2b62a126bc4dc2495b165d772 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:26 -0400 Subject: block: Consolidate command flag and queue limit checks for merges - blk_check_merge_flags() verifies that cmd_flags / bi_rw are compatible. This function is called for both req-req and req-bio merging. - blk_rq_get_max_sectors() and blk_queue_get_max_sectors() can be used to query the maximum sector count for a given request or queue. The calls will return the right value from the queue limits given the type of command (RW, discard, write same, etc.) Signed-off-by: Martin K. 
Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 5cc29299f6ac..33eded00c5b1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1866,8 +1866,7 @@ int blk_rq_check_limits(struct request_queue *q, struct request *rq) if (!rq_mergeable(rq)) return 0; - if (blk_rq_sectors(rq) > queue_max_sectors(q) || - blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { + if (blk_rq_sectors(rq) > blk_queue_get_max_sectors(q, rq->cmd_flags)) { printk(KERN_ERR "%s: over max size limit.\n", __func__); return -EIO; } -- cgit v1.2.3 From 4363ac7c13a9a4b763c6e8d9fdbfc2468f3b8ca4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Tue, 18 Sep 2012 12:19:27 -0400 Subject: block: Implement support for WRITE SAME The WRITE SAME command supported on some SCSI devices allows the same block to be efficiently replicated throughout a block range. Only a single logical block is transferred from the host and the storage device writes the same data to all blocks described by the I/O. This patch implements support for WRITE SAME in the block layer. The blkdev_issue_write_same() function can be used by filesystems and block drivers to replicate a buffer across a block range. This can be used to efficiently initialize software RAID devices, etc. Signed-off-by: Martin K. 
Petersen Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 14 ++++++++++++-- 1 file changed, 12 insertions(+), 2 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 33eded00c5b1..3b080541098e 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1704,6 +1704,11 @@ generic_make_request_checks(struct bio *bio) goto end_io; } + if (bio->bi_rw & REQ_WRITE_SAME && !bdev_write_same(bio->bi_bdev)) { + err = -EOPNOTSUPP; + goto end_io; + } + /* * Various block parts want %current->io_context and lazy ioc * allocation ends up trading a lot of pain for a small amount of @@ -1809,8 +1814,6 @@ EXPORT_SYMBOL(generic_make_request); */ void submit_bio(int rw, struct bio *bio) { - int count = bio_sectors(bio); - bio->bi_rw |= rw; /* @@ -1818,6 +1821,13 @@ void submit_bio(int rw, struct bio *bio) * go through the normal accounting stuff before submission. */ if (bio_has_data(bio)) { + unsigned int count; + + if (unlikely(rw & REQ_WRITE_SAME)) + count = bdev_logical_block_size(bio->bi_bdev) >> 9; + else + count = bio_sectors(bio); + if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { -- cgit v1.2.3 From 749fefe6778e98dfefe3b8bb72a93875196ec554 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Sep 2012 14:08:52 -0700 Subject: block: lift the initial queue bypass mode on blk_register_queue() instead of blk_init_allocated_queue() b82d4b197c ("blkcg: make request_queue bypassing on allocation") made request_queues bypassed on allocation to avoid switching on and off bypass mode on a queue being initialized. Some drivers allocate and then destroy a lot of queues without fully initializing them, and incurring bypass latency overhead on each of them could add up to significant overhead. Unfortunately, blk_init_allocated_queue() is never used by queues of bio-based drivers, which means that all bio-based driver queues are in bypass mode even after initialization and registration complete successfully.
Due to the limited way request_queues are used by bio drivers, this problem is hidden pretty well but it shows up when blk-throttle is used in combination with a bio-based driver. Trying to configure (echoing to cgroupfs file) blk-throttle for a bio-based driver hangs indefinitely in blkg_conf_prep() waiting for bypass mode to end. This patch moves the initial blk_queue_bypass_end() call from blk_init_allocated_queue() to blk_register_queue() which is called for any userland-visible queue regardless of its type. I believe this is correct because I don't think there is any block driver which needs or wants working elevator and blk-cgroup on a queue which isn't visible to userland. If there are such users, we need a different solution. Signed-off-by: Tejun Heo Reported-by: Joseph Glanville Cc: stable@vger.kernel.org Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-core.c | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 3b080541098e..80e29c90723c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -608,8 +608,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) /* * A queue starts its life with bypass turned on to avoid * unnecessary bypass on/off overhead and nasty surprises during - * init. The initial bypass will be finished at the end of - * blk_init_allocated_queue(). + * init. The initial bypass will be finished when the queue is + * registered by blk_register_queue().
*/ q->bypass_depth = 1; __set_bit(QUEUE_FLAG_BYPASS, &q->queue_flags); @@ -712,9 +712,6 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, /* init elevator */ if (elevator_init(q, NULL)) return NULL; - - /* all done, end the initial bypass */ - blk_queue_bypass_end(q); return q; } EXPORT_SYMBOL(blk_init_allocated_queue); -- cgit v1.2.3 From 60ea8226cbd5c8301f9a39edc574ddabcb8150e0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 20 Sep 2012 14:09:30 -0700 Subject: block: fix request_queue->flags initialization A queue newly allocated with blk_alloc_queue_node() has only QUEUE_FLAG_BYPASS set. For request-based drivers, blk_init_allocated_queue() is called and q->queue_flags is overwritten with QUEUE_FLAG_DEFAULT which doesn't include BYPASS even though the initial bypass is still in effect. In blk_init_allocated_queue(), OR QUEUE_FLAG_DEFAULT into q->queue_flags instead of overwriting. Signed-off-by: Tejun Heo Cc: stable@vger.kernel.org Acked-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-core.c') diff --git a/block/blk-core.c b/block/blk-core.c index 80e29c90723c..a17869f337f7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -696,7 +696,7 @@ blk_init_allocated_queue(struct request_queue *q, request_fn_proc *rfn, q->request_fn = rfn; q->prep_rq_fn = NULL; q->unprep_rq_fn = NULL; - q->queue_flags = QUEUE_FLAG_DEFAULT; + q->queue_flags |= QUEUE_FLAG_DEFAULT; /* Override internal queue lock with supplied lock pointer */ if (lock) -- cgit v1.2.3