From 3f3299d5c0268d6cc3f47b446e8aca436e4a5651 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Nov 2012 13:42:38 +0100 Subject: block: Rename queue dead flag QUEUE_FLAG_DEAD is used to indicate that queuing new requests must stop. After this flag has been set queue draining starts. However, during the queue draining phase it is still safe to invoke the queue's request_fn, so QUEUE_FLAG_DYING is a better name for this flag. This patch has been generated by running the following command over the kernel source tree: git grep -lEw 'blk_queue_dead|QUEUE_FLAG_DEAD' | xargs sed -i.tmp -e 's/blk_queue_dead/blk_queue_dying/g' \ -e 's/QUEUE_FLAG_DEAD/QUEUE_FLAG_DYING/g'; \ sed -i.tmp -e "s/QUEUE_FLAG_DYING$(printf \\t)*5/QUEUE_FLAG_DYING$(printf \\t)5/g" \ include/linux/blkdev.h; \ sed -i.tmp -e 's/ DEAD/ DYING/g' -e 's/dead queue/a dying queue/' \ -e 's/Dead queue/A dying queue/' block/blk-core.c Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Cc: James Bottomley Cc: Mike Christie Cc: Jens Axboe Cc: Chanho Min Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1756001210d2..aba8246afe72 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -437,7 +437,7 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ -#define QUEUE_FLAG_DEAD 5 /* queue being torn down */ +#define QUEUE_FLAG_DYING 5 /* queue being torn down */ #define QUEUE_FLAG_BYPASS 6 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 7 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 8 /* disable merge attempts */ @@ -521,7 +521,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) 
test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) -#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) +#define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From c246e80d86736312933646896c4157daf511dadc Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 6 Dec 2012 14:32:01 +0100 Subject: block: Avoid that request_fn is invoked on a dead queue A block driver may start cleaning up resources needed by its request_fn as soon as blk_cleanup_queue() finished, so request_fn must not be invoked after draining finished. This is important when blk_run_queue() is invoked without any requests in progress. As an example, if blk_drain_queue() and scsi_run_queue() run in parallel, blk_drain_queue() may have finished all requests after scsi_run_queue() has taken a SCSI device off the starved list but before that last function has had a chance to run the queue. 
Signed-off-by: Bart Van Assche Cc: James Bottomley Cc: Mike Christie Cc: Chanho Min Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aba8246afe72..8bc46c250ca4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -452,6 +452,7 @@ struct request_queue { #define QUEUE_FLAG_ADD_RANDOM 16 /* Contributes to random pool */ #define QUEUE_FLAG_SECDISCARD 17 /* supports SECDISCARD */ #define QUEUE_FLAG_SAME_FORCE 18 /* force complete on same CPU */ +#define QUEUE_FLAG_DEAD 19 /* queue tear-down finished */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_STACKABLE) | \ @@ -522,6 +523,7 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) +#define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) #define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 24faf6f604efe18236bded4303009fc252913bf0 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 28 Nov 2012 13:46:45 +0100 Subject: block: Make blk_cleanup_queue() wait until request_fn finished Some request_fn implementations, e.g. scsi_request_fn(), unlock the queue lock internally. This may result in multiple threads executing request_fn for the same queue simultaneously. Keep track of the number of active request_fn calls and make sure that blk_cleanup_queue() waits until all active request_fn invocations have finished. 
A block driver may start cleaning up resources needed by its request_fn as soon as blk_cleanup_queue() finished, so blk_cleanup_queue() must wait for all outstanding request_fn invocations to finish. Signed-off-by: Bart Van Assche Reported-by: Chanho Min Cc: James Bottomley Cc: Mike Christie Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8bc46c250ca4..c9d233e727f2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -378,6 +378,12 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; + /* + * Number of active block driver functions for which blk_drain_queue() + * must wait. Must be incremented around functions that unlock the + * queue_lock internally, e.g. scsi_request_fn(). + */ + unsigned int request_fn_active; unsigned int rq_timeout; struct timer_list timeout; -- cgit v1.2.3 From 8dd2cb7e880d2f77fba53b523c99133ad5054cfd Mon Sep 17 00:00:00 2001 From: Shaohua Li Date: Fri, 14 Dec 2012 11:15:36 +0800 Subject: block: discard granularity might not be power of 2 In MD raid case, discard granularity might not be power of 2, for example, a 4-disk raid5 has 3*chunk_size discard granularity. Correct the calculation for such cases. 
Reported-by: Neil Brown Signed-off-by: Shaohua Li Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c9d233e727f2..acb4f7bbbd32 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1188,13 +1188,14 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { - unsigned int alignment = (sector << 9) & (lim->discard_granularity - 1); + sector_t alignment = sector << 9; + alignment = sector_div(alignment, lim->discard_granularity); if (!lim->max_discard_sectors) return 0; - return (lim->discard_granularity + lim->discard_alignment - alignment) - & (lim->discard_granularity - 1); + alignment = lim->discard_granularity + lim->discard_alignment - alignment; + return sector_div(alignment, lim->discard_granularity); } static inline int bdev_discard_alignment(struct block_device *bdev) -- cgit v1.2.3 From 59771079c18c44e39106f0f30054025acafadb41 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Wed, 19 Dec 2012 07:18:35 -0800 Subject: blk: avoid divide-by-zero with zero discard granularity Commit 8dd2cb7e880d ("block: discard granularity might not be power of 2") changed a couple of 'binary and' operations into modulus operations. Which turned the harmless case of a zero discard_granularity into a possible divide-by-zero. The code also had a much more subtle bug: it was doing the modulus of a value in bytes using 'sector_t'. That was always conceptually wrong, but didn't actually matter back when the code assumed a power-of-two granularity: we only looked at the low bits anyway. 
But with potentially arbitrary sector numbers, using a 'sector_t' to express bytes is very very wrong: depending on configuration it limits the starting offset of the device to just 32 bits, and any overflow would result in a wrong value if the modulus wasn't a power-of-two. So re-write the code to not only protect against the divide-by-zero, but to do the starting sector arithmetic in sectors, and using the proper types. [ For any mathematicians out there: it also looks monumentally stupid to do the 'modulo granularity' operation *twice*, never mind having a "+ granularity" in the second modulus op. But that's the easiest way to avoid negative values or overflow, and it is how the original code was done. ] Reported-by: Ingo Molnar Reported-by: Doug Anderson Cc: Neil Brown Cc: Shaohua Li Acked-by: Jens Axboe Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 19 +++++++++++++++---- 1 file changed, 15 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index acb4f7bbbd32..f94bc83011ed 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1188,14 +1188,25 @@ static inline int queue_discard_alignment(struct request_queue *q) static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector_t sector) { - sector_t alignment = sector << 9; - alignment = sector_div(alignment, lim->discard_granularity); + unsigned int alignment, granularity, offset; if (!lim->max_discard_sectors) return 0; - alignment = lim->discard_granularity + lim->discard_alignment - alignment; - return sector_div(alignment, lim->discard_granularity); + /* Why are these in bytes, not sectors? 
*/ + alignment = lim->discard_alignment >> 9; + granularity = lim->discard_granularity >> 9; + if (!granularity) + return 0; + + /* Offset of the partition start in 'granularity' sectors */ + offset = sector_div(sector, granularity); + + /* And why do we do this modulus *again* in blkdev_issue_discard()? */ + offset = (granularity + alignment - offset) % granularity; + + /* Turn it back into bytes, gaah */ + return offset << 9; } static inline int bdev_discard_alignment(struct block_device *bdev) -- cgit v1.2.3 From 548bc8e1b38e48653a90f48f636f8d253504f8a2 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 9 Jan 2013 08:05:13 -0800 Subject: block: RCU free request_queue RCU free request_queue so that blkcg_gq->q can be dereferenced under RCU lock. This will be used to implement hierarchical stats. Signed-off-by: Tejun Heo Acked-by: Vivek Goyal --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..406343c43cda 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -19,6 +19,7 @@ #include #include #include +#include #include @@ -437,6 +438,7 @@ struct request_queue { /* Throttle data */ struct throtl_data *td; #endif + struct rcu_head rcu_head; }; #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ -- cgit v1.2.3 From 422765c2638924da10ff363b5eed77924911bdc7 Mon Sep 17 00:00:00 2001 From: Jianpeng Ma Date: Fri, 11 Jan 2013 14:46:09 +0100 Subject: block: Remove should_sort judgement when flush blk_plug Commit 975927b942c932 added blk_rq_pos to the sorting of requests when flushing. Although that commit was meant for the situation in which a blk_plug handles multiple devices at the same time, such as an md device, I think there must be similar situations with only a single device. So remove the should_sort judgement. Because should_sort is only used for this purpose, it can be deleted from blk_plug. 
CC: Shaohua Li Signed-off-by: Jianpeng Ma Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f94bc83011ed..dbe74279f3d6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -974,7 +974,6 @@ struct blk_plug { unsigned long magic; /* detect uninitialized use-cases */ struct list_head list; /* requests */ struct list_head cb_list; /* md requires an unplug callback */ - unsigned int should_sort; /* list to be sorted before flushing? */ }; #define BLK_MAX_REQUEST_COUNT 16 -- cgit v1.2.3