From e2e1a148bc45855816ae6b4692ce29d0020fa22e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jun 2010 10:42:09 +0200 Subject: block: add sysfs knob for turning off disk entropy contributions There are two reasons for doing this: - On SSD disks, the completion times aren't as random as they are for rotational drives. So it's questionable whether they should contribute to the random pool in the first place. - Calling add_disk_randomness() has a lot of overhead. This adds /sys/block//queue/add_random that will allow you to switch off on a per-device basis. The default setting is on, so there should be no functional changes from this patch. Signed-off-by: Jens Axboe --- block/blk-core.c | 3 ++- block/blk-sysfs.c | 28 ++++++++++++++++++++++++++++ 2 files changed, 30 insertions(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index f0640d7f800f..b4131d29148c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2111,7 +2111,8 @@ static bool blk_update_bidi_request(struct request *rq, int error, blk_update_request(rq->next_rq, error, bidi_bytes)) return true; - add_disk_randomness(rq->rq_disk); + if (blk_queue_add_random(rq->q)) + add_disk_randomness(rq->rq_disk); return false; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 306759bbdf1b..58b53c354c2c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -250,6 +250,27 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } +static ssize_t queue_random_show(struct request_queue *q, char *page) +{ + return queue_var_show(blk_queue_add_random(q), page); +} + +static ssize_t queue_random_store(struct request_queue *q, const char *page, + size_t count) +{ + unsigned long val; + ssize_t ret = queue_var_store(&val, page, count); + + spin_lock_irq(q->queue_lock); + if (val) + queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); + else + queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); + spin_unlock_irq(q->queue_lock); + + return ret; +} + 
static ssize_t queue_iostats_show(struct request_queue *q, char *page) { return queue_var_show(blk_queue_io_stat(q), page); @@ -374,6 +395,12 @@ static struct queue_sysfs_entry queue_iostats_entry = { .store = queue_iostats_store, }; +static struct queue_sysfs_entry queue_random_entry = { + .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR }, + .show = queue_random_show, + .store = queue_random_store, +}; + static struct attribute *default_attrs[] = { &queue_requests_entry.attr, &queue_ra_entry.attr, @@ -394,6 +421,7 @@ static struct attribute *default_attrs[] = { &queue_nomerges_entry.attr, &queue_rq_affinity_entry.attr, &queue_iostats_entry.attr, + &queue_random_entry.attr, NULL, }; -- cgit v1.2.3 From 956bcb7c1a9a73c6d5db66e83f32c785d06dc8fc Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sat, 7 Aug 2010 18:13:50 +0200 Subject: block: add helpers for the trivial queue flag sysfs show/store entries The code for nonrot, random, and io stats are completely identical. Signed-off-by: Jens Axboe --- block/blk-sysfs.c | 104 +++++++++++++++++++----------------------------------- 1 file changed, 36 insertions(+), 68 deletions(-) (limited to 'block') diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 58b53c354c2c..001ab18078f5 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -180,26 +180,36 @@ static ssize_t queue_max_hw_sectors_show(struct request_queue *q, char *page) return queue_var_show(max_hw_sectors_kb, (page)); } -static ssize_t queue_nonrot_show(struct request_queue *q, char *page) -{ - return queue_var_show(!blk_queue_nonrot(q), page); -} - -static ssize_t queue_nonrot_store(struct request_queue *q, const char *page, - size_t count) -{ - unsigned long nm; - ssize_t ret = queue_var_store(&nm, page, count); - - spin_lock_irq(q->queue_lock); - if (nm) - queue_flag_clear(QUEUE_FLAG_NONROT, q); - else - queue_flag_set(QUEUE_FLAG_NONROT, q); - spin_unlock_irq(q->queue_lock); - - return ret; -} +#define QUEUE_SYSFS_BIT_FNS(name, flag, neg) \ 
+static ssize_t \ +queue_show_##name(struct request_queue *q, char *page) \ +{ \ + int bit; \ + bit = test_bit(QUEUE_FLAG_##flag, &q->queue_flags); \ + return queue_var_show(neg ? !bit : bit, page); \ +} \ +static ssize_t \ +queue_store_##name(struct request_queue *q, const char *page, size_t count) \ +{ \ + unsigned long val; \ + ssize_t ret; \ + ret = queue_var_store(&val, page, count); \ + if (neg) \ + val = !val; \ + \ + spin_lock_irq(q->queue_lock); \ + if (val) \ + queue_flag_set(QUEUE_FLAG_##flag, q); \ + else \ + queue_flag_clear(QUEUE_FLAG_##flag, q); \ + spin_unlock_irq(q->queue_lock); \ + return ret; \ +} + +QUEUE_SYSFS_BIT_FNS(nonrot, NONROT, 1); +QUEUE_SYSFS_BIT_FNS(random, ADD_RANDOM, 0); +QUEUE_SYSFS_BIT_FNS(iostats, IO_STAT, 0); +#undef QUEUE_SYSFS_BIT_FNS static ssize_t queue_nomerges_show(struct request_queue *q, char *page) { @@ -250,48 +260,6 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) return ret; } -static ssize_t queue_random_show(struct request_queue *q, char *page) -{ - return queue_var_show(blk_queue_add_random(q), page); -} - -static ssize_t queue_random_store(struct request_queue *q, const char *page, - size_t count) -{ - unsigned long val; - ssize_t ret = queue_var_store(&val, page, count); - - spin_lock_irq(q->queue_lock); - if (val) - queue_flag_set(QUEUE_FLAG_ADD_RANDOM, q); - else - queue_flag_clear(QUEUE_FLAG_ADD_RANDOM, q); - spin_unlock_irq(q->queue_lock); - - return ret; -} - -static ssize_t queue_iostats_show(struct request_queue *q, char *page) -{ - return queue_var_show(blk_queue_io_stat(q), page); -} - -static ssize_t queue_iostats_store(struct request_queue *q, const char *page, - size_t count) -{ - unsigned long stats; - ssize_t ret = queue_var_store(&stats, page, count); - - spin_lock_irq(q->queue_lock); - if (stats) - queue_flag_set(QUEUE_FLAG_IO_STAT, q); - else - queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - spin_unlock_irq(q->queue_lock); - - return ret; -} - static struct 
queue_sysfs_entry queue_requests_entry = { .attr = {.name = "nr_requests", .mode = S_IRUGO | S_IWUSR }, .show = queue_requests_show, @@ -373,8 +341,8 @@ static struct queue_sysfs_entry queue_discard_zeroes_data_entry = { static struct queue_sysfs_entry queue_nonrot_entry = { .attr = {.name = "rotational", .mode = S_IRUGO | S_IWUSR }, - .show = queue_nonrot_show, - .store = queue_nonrot_store, + .show = queue_show_nonrot, + .store = queue_store_nonrot, }; static struct queue_sysfs_entry queue_nomerges_entry = { @@ -391,14 +359,14 @@ static struct queue_sysfs_entry queue_rq_affinity_entry = { static struct queue_sysfs_entry queue_iostats_entry = { .attr = {.name = "iostats", .mode = S_IRUGO | S_IWUSR }, - .show = queue_iostats_show, - .store = queue_iostats_store, + .show = queue_show_iostats, + .store = queue_store_iostats, }; static struct queue_sysfs_entry queue_random_entry = { .attr = {.name = "add_random", .mode = S_IRUGO | S_IWUSR }, - .show = queue_random_show, - .store = queue_random_store, + .show = queue_show_random, + .store = queue_store_random, }; static struct attribute *default_attrs[] = { -- cgit v1.2.3 From 33659ebbae262228eef4e0fe990f393d1f0ed941 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:17:56 +0200 Subject: block: remove wrappers for request type/flags Remove all the trivial wrappers for the cmd_type and cmd_flags fields in struct requests. This allows much easier grepping for different request types instead of unwinding through macros. 
Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 7 ++++--- block/blk-core.c | 13 +++++++------ block/blk-exec.c | 2 +- block/blk-merge.c | 4 ++-- block/blk.h | 6 ++++-- block/cfq-iosched.c | 19 +++++++++++-------- block/elevator.c | 16 ++++++++++------ 7 files changed, 39 insertions(+), 28 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 0d710c9d403b..74e404393172 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -79,7 +79,7 @@ unsigned blk_ordered_req_seq(struct request *rq) * * http://thread.gmane.org/gmane.linux.kernel/537473 */ - if (!blk_fs_request(rq)) + if (rq->cmd_type != REQ_TYPE_FS) return QUEUE_ORDSEQ_DRAIN; if ((rq->cmd_flags & REQ_ORDERED_COLOR) == @@ -236,7 +236,8 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) bool blk_do_ordered(struct request_queue *q, struct request **rqp) { struct request *rq = *rqp; - const int is_barrier = blk_fs_request(rq) && blk_barrier_rq(rq); + const int is_barrier = rq->cmd_type == REQ_TYPE_FS && + (rq->cmd_flags & REQ_HARDBARRIER); if (!q->ordseq) { if (!is_barrier) @@ -261,7 +262,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) */ /* Special requests are not subject to ordering rules. 
*/ - if (!blk_fs_request(rq) && + if (rq->cmd_type != REQ_TYPE_FS && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) return true; diff --git a/block/blk-core.c b/block/blk-core.c index b4131d29148c..dca43a31e725 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -184,7 +184,7 @@ void blk_dump_rq_flags(struct request *rq, char *msg) printk(KERN_INFO " bio %p, biotail %p, buffer %p, len %u\n", rq->bio, rq->biotail, rq->buffer, blk_rq_bytes(rq)); - if (blk_pc_request(rq)) { + if (rq->cmd_type == REQ_TYPE_BLOCK_PC) { printk(KERN_INFO " cdb: "); for (bit = 0; bit < BLK_MAX_CDB; bit++) printk("%02x ", rq->cmd[bit]); @@ -1796,7 +1796,7 @@ struct request *blk_peek_request(struct request_queue *q) * sees this request (possibly after * requeueing). Notify IO scheduler. */ - if (blk_sorted_rq(rq)) + if (rq->cmd_flags & REQ_SORTED) elv_activate_rq(q, rq); /* @@ -1984,10 +1984,11 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) * TODO: tj: This is too subtle. It would be better to let * low level drivers do what they see fit. */ - if (blk_fs_request(req)) + if (req->cmd_type == REQ_TYPE_FS) req->errors = 0; - if (error && (blk_fs_request(req) && !(req->cmd_flags & REQ_QUIET))) { + if (error && req->cmd_type == REQ_TYPE_FS && + !(req->cmd_flags & REQ_QUIET)) { printk(KERN_ERR "end_request: I/O error, dev %s, sector %llu\n", req->rq_disk ? 
req->rq_disk->disk_name : "?", (unsigned long long)blk_rq_pos(req)); @@ -2074,7 +2075,7 @@ bool blk_update_request(struct request *req, int error, unsigned int nr_bytes) req->buffer = bio_data(req->bio); /* update sector only for requests with clear definition of sector */ - if (blk_fs_request(req) || blk_discard_rq(req)) + if (req->cmd_type == REQ_TYPE_FS || (req->cmd_flags & REQ_DISCARD)) req->__sector += total_bytes >> 9; /* mixed attributes always follow the first bio */ @@ -2127,7 +2128,7 @@ static void blk_finish_request(struct request *req, int error) BUG_ON(blk_queued_rq(req)); - if (unlikely(laptop_mode) && blk_fs_request(req)) + if (unlikely(laptop_mode) && req->cmd_type == REQ_TYPE_FS) laptop_io_completion(&req->q->backing_dev_info); blk_delete_timer(req); diff --git a/block/blk-exec.c b/block/blk-exec.c index 49557e91f0da..e1672f14840e 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -57,7 +57,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, __elv_add_request(q, rq, where, 1); __generic_unplug_device(q); /* the queue is stopped so it won't be plugged+unplugged */ - if (blk_pm_resume_request(rq)) + if (rq->cmd_type == REQ_TYPE_PM_RESUME) q->request_fn(q); spin_unlock_irq(q->queue_lock); } diff --git a/block/blk-merge.c b/block/blk-merge.c index 5e7dc9973458..87e4fb7d0e98 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -226,7 +226,7 @@ int ll_back_merge_fn(struct request_queue *q, struct request *req, { unsigned short max_sectors; - if (unlikely(blk_pc_request(req))) + if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) max_sectors = queue_max_hw_sectors(q); else max_sectors = queue_max_sectors(q); @@ -250,7 +250,7 @@ int ll_front_merge_fn(struct request_queue *q, struct request *req, { unsigned short max_sectors; - if (unlikely(blk_pc_request(req))) + if (unlikely(req->cmd_type == REQ_TYPE_BLOCK_PC)) max_sectors = queue_max_hw_sectors(q); else max_sectors = queue_max_sectors(q); diff --git a/block/blk.h 
b/block/blk.h index 5ee3d7e72feb..6e7dc87141e4 100644 --- a/block/blk.h +++ b/block/blk.h @@ -161,8 +161,10 @@ static inline int blk_cpu_to_group(int cpu) */ static inline int blk_do_io_stat(struct request *rq) { - return rq->rq_disk && blk_rq_io_stat(rq) && - (blk_fs_request(rq) || blk_discard_rq(rq)); + return rq->rq_disk && + (rq->cmd_flags & REQ_IO_STAT) && + (rq->cmd_type == REQ_TYPE_FS || + (rq->cmd_flags & REQ_DISCARD)); } #endif diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 7982b830db58..d4edeb8fceb8 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -646,9 +646,10 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, return rq1; else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) return rq2; - if (rq_is_meta(rq1) && !rq_is_meta(rq2)) + if ((rq1->cmd_flags & REQ_RW_META) && !(rq2->cmd_flags & REQ_RW_META)) return rq1; - else if (rq_is_meta(rq2) && !rq_is_meta(rq1)) + else if ((rq2->cmd_flags & REQ_RW_META) && + !(rq1->cmd_flags & REQ_RW_META)) return rq2; s1 = blk_rq_pos(rq1); @@ -1484,7 +1485,7 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); - if (rq_is_meta(rq)) { + if (rq->cmd_flags & REQ_RW_META) { WARN_ON(!cfqq->meta_pending); cfqq->meta_pending--; } @@ -3176,7 +3177,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. 
*/ - if (rq_is_meta(rq) && !cfqq->meta_pending) + if ((rq->cmd_flags & REQ_RW_META) && !cfqq->meta_pending) return true; /* @@ -3230,7 +3231,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; - if (rq_is_meta(rq)) + if (rq->cmd_flags & REQ_RW_META) cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cic); @@ -3365,7 +3366,8 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) unsigned long now; now = jiffies; - cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", !!rq_noidle(rq)); + cfq_log_cfqq(cfqd, cfqq, "complete rqnoidle %d", + !!(rq->cmd_flags & REQ_NOIDLE)); cfq_update_hw_tag(cfqd); @@ -3419,11 +3421,12 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_slice_expired(cfqd, 1); else if (sync && cfqq_empty && !cfq_close_cooperator(cfqd, cfqq)) { - cfqd->noidle_tree_requires_idle |= !rq_noidle(rq); + cfqd->noidle_tree_requires_idle |= + !(rq->cmd_flags & REQ_NOIDLE); /* * Idling is enabled for SYNC_WORKLOAD. 
* SYNC_NOIDLE_WORKLOAD idles at the end of the tree - * only if we processed at least one !rq_noidle request + * only if we processed at least one !REQ_NOIDLE request */ if (cfqd->serving_type == SYNC_WORKLOAD || cfqd->noidle_tree_requires_idle diff --git a/block/elevator.c b/block/elevator.c index 923a9139106c..aa99b59c03d6 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -428,7 +428,8 @@ void elv_dispatch_sort(struct request_queue *q, struct request *rq) list_for_each_prev(entry, &q->queue_head) { struct request *pos = list_entry_rq(entry); - if (blk_discard_rq(rq) != blk_discard_rq(pos)) + if ((rq->cmd_flags & REQ_DISCARD) != + (pos->cmd_flags & REQ_DISCARD)) break; if (rq_data_dir(rq) != rq_data_dir(pos)) break; @@ -558,7 +559,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) { q->in_flight[rq_is_sync(rq)]--; - if (blk_sorted_rq(rq)) + if (rq->cmd_flags & REQ_SORTED) elv_deactivate_rq(q, rq); } @@ -644,7 +645,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) break; case ELEVATOR_INSERT_SORT: - BUG_ON(!blk_fs_request(rq) && !blk_discard_rq(rq)); + BUG_ON(rq->cmd_type != REQ_TYPE_FS && + !(rq->cmd_flags & REQ_DISCARD)); rq->cmd_flags |= REQ_SORTED; q->nr_sorted++; if (rq_mergeable(rq)) { @@ -716,7 +718,7 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, /* * toggle ordered color */ - if (blk_barrier_rq(rq)) + if (rq->cmd_flags & REQ_HARDBARRIER) q->ordcolor ^= 1; /* @@ -729,7 +731,8 @@ void __elv_add_request(struct request_queue *q, struct request *rq, int where, * this request is scheduling boundary, update * end_sector */ - if (blk_fs_request(rq) || blk_discard_rq(rq)) { + if (rq->cmd_type == REQ_TYPE_FS || + (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); q->boundary_rq = rq; } @@ -843,7 +846,8 @@ void elv_completed_request(struct request_queue *q, struct request *rq) */ if (blk_account_rq(rq)) { 
q->in_flight[rq_is_sync(rq)]--; - if (blk_sorted_rq(rq) && e->ops->elevator_completed_req_fn) + if ((rq->cmd_flags & REQ_SORTED) && + e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } -- cgit v1.2.3 From 7b6d91daee5cac6402186ff224c3af39d79f4a0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:20:39 +0200 Subject: block: unify flags for struct bio and struct request Remove the current bio flags and reuse the request flags for the bio, too. This allows to more easily trace the type of I/O from the filesystem down to the block driver. There were two flags in the bio that were missing in the requests: BIO_RW_UNPLUG and BIO_RW_AHEAD. Also I've renamed two request flags that had a superfluous RW in them. Note that the flags are in bio.h despite having the REQ_ name - as blkdev.h includes bio.h that is the only way to go for now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- block/blk-core.c | 37 ++++++++++--------------------------- block/blk-map.c | 2 +- block/blk-merge.c | 2 +- block/cfq-iosched.c | 14 +++++++------- block/elevator.c | 3 +-- 6 files changed, 21 insertions(+), 39 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 74e404393172..7c6f4a714687 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -203,7 +203,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_RW; + rq->cmd_flags |= REQ_WRITE; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; init_request_from_bio(rq, q->orig_bar_rq->bio); diff --git a/block/blk-core.c b/block/blk-core.c index dca43a31e725..66c3cfe94d0a 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1140,25 +1140,9 @@ void init_request_from_bio(struct request *req, struct bio *bio) req->cpu =
bio->bi_comp_cpu; req->cmd_type = REQ_TYPE_FS; - /* - * Inherit FAILFAST from bio (for read-ahead, and explicit - * FAILFAST). FAILFAST flags are identical for req and bio. - */ - if (bio_rw_flagged(bio, BIO_RW_AHEAD)) + req->cmd_flags |= bio->bi_rw & REQ_COMMON_MASK; + if (bio->bi_rw & REQ_RAHEAD) req->cmd_flags |= REQ_FAILFAST_MASK; - else - req->cmd_flags |= bio->bi_rw & REQ_FAILFAST_MASK; - - if (bio_rw_flagged(bio, BIO_RW_DISCARD)) - req->cmd_flags |= REQ_DISCARD; - if (bio_rw_flagged(bio, BIO_RW_BARRIER)) - req->cmd_flags |= REQ_HARDBARRIER; - if (bio_rw_flagged(bio, BIO_RW_SYNCIO)) - req->cmd_flags |= REQ_RW_SYNC; - if (bio_rw_flagged(bio, BIO_RW_META)) - req->cmd_flags |= REQ_RW_META; - if (bio_rw_flagged(bio, BIO_RW_NOIDLE)) - req->cmd_flags |= REQ_NOIDLE; req->errors = 0; req->__sector = bio->bi_sector; @@ -1181,12 +1165,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) int el_ret; unsigned int bytes = bio->bi_size; const unsigned short prio = bio_prio(bio); - const bool sync = bio_rw_flagged(bio, BIO_RW_SYNCIO); - const bool unplug = bio_rw_flagged(bio, BIO_RW_UNPLUG); + const bool sync = (bio->bi_rw & REQ_SYNC); + const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if (bio_rw_flagged(bio, BIO_RW_BARRIER) && + if ((bio->bi_rw & REQ_HARDBARRIER) && (q->next_ordered == QUEUE_ORDERED_NONE)) { bio_endio(bio, -EOPNOTSUPP); return 0; @@ -1200,7 +1184,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely(bio_rw_flagged(bio, BIO_RW_BARRIER)) || elv_queue_empty(q)) + if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1275,7 +1259,7 @@ get_rq: */ rw_flags = bio_data_dir(bio); if (sync) - rw_flags |= REQ_RW_SYNC; + rw_flags |= REQ_SYNC; /* * Grab a free request. This is might sleep but can not fail. 
@@ -1464,7 +1448,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; } - if (unlikely(!bio_rw_flagged(bio, BIO_RW_DISCARD) && + if (unlikely(!(bio->bi_rw & REQ_DISCARD) && nr_sectors > queue_max_hw_sectors(q))) { printk(KERN_ERR "bio too big device %s (%u > %u)\n", bdevname(bio->bi_bdev, b), @@ -1497,8 +1481,7 @@ static inline void __generic_make_request(struct bio *bio) if (bio_check_eod(bio, nr_sectors)) goto end_io; - if (bio_rw_flagged(bio, BIO_RW_DISCARD) && - !blk_queue_discard(q)) { + if ((bio->bi_rw & REQ_DISCARD) && !blk_queue_discard(q)) { err = -EOPNOTSUPP; goto end_io; } @@ -2365,7 +2348,7 @@ void blk_rq_bio_prep(struct request_queue *q, struct request *rq, struct bio *bio) { /* Bit 0 (R/W) is identical in rq->cmd_flags and bio->bi_rw */ - rq->cmd_flags |= bio->bi_rw & REQ_RW; + rq->cmd_flags |= bio->bi_rw & REQ_WRITE; if (bio_has_data(bio)) { rq->nr_phys_segments = bio_phys_segments(q, bio); diff --git a/block/blk-map.c b/block/blk-map.c index 9083cf0180cc..c65d7593f7f1 100644 --- a/block/blk-map.c +++ b/block/blk-map.c @@ -307,7 +307,7 @@ int blk_rq_map_kern(struct request_queue *q, struct request *rq, void *kbuf, return PTR_ERR(bio); if (rq_data_dir(rq) == WRITE) - bio->bi_rw |= (1 << BIO_RW); + bio->bi_rw |= (1 << REQ_WRITE); if (do_copy) rq->cmd_flags |= REQ_COPY_USER; diff --git a/block/blk-merge.c b/block/blk-merge.c index 87e4fb7d0e98..4852475521ea 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -180,7 +180,7 @@ new_segment: } if (q->dma_drain_size && q->dma_drain_needed(rq)) { - if (rq->cmd_flags & REQ_RW) + if (rq->cmd_flags & REQ_WRITE) memset(q->dma_drain_buffer, 0, q->dma_drain_size); sg->page_link &= ~0x02; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index d4edeb8fceb8..eb4086f7dfef 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -458,7 +458,7 @@ static inline struct cfq_data *cic_to_cfqd(struct cfq_io_context *cic) */ static inline bool cfq_bio_sync(struct bio *bio) { - return 
bio_data_dir(bio) == READ || bio_rw_flagged(bio, BIO_RW_SYNCIO); + return bio_data_dir(bio) == READ || (bio->bi_rw & REQ_SYNC); } /* @@ -646,10 +646,10 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, return rq1; else if (rq_is_sync(rq2) && !rq_is_sync(rq1)) return rq2; - if ((rq1->cmd_flags & REQ_RW_META) && !(rq2->cmd_flags & REQ_RW_META)) + if ((rq1->cmd_flags & REQ_META) && !(rq2->cmd_flags & REQ_META)) return rq1; - else if ((rq2->cmd_flags & REQ_RW_META) && - !(rq1->cmd_flags & REQ_RW_META)) + else if ((rq2->cmd_flags & REQ_META) && + !(rq1->cmd_flags & REQ_META)) return rq2; s1 = blk_rq_pos(rq1); @@ -1485,7 +1485,7 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); - if (rq->cmd_flags & REQ_RW_META) { + if (rq->cmd_flags & REQ_META) { WARN_ON(!cfqq->meta_pending); cfqq->meta_pending--; } @@ -3177,7 +3177,7 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, * So both queues are sync. Let the new request get disk time if * it's a metadata request and the current queue is doing regular IO. 
*/ - if ((rq->cmd_flags & REQ_RW_META) && !cfqq->meta_pending) + if ((rq->cmd_flags & REQ_META) && !cfqq->meta_pending) return true; /* @@ -3231,7 +3231,7 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; - if (rq->cmd_flags & REQ_RW_META) + if (rq->cmd_flags & REQ_META) cfqq->meta_pending++; cfq_update_io_thinktime(cfqd, cic); diff --git a/block/elevator.c b/block/elevator.c index aa99b59c03d6..816a7c8d6394 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -79,8 +79,7 @@ int elv_rq_merge_ok(struct request *rq, struct bio *bio) /* * Don't merge file system requests and discard requests */ - if (bio_rw_flagged(bio, BIO_RW_DISCARD) != - bio_rw_flagged(rq->bio, BIO_RW_DISCARD)) + if ((bio->bi_rw & REQ_DISCARD) != (rq->bio->bi_rw & REQ_DISCARD)) return 0; /* -- cgit v1.2.3 From 66ac0280197981f88774e74b60c8e5f9f07c1dba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jun 2010 16:59:42 +0200 Subject: block: don't allocate a payload for discard request Allocating a fixed payload for discard requests always was a horrible hack, and it's now coming to bite us when adding support for discard in DM/MD. So change the code to leave the allocation of a payload to the lowlevel driver. Unfortunately that means we'll need another hack, which allows us to update the various block layer length fields indicating that we have a payload. Instead of hiding this in sd.c, which we already partially do for UNMAP support add a documented helper in the core block layer for it.
Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-core.c | 32 ++++++++++++++++++++++++++++++++ block/blk-lib.c | 33 ++++++--------------------------- 2 files changed, 38 insertions(+), 27 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 66c3cfe94d0a..3531d8e1da04 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1135,6 +1135,38 @@ void blk_put_request(struct request *req) } EXPORT_SYMBOL(blk_put_request); +/** + * blk_add_request_payload - add a payload to a request + * @rq: request to update + * @page: page backing the payload + * @len: length of the payload. + * + * This allows to later add a payload to an already submitted request by + * a block driver. The driver needs to take care of freeing the payload + * itself. + * + * Note that this is a quite horrible hack and nothing but handling of + * discard requests should ever use it. + */ +void blk_add_request_payload(struct request *rq, struct page *page, + unsigned int len) +{ + struct bio *bio = rq->bio; + + bio->bi_io_vec->bv_page = page; + bio->bi_io_vec->bv_offset = 0; + bio->bi_io_vec->bv_len = len; + + bio->bi_size = len; + bio->bi_vcnt = 1; + bio->bi_phys_segments = 1; + + rq->__data_len = rq->resid_len = len; + rq->nr_phys_segments = 1; + rq->buffer = bio_data(bio); +} +EXPORT_SYMBOL_GPL(blk_add_request_payload); + void init_request_from_bio(struct request *req, struct bio *bio) { req->cpu = bio->bi_comp_cpu; diff --git a/block/blk-lib.c b/block/blk-lib.c index d0216b9f22d4..e16185b0d8e1 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -19,7 +19,6 @@ static void blkdev_discard_end_io(struct bio *bio, int err) if (bio->bi_private) complete(bio->bi_private); - __free_page(bio_page(bio)); bio_put(bio); } @@ -43,7 +42,6 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, int type = flags & BLKDEV_IFL_BARRIER ? 
DISCARD_BARRIER : DISCARD_NOBARRIER; struct bio *bio; - struct page *page; int ret = 0; if (!q) @@ -53,35 +51,21 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, return -EOPNOTSUPP; while (nr_sects && !ret) { - unsigned int sector_size = q->limits.logical_block_size; unsigned int max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); bio = bio_alloc(gfp_mask, 1); - if (!bio) - goto out; + if (!bio) { + ret = -ENOMEM; + break; + } + bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; if (flags & BLKDEV_IFL_WAIT) bio->bi_private = &wait; - /* - * Add a zeroed one-sector payload as that's what - * our current implementations need. If we'll ever need - * more the interface will need revisiting. - */ - page = alloc_page(gfp_mask | __GFP_ZERO); - if (!page) - goto out_free_bio; - if (bio_add_pc_page(q, bio, page, sector_size, 0) < sector_size) - goto out_free_page; - - /* - * And override the bio size - the way discard works we - * touch many more blocks on disk than the actual payload - * length. - */ if (nr_sects > max_discard_sectors) { bio->bi_size = max_discard_sectors << 9; nr_sects -= max_discard_sectors; @@ -103,13 +87,8 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, ret = -EIO; bio_put(bio); } + return ret; -out_free_page: - __free_page(page); -out_free_bio: - bio_put(bio); -out: - return -ENOMEM; } EXPORT_SYMBOL(blkdev_issue_discard); -- cgit v1.2.3 From 2c8919dee659928d66cc13333d4e7a5bdd2206d5 Mon Sep 17 00:00:00 2001 From: Andi Kleen Date: Mon, 21 Jun 2010 11:02:47 +0200 Subject: gcc-4.6: block: fix unused but set variables in blk-merge Just some dead code. 
Signed-off-by: Andi Kleen Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- block/blk-merge.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-merge.c b/block/blk-merge.c index 4852475521ea..3b0cd4249671 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -12,7 +12,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, struct bio *bio) { - unsigned int phys_size; struct bio_vec *bv, *bvprv = NULL; int cluster, i, high, highprv = 1; unsigned int seg_size, nr_phys_segs; @@ -24,7 +23,7 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, fbio = bio; cluster = test_bit(QUEUE_FLAG_CLUSTER, &q->queue_flags); seg_size = 0; - phys_size = nr_phys_segs = 0; + nr_phys_segs = 0; for_each_bio(bio) { bio_for_each_segment(bv, bio, i) { /* -- cgit v1.2.3 From 3ffb52e73b47d6ad86b645942ff49035efdbcd31 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 29 Jun 2010 13:33:38 +0200 Subject: block: fixup missing conversion from BIO_RW_DISCARD to REQ_DISCARD Didn't cause a merge conflict, so fixed this one up manually post merge. Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 3531d8e1da04..3c3789492c10 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1598,7 +1598,7 @@ void submit_bio(int rw, struct bio *bio) * If it's a regular read/write or a barrier with data attached, * go through the normal accounting stuff before submission. 
*/ - if (bio_has_data(bio) && !(rw & (1 << BIO_RW_DISCARD))) { + if (bio_has_data(bio) && !(rw & REQ_DISCARD)) { if (rw & WRITE) { count_vm_events(PGPGOUT, count); } else { -- cgit v1.2.3 From 28018c242a4ec7017bbbf81d2d3952f820a27118 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 1 Jul 2010 19:49:17 +0900 Subject: block: implement an unprep function corresponding directly to prep Reviewed-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-core.c | 25 +++++++++++++++++++++++++ block/blk-settings.c | 17 +++++++++++++++++ 2 files changed, 42 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 3c3789492c10..5ab3ac22930c 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -608,6 +608,7 @@ blk_init_allocated_queue_node(struct request_queue *q, request_fn_proc *rfn, q->request_fn = rfn; q->prep_rq_fn = NULL; + q->unprep_rq_fn = NULL; q->unplug_fn = generic_unplug_device; q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; @@ -2133,6 +2134,26 @@ static bool blk_update_bidi_request(struct request *rq, int error, return false; } +/** + * blk_unprep_request - unprepare a request + * @req: the request + * + * This function makes a request ready for complete resubmission (or + * completion). It happens only after all error handling is complete, + * so represents the appropriate moment to deallocate any resources + * that were allocated to the request in the prep_rq_fn. The queue + * lock is held when calling this. 
+ */ +void blk_unprep_request(struct request *req) +{ + struct request_queue *q = req->q; + + req->cmd_flags &= ~REQ_DONTPREP; + if (q->unprep_rq_fn) + q->unprep_rq_fn(q, req); +} +EXPORT_SYMBOL_GPL(blk_unprep_request); + /* * queue lock must be held */ @@ -2148,6 +2169,10 @@ static void blk_finish_request(struct request *req, int error) blk_delete_timer(req); + if (req->cmd_flags & REQ_DONTPREP) + blk_unprep_request(req); + + blk_account_io_done(req); if (req->end_io) diff --git a/block/blk-settings.c b/block/blk-settings.c index f5ed5a1187ba..a234f4bf1d6f 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -36,6 +36,23 @@ void blk_queue_prep_rq(struct request_queue *q, prep_rq_fn *pfn) } EXPORT_SYMBOL(blk_queue_prep_rq); +/** + * blk_queue_unprep_rq - set an unprepare_request function for queue + * @q: queue + * @ufn: unprepare_request function + * + * It's possible for a queue to register an unprepare_request callback + * which is invoked before the request is finally completed. The goal + * of the function is to deallocate any data that was allocated in the + * prepare_request callback. + * + */ +void blk_queue_unprep_rq(struct request_queue *q, unprep_rq_fn *ufn) +{ + q->unprep_rq_fn = ufn; +} +EXPORT_SYMBOL(blk_queue_unprep_rq); + /** * blk_queue_merge_bvec - set a merge_bvec function for queue * @q: queue -- cgit v1.2.3 From 8749534fe6826596b71bc409c872b047a8e2755b Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 3 Jul 2010 17:45:32 +0900 Subject: block: introduce REQ_FLUSH flag SCSI-ml needs a way to mark a request as flush request in q->prepare_flush_fn because it needs to identify them later (e.g. in q->request_fn or prep_rq_fn). queue_flush sets REQ_HARDBARRIER in rq->cmd_flags however the block layer also sends normal REQ_TYPE_FS requests with REQ_HARDBARRIER. So SCSI-ml can't use REQ_HARDBARRIER to identify flush requests. 
We could change the block layer to clear REQ_HARDBARRIER bit before sending non flush requests to the lower layers. However, introducing the new flag looks cleaner (surely easier). Signed-off-by: FUJITA Tomonori Cc: James Bottomley Cc: David S. Miller Cc: Rusty Russell Cc: Alasdair G Kergon Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 7c6f4a714687..a3482425c507 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -143,7 +143,7 @@ static void queue_flush(struct request_queue *q, unsigned which) } blk_rq_init(q, rq); - rq->cmd_flags = REQ_HARDBARRIER; + rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; q->prepare_flush_fn(q, rq); -- cgit v1.2.3 From b6a903151d05e3912ab66b186f74c61851efb88c Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 3 Jul 2010 17:45:33 +0900 Subject: block: permit PREFLUSH and POSTFLUSH without prepare_flush_fn This is preparation for removing q->prepare_flush_fn. Temporarily, blk_queue_ordered() permits QUEUE_ORDERED_DO_PREFLUSH and QUEUE_ORDERED_DO_POSTFLUSH without prepare_flush_fn. 
Signed-off-by: FUJITA Tomonori Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 9 ++------- 1 file changed, 2 insertions(+), 7 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index a3482425c507..7ce0a32a21fd 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -25,12 +25,6 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered, prepare_flush_fn *prepare_flush_fn) { - if (!prepare_flush_fn && (ordered & (QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH))) { - printk(KERN_ERR "%s: prepare_flush_fn required\n", __func__); - return -EINVAL; - } - if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && @@ -146,7 +140,8 @@ static void queue_flush(struct request_queue *q, unsigned which) rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; - q->prepare_flush_fn(q, rq); + if (q->prepare_flush_fn) + q->prepare_flush_fn(q, rq); elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -- cgit v1.2.3 From 00fff26539bfe3fad21c164fc4002d9ede056fb0 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 3 Jul 2010 17:45:40 +0900 Subject: block: remove q->prepare_flush_fn completely This removes q->prepare_flush_fn completely (changes the blk_queue_ordered API). 
Signed-off-by: FUJITA Tomonori Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 7ce0a32a21fd..eefbde835308 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -13,7 +13,6 @@ * blk_queue_ordered - does this queue support ordered writes * @q: the request queue * @ordered: one of QUEUE_ORDERED_* - * @prepare_flush_fn: rq setup helper for cache flush ordered writes * * Description: * For journalled file systems, doing ordered writes on a commit @@ -22,8 +21,7 @@ * feature should call this function and indicate so. * **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered, - prepare_flush_fn *prepare_flush_fn) +int blk_queue_ordered(struct request_queue *q, unsigned ordered) { if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && @@ -38,7 +36,6 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered, q->ordered = ordered; q->next_ordered = ordered; - q->prepare_flush_fn = prepare_flush_fn; return 0; } @@ -140,8 +137,6 @@ static void queue_flush(struct request_queue *q, unsigned which) rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; - if (q->prepare_flush_fn) - q->prepare_flush_fn(q, rq); elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -- cgit v1.2.3 From 8a6cfeb6deca3a8fefd639d898b0d163c0b5d368 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 8 Jul 2010 10:18:46 +0200 Subject: block: push down BKL into .locked_ioctl As a preparation for the removal of the big kernel lock in the block layer, this removes the BKL from the common ioctl handling code, moving it into every single driver still using it. 
Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/ioctl.c | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'block') diff --git a/block/ioctl.c b/block/ioctl.c index e8eb679f2f9b..1cfa8d449d90 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -163,18 +163,10 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { struct gendisk *disk = bdev->bd_disk; - int ret; if (disk->fops->ioctl) return disk->fops->ioctl(bdev, mode, cmd, arg); - if (disk->fops->locked_ioctl) { - lock_kernel(); - ret = disk->fops->locked_ioctl(bdev, mode, cmd, arg); - unlock_kernel(); - return ret; - } - return -ENOTTY; } /* @@ -185,8 +177,7 @@ int __blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, EXPORT_SYMBOL_GPL(__blkdev_driver_ioctl); /* - * always keep this in sync with compat_blkdev_ioctl() and - * compat_blkdev_locked_ioctl() + * always keep this in sync with compat_blkdev_ioctl() */ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) -- cgit v1.2.3 From 62c2a7d969f30163f733c81158254b3095b23e72 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Jul 2010 16:51:26 +0200 Subject: block: push BKL into blktrace ioctls The blktrace driver currently needs the BKL, but we should not need to take that in the block layer, so just push it down into the driver itself. It is quite likely that the BKL is not actually required in blktrace code and could be removed in a follow-on patch. 
Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/compat_ioctl.c | 56 ---------------------------------------------------- block/ioctl.c | 2 -- 2 files changed, 58 deletions(-) (limited to 'block') diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f26051f44681..d53085637731 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -535,56 +535,6 @@ out: return err; } -struct compat_blk_user_trace_setup { - char name[32]; - u16 act_mask; - u32 buf_size; - u32 buf_nr; - compat_u64 start_lba; - compat_u64 end_lba; - u32 pid; -}; -#define BLKTRACESETUP32 _IOWR(0x12, 115, struct compat_blk_user_trace_setup) - -static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) -{ - struct blk_user_trace_setup buts; - struct compat_blk_user_trace_setup cbuts; - struct request_queue *q; - char b[BDEVNAME_SIZE]; - int ret; - - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - - if (copy_from_user(&cbuts, arg, sizeof(cbuts))) - return -EFAULT; - - bdevname(bdev, b); - - buts = (struct blk_user_trace_setup) { - .act_mask = cbuts.act_mask, - .buf_size = cbuts.buf_size, - .buf_nr = cbuts.buf_nr, - .start_lba = cbuts.start_lba, - .end_lba = cbuts.end_lba, - .pid = cbuts.pid, - }; - memcpy(&buts.name, &cbuts.name, 32); - - mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); - mutex_unlock(&bdev->bd_mutex); - if (ret) - return ret; - - if (copy_to_user(arg, &buts.name, 32)) - return -EFAULT; - - return 0; -} - static int compat_blkdev_driver_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, unsigned long arg) { @@ -802,16 +752,10 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) return compat_put_u64(arg, bdev->bd_inode->i_size); case BLKTRACESETUP32: - lock_kernel(); - ret = compat_blk_trace_setup(bdev, compat_ptr(arg)); - unlock_kernel(); - return ret; case BLKTRACESTART: /* compatible */ case BLKTRACESTOP: /* compatible */ 
case BLKTRACETEARDOWN: /* compatible */ - lock_kernel(); ret = blk_trace_ioctl(bdev, cmd, compat_ptr(arg)); - unlock_kernel(); return ret; default: if (disk->fops->compat_ioctl) diff --git a/block/ioctl.c b/block/ioctl.c index 1cfa8d449d90..9d91e830b320 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -320,9 +320,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKTRACESTOP: case BLKTRACESETUP: case BLKTRACETEARDOWN: - lock_kernel(); ret = blk_trace_ioctl(bdev, cmd, (char __user *) arg); - unlock_kernel(); break; default: ret = __blkdev_driver_ioctl(bdev, mode, cmd, arg); -- cgit v1.2.3 From 6de43703108bb1d3fc9495b3e8107d6ec72f97e4 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Jul 2010 16:51:27 +0200 Subject: block: remove BKL from BLKROSET and BLKFLSBUF We only call the functions set_device_ro(), invalidate_bdev(), sync_filesystem() and sync_blockdev() while holding the BKL in these commands. All of these are also done in other code paths without the BKL, which leads me to the conclusion that the BKL is not needed here either. The reason we hold it here is that it was originally pushed down into the ioctl function from vfs_ioctl. 
Signed-off-by: Arnd Bergmann Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/ioctl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'block') diff --git a/block/ioctl.c b/block/ioctl.c index 9d91e830b320..60f477c91eef 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -197,10 +197,8 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, if (ret != -EINVAL && ret != -ENOTTY) return ret; - lock_kernel(); fsync_bdev(bdev); invalidate_bdev(bdev); - unlock_kernel(); return 0; case BLKROSET: @@ -212,9 +210,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, return -EACCES; if (get_user(n, (int __user *)(arg))) return -EFAULT; - lock_kernel(); set_device_ro(bdev, n); - unlock_kernel(); return 0; case BLKDISCARD: { -- cgit v1.2.3 From 15392efb9d427482754f6d267262452878667499 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Wed, 7 Jul 2010 16:51:28 +0200 Subject: block: remove BKL from partition ioctls The blkpg_ioctl and blkdev_reread_part access fields of the bdev and gendisk structures, yet they always do so under the protection of bdev->bd_mutex, which seems sufficient. 
Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/ioctl.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'block') diff --git a/block/ioctl.c b/block/ioctl.c index 60f477c91eef..09fd7f1ef23a 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -296,14 +296,10 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, bd_release(bdev); return ret; case BLKPG: - lock_kernel(); ret = blkpg_ioctl(bdev, (struct blkpg_ioctl_arg __user *) arg); - unlock_kernel(); break; case BLKRRPART: - lock_kernel(); ret = blkdev_reread_part(bdev); - unlock_kernel(); break; case BLKGETSIZE: size = bdev->bd_inode->i_size; -- cgit v1.2.3 From f10d9f617a65905c556c3b37c9b9646ae7d04ed7 Mon Sep 17 00:00:00 2001 From: Dave Chinner Date: Tue, 13 Jul 2010 17:50:50 +1000 Subject: blkdev: check for valid request queue before issuing flush Issuing a blkdev_issue_flush() on an unconfigured loop device causes a panic as q->make_request_fn is not configured. This can occur when trying to mount the unconfigured loop device as an XFS filesystem. There are no guards that catch the bio before the request function is called because we don't add a payload to the bio. Instead, manually check this case as soon as we have a pointer to the queue to flush. Signed-off-by: Dave Chinner Signed-off-by: Jens Axboe --- block/blk-barrier.c | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index eefbde835308..557f69360b6a 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -310,6 +310,15 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, if (!q) return -ENXIO; + /* + * some block devices may not have their queue correctly set up here + * (e.g. loop device without a backing file) and so issuing a flush + * here will panic. Ensure there is a request function before issuing + * the barrier. 
+ */ + if (!q->make_request_fn) + return -ENXIO; + bio = bio_alloc(gfp_mask, 0); bio->bi_end_io = bio_end_empty_barrier; bio->bi_bdev = bdev; -- cgit v1.2.3 From 10d1f9e2ccfff40665a00ea0e0a0d11e54c9cbb1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Jul 2010 10:49:31 -0600 Subject: block: fix problem with sending down discard that isn't of correct granularity If the queue doesn't have a limit set, or it just set UINT_MAX like we default to, we could be sending down a discard request that isn't of the correct granularity if the block size is > 512b. Fix this by adjusting max_discard_sectors down to the proper alignment. Signed-off-by: Jens Axboe --- block/blk-lib.c | 15 ++++++++++++--- 1 file changed, 12 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index e16185b0d8e1..5d793e143f3c 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -41,6 +41,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, struct request_queue *q = bdev_get_queue(bdev); int type = flags & BLKDEV_IFL_BARRIER ? 
DISCARD_BARRIER : DISCARD_NOBARRIER; + unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -50,10 +51,18 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (!blk_queue_discard(q)) return -EOPNOTSUPP; - while (nr_sects && !ret) { - unsigned int max_discard_sectors = - min(q->limits.max_discard_sectors, UINT_MAX >> 9); + /* + * Ensure that max_discard_sectors is of the proper + * granularity + */ + max_discard_sectors = min(q->limits.max_discard_sectors, UINT_MAX >> 9); + if (q->limits.discard_granularity) { + unsigned int disc_sects = q->limits.discard_granularity >> 9; + max_discard_sectors &= ~(disc_sects - 1); + } + + while (nr_sects && !ret) { bio = bio_alloc(gfp_mask, 1); if (!bio) { ret = -ENOMEM; -- cgit v1.2.3 From 28e18d0188b9e3ab82ebd09d9b1d1c7f8d1822aa Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 9 Jul 2010 09:38:24 +0900 Subject: block: set REQ_TYPE_FS on flush requests the block layer doesn't set rq->cmd_type on flush requests. By definition, it should be REQ_TYPE_FS (the lower layers build a command and interpret the result of it, that is, the block layer doesn't know the details). Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-barrier.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index 557f69360b6a..d95a1440e29d 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -134,6 +134,7 @@ static void queue_flush(struct request_queue *q, unsigned which) } blk_rq_init(q, rq); + rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; rq->rq_disk = q->bar_rq.rq_disk; rq->end_io = end_io; -- cgit v1.2.3 From 16f2319fd67b169c0b34391d3fa0870fff129891 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Fri, 9 Jul 2010 09:38:25 +0900 Subject: block: set up rq->rq_disk properly for flush requests q->bar_rq.rq_disk is NULL. Use the rq_disk of the original request instead. 
Signed-off-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- block/blk-barrier.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index d95a1440e29d..f0faefca032f 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -136,7 +136,7 @@ static void queue_flush(struct request_queue *q, unsigned which) blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; - rq->rq_disk = q->bar_rq.rq_disk; + rq->rq_disk = q->orig_bar_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); -- cgit v1.2.3 From 3383977fadc4027f20907f6208a9033cd043adab Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Sun, 8 Aug 2010 12:11:33 -0400 Subject: block: update request stacking methods to support discards Propagate REQ_DISCARD in cmd_flags when cloning a discard request. Skip blk_rq_check_limits's existing checks for discard requests because discard limits will have already been checked in blkdev_issue_discard. 
Signed-off-by: Mike Snitzer Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 5ab3ac22930c..7da630e25ae7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1644,6 +1644,9 @@ EXPORT_SYMBOL(submit_bio); */ int blk_rq_check_limits(struct request_queue *q, struct request *rq) { + if (rq->cmd_flags & REQ_DISCARD) + return 0; + if (blk_rq_sectors(rq) > queue_max_sectors(q) || blk_rq_bytes(rq) > queue_max_hw_sectors(q) << 9) { printk(KERN_ERR "%s: over max size limit.\n", __func__); @@ -2492,6 +2495,8 @@ static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); + if (src->cmd_flags & REQ_DISCARD) + dst->cmd_flags |= REQ_DISCARD; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); -- cgit v1.2.3 From 18edc8eaa68070771bdb2098260e44efe74de722 Mon Sep 17 00:00:00 2001 From: Dmitry Monakhov Date: Fri, 6 Aug 2010 13:23:25 +0200 Subject: blkdev: fix blkdev_issue_zeroout return value - If function called without barrier option retvalue is incorrect Signed-off-by: Dmitry Monakhov Signed-off-by: Jens Axboe --- block/blk-lib.c | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index 5d793e143f3c..c1fc55a83ba1 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -145,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err) int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) { - int ret = 0; + int ret; struct bio *bio; struct bio_batch bb; unsigned int sz, issued = 0; @@ -163,11 +163,14 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, return ret; } submit: + ret = 0; while (nr_sects != 0) { bio = bio_alloc(gfp_mask, 
min(nr_sects, (sector_t)BIO_MAX_PAGES)); - if (!bio) + if (!bio) { + ret = -ENOMEM; break; + } bio->bi_sector = sector; bio->bi_bdev = bdev; @@ -186,6 +189,7 @@ submit: if (ret < (sz << 9)) break; } + ret = 0; issued++; submit_bio(WRITE, bio); } -- cgit v1.2.3