diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/bio-integrity.c | 14 | ||||
-rw-r--r-- | block/bio.c | 3 | ||||
-rw-r--r-- | block/blk-core.c | 14 | ||||
-rw-r--r-- | block/blk-exec.c | 1 | ||||
-rw-r--r-- | block/blk-merge.c | 17 | ||||
-rw-r--r-- | block/blk-mq.c | 235 | ||||
-rw-r--r-- | block/blk-mq.h | 2 | ||||
-rw-r--r-- | block/blk-sysfs.c | 6 | ||||
-rw-r--r-- | block/cfq-iosched.c | 19 | ||||
-rw-r--r-- | block/compat_ioctl.c | 6 | ||||
-rw-r--r-- | block/genhd.c | 26 | ||||
-rw-r--r-- | block/ioctl.c | 5 | ||||
-rw-r--r-- | block/partition-generic.c | 2 | ||||
-rw-r--r-- | block/partitions/aix.c | 4 | ||||
-rw-r--r-- | block/partitions/amiga.c | 12 | ||||
-rw-r--r-- | block/partitions/efi.c | 46 | ||||
-rw-r--r-- | block/partitions/msdos.c | 13 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 60 |
18 files changed, 297 insertions, 188 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9e241063a616..f14b4abbebd8 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; + bip->bip_max_vcnt = bvec_nr_vecs(idx); } else { bip->bip_vec = bip->bip_inline_vecs; + bip->bip_max_vcnt = inline_vecs; } bip->bip_slab = idx; @@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { + if (bip->bip_vcnt >= bip->bip_max_vcnt) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } @@ -526,7 +520,7 @@ void bio_integrity_endio(struct bio *bio, int error) */ if (error) { bio->bi_end_io = bip->bip_end_io; - bio_endio(bio, error); + bio_endio_nodec(bio, error); return; } diff --git a/block/bio.c b/block/bio.c index 0ec61c9e536c..3e6331d25d90 100644 --- a/block/bio.c +++ b/block/bio.c @@ -112,7 +112,8 @@ static struct kmem_cache *bio_find_or_create_slab(unsigned int extra_size) bslab = &bio_slabs[entry]; snprintf(bslab->name, sizeof(bslab->name), "bio-%d", entry); - slab = kmem_cache_create(bslab->name, sz, 0, SLAB_HWCACHE_ALIGN, NULL); + slab = kmem_cache_create(bslab->name, sz, ARCH_KMALLOC_MINALIGN, + SLAB_HWCACHE_ALIGN, NULL); if (!slab) goto out_unlock; diff --git a/block/blk-core.c b/block/blk-core.c index ce5a389cea94..9c888bd22b00 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -438,14 +438,17 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) */ void blk_queue_bypass_start(struct request_queue *q) { - bool drain; - spin_lock_irq(q->queue_lock); - drain = !q->bypass_depth++; + q->bypass_depth++; queue_flag_set(QUEUE_FLAG_BYPASS, q); spin_unlock_irq(q->queue_lock); - if (drain) { + /* + * Queues start drained. Skip actual draining till init is + * complete. This avoids lenghty delays during queue init which + * can happen many times during boot. + */ + if (blk_queue_init_done(q)) { spin_lock_irq(q->queue_lock); __blk_drain_queue(q, false); spin_unlock_irq(q->queue_lock); @@ -511,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. */ if (q->mq_ops) { - blk_mq_drain_queue(q); + blk_mq_freeze_queue(q); spin_lock_irq(lock); } else { spin_lock_irq(lock); @@ -1249,7 +1252,6 @@ void blk_rq_set_block_pc(struct request *rq) rq->__sector = (sector_t) -1; rq->bio = rq->biotail = NULL; memset(rq->__cmd, 0, sizeof(rq->__cmd)); - rq->cmd = rq->__cmd; } EXPORT_SYMBOL(blk_rq_set_block_pc); diff --git a/block/blk-exec.c b/block/blk-exec.c index f4d27b12c90b..9924725fa50d 100644 --- a/block/blk-exec.c +++ b/block/blk-exec.c @@ -56,6 +56,7 @@ void blk_execute_rq_nowait(struct request_queue *q, struct gendisk *bd_disk, bool is_pm_resume; WARN_ON(irqs_disabled()); + WARN_ON(rq->cmd_type == REQ_TYPE_FS); rq->rq_disk = bd_disk; rq->end_io = done; diff --git a/block/blk-merge.c b/block/blk-merge.c index 54535831f1e1..77881798f793 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -10,10 +10,11 @@ #include "blk.h" static unsigned int __blk_recalc_rq_segments(struct request_queue *q, - struct bio *bio) + struct bio *bio, + bool no_sg_merge) { struct bio_vec bv, bvprv = { NULL }; - int cluster, high, highprv = 1, no_sg_merge; + int cluster, high, highprv = 1; unsigned int seg_size, nr_phys_segs; struct bio *fbio, *bbio; struct bvec_iter iter; @@ -35,7 +36,6 @@ static unsigned int __blk_recalc_rq_segments(struct request_queue *q, cluster = blk_queue_cluster(q); seg_size = 0; nr_phys_segs = 0; - no_sg_merge = test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags); high = 0; for_each_bio(bio) { bio_for_each_segment(bv, bio, iter) { @@ -88,18 +88,23 @@ new_segment: void blk_recalc_rq_segments(struct request *rq) { - rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio); + bool no_sg_merge = !!test_bit(QUEUE_FLAG_NO_SG_MERGE, + &rq->q->queue_flags); + + rq->nr_phys_segments = __blk_recalc_rq_segments(rq->q, rq->bio, + no_sg_merge); } void blk_recount_segments(struct request_queue *q, struct bio *bio) { - if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags)) + if (test_bit(QUEUE_FLAG_NO_SG_MERGE, &q->queue_flags) && + bio->bi_vcnt < queue_max_segments(q)) bio->bi_phys_segments = bio->bi_vcnt; else { struct bio *nxt = bio->bi_next; bio->bi_next = NULL; - bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio); + bio->bi_phys_segments = __blk_recalc_rq_segments(q, bio, false); bio->bi_next = nxt; } diff --git a/block/blk-mq.c b/block/blk-mq.c index ad69ef657e85..df8e1e09dd17 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -78,83 +78,74 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, static int blk_mq_queue_enter(struct request_queue *q) { - int ret; - - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - smp_wmb(); - - /* we have problems freezing the queue if it's initializing */ - if (!blk_queue_dying(q) && - (!blk_queue_bypass(q) || !blk_queue_init_done(q))) - return 0; - - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + while (true) { + int ret; - spin_lock_irq(q->queue_lock); - ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !blk_queue_bypass(q) || blk_queue_dying(q), - *q->queue_lock); - /* inc usage with lock hold to avoid freeze_queue runs here */ - if (!ret && !blk_queue_dying(q)) - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - else if (blk_queue_dying(q)) - ret = -ENODEV; - spin_unlock_irq(q->queue_lock); + if (percpu_ref_tryget_live(&q->mq_usage_counter)) + return 0; - return ret; + ret = wait_event_interruptible(q->mq_freeze_wq, + !q->mq_freeze_depth || blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; + if (ret) + return ret; + } } static void blk_mq_queue_exit(struct request_queue *q) { - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + percpu_ref_put(&q->mq_usage_counter); } -void blk_mq_drain_queue(struct request_queue *q) +static void blk_mq_usage_counter_release(struct percpu_ref *ref) { - while (true) { - s64 count; - - spin_lock_irq(q->queue_lock); - count = percpu_counter_sum(&q->mq_usage_counter); - spin_unlock_irq(q->queue_lock); + struct request_queue *q = + container_of(ref, struct request_queue, mq_usage_counter); - if (count == 0) - break; - blk_mq_start_hw_queues(q); - msleep(10); - } + wake_up_all(&q->mq_freeze_wq); } /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. */ -static void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue(struct request_queue *q) { - bool drain; + bool freeze; spin_lock_irq(q->queue_lock); - drain = !q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); + freeze = !q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); - if (drain) - blk_mq_drain_queue(q); + if (freeze) { + /* + * XXX: Temporary kludge to work around SCSI blk-mq stall. + * SCSI synchronously creates and destroys many queues + * back-to-back during probe leading to lengthy stalls. + * This will be fixed by keeping ->mq_usage_counter in + * atomic mode until genhd registration, but, for now, + * let's work around using expedited synchronization. + */ + __percpu_ref_kill_expedited(&q->mq_usage_counter); + + blk_mq_run_queues(q, false); + } + wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); } static void blk_mq_unfreeze_queue(struct request_queue *q) { - bool wake = false; + bool wake; spin_lock_irq(q->queue_lock); - if (!--q->bypass_depth) { - queue_flag_clear(QUEUE_FLAG_BYPASS, q); - wake = true; - } - WARN_ON_ONCE(q->bypass_depth < 0); + wake = !--q->mq_freeze_depth; + WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); - if (wake) + if (wake) { + percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); + } } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -194,6 +185,8 @@ static void blk_mq_rq_ctx_init(struct request_queue *q, struct blk_mq_ctx *ctx, /* tag was already set */ rq->errors = 0; + rq->cmd = rq->__cmd; + rq->extra_len = 0; rq->sense_len = 0; rq->resid_len = 0; @@ -219,7 +212,6 @@ __blk_mq_alloc_request(struct blk_mq_alloc_data *data, int rw) if (tag != BLK_MQ_TAG_FAIL) { rq = data->hctx->tags->rqs[tag]; - rq->cmd_flags = 0; if (blk_mq_tag_busy(data->hctx)) { rq->cmd_flags = REQ_MQ_INFLIGHT; atomic_inc(&data->hctx->nr_active); @@ -274,6 +266,7 @@ static void __blk_mq_free_request(struct blk_mq_hw_ctx *hctx, if (rq->cmd_flags & REQ_MQ_INFLIGHT) atomic_dec(&hctx->nr_active); + rq->cmd_flags = 0; clear_bit(REQ_ATOM_STARTED, &rq->atomic_flags); blk_mq_put_tag(hctx, tag, &ctx->last_tag); @@ -409,6 +402,12 @@ static void blk_mq_start_request(struct request *rq, bool last) blk_add_timer(rq); /* + * Ensure that ->deadline is visible before set the started + * flag and clear the completed flag. + */ + smp_mb__before_atomic(); + + /* * Mark us as started and clear complete. Complete might have been * set if requeue raced with timeout, which then marked it as * complete. So be sure to clear complete again when we start @@ -489,7 +488,11 @@ static void blk_mq_requeue_work(struct work_struct *work) blk_mq_insert_request(rq, false, false, false); } - blk_mq_run_queues(q, false); + /* + * Use the start variant of queue running here, so that running + * the requeue work will kick stopped queues. + */ + blk_mq_start_hw_queues(q); } void blk_mq_add_to_requeue_list(struct request *rq, bool at_head) @@ -973,14 +976,9 @@ void blk_mq_insert_request(struct request *rq, bool at_head, bool run_queue, hctx = q->mq_ops->map_queue(q, ctx->cpu); - if (rq->cmd_flags & (REQ_FLUSH | REQ_FUA) && - !(rq->cmd_flags & (REQ_FLUSH_SEQ))) { - blk_insert_flush(rq); - } else { - spin_lock(&ctx->lock); - __blk_mq_insert_request(hctx, rq, at_head); - spin_unlock(&ctx->lock); - } + spin_lock(&ctx->lock); + __blk_mq_insert_request(hctx, rq, at_head); + spin_unlock(&ctx->lock); if (run_queue) blk_mq_run_hw_queue(hctx, async); @@ -1090,13 +1088,17 @@ static void blk_mq_bio_to_request(struct request *rq, struct bio *bio) blk_account_io_start(rq, 1); } +static inline bool hctx_allow_merges(struct blk_mq_hw_ctx *hctx) +{ + return (hctx->flags & BLK_MQ_F_SHOULD_MERGE) && + !blk_queue_nomerges(hctx->queue); +} + static inline bool blk_mq_merge_queue_io(struct blk_mq_hw_ctx *hctx, struct blk_mq_ctx *ctx, struct request *rq, struct bio *bio) { - struct request_queue *q = hctx->queue; - - if (!(hctx->flags & BLK_MQ_F_SHOULD_MERGE)) { + if (!hctx_allow_merges(hctx)) { blk_mq_bio_to_request(rq, bio); spin_lock(&ctx->lock); insert_rq: @@ -1104,6 +1106,8 @@ insert_rq: spin_unlock(&ctx->lock); return false; } else { + struct request_queue *q = hctx->queue; + spin_lock(&ctx->lock); if (!blk_mq_attempt_merge(q, ctx, bio)) { blk_mq_bio_to_request(rq, bio); @@ -1331,6 +1335,7 @@ static void blk_mq_free_rq_map(struct blk_mq_tag_set *set, continue; set->ops->exit_request(set->driver_data, tags->rqs[i], hctx_idx, i); + tags->rqs[i] = NULL; } } @@ -1364,8 +1369,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, INIT_LIST_HEAD(&tags->page_list); - tags->rqs = kmalloc_node(set->queue_depth * sizeof(struct request *), - GFP_KERNEL, set->numa_node); + tags->rqs = kzalloc_node(set->queue_depth * sizeof(struct request *), + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + set->numa_node); if (!tags->rqs) { blk_mq_free_tags(tags); return NULL; @@ -1389,8 +1395,9 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, this_order--; do { - page = alloc_pages_node(set->numa_node, GFP_KERNEL, - this_order); + page = alloc_pages_node(set->numa_node, + GFP_KERNEL | __GFP_NOWARN | __GFP_NORETRY, + this_order); if (page) break; if (!this_order--) @@ -1411,11 +1418,15 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, left -= to_do * rq_size; for (j = 0; j < to_do; j++) { tags->rqs[i] = p; + tags->rqs[i]->atomic_flags = 0; + tags->rqs[i]->cmd_flags = 0; if (set->ops->init_request) { if (set->ops->init_request(set->driver_data, tags->rqs[i], hctx_idx, i, - set->numa_node)) + set->numa_node)) { + tags->rqs[i] = NULL; goto fail; + } } p += rq_size; @@ -1426,7 +1437,6 @@ static struct blk_mq_tags *blk_mq_init_rq_map(struct blk_mq_tag_set *set, return tags; fail: - pr_warn("%s: failed to allocate requests\n", __func__); blk_mq_free_rq_map(set, tags, hctx_idx); return NULL; } @@ -1596,7 +1606,7 @@ static int blk_mq_init_hw_queues(struct request_queue *q, hctx->tags = set->tags[i]; /* - * Allocate space for all possible cpus to avoid allocation in + * Allocate space for all possible cpus to avoid allocation at * runtime */ hctx->ctxs = kmalloc_node(nr_cpu_ids * sizeof(void *), @@ -1684,8 +1694,8 @@ static void blk_mq_map_swqueue(struct request_queue *q) queue_for_each_hw_ctx(q, hctx, i) { /* - * If not software queues are mapped to this hardware queue, - * disable it and free the request entries + * If no software queues are mapped to this hardware queue, + * disable it and free the request entries. */ if (!hctx->nr_ctx) { struct blk_mq_tag_set *set = q->tag_set; @@ -1735,14 +1745,10 @@ static void blk_mq_del_queue_tag_set(struct request_queue *q) { struct blk_mq_tag_set *set = q->tag_set; - blk_mq_freeze_queue(q); - mutex_lock(&set->tag_list_lock); list_del_init(&q->tag_set_list); blk_mq_update_tag_set_depth(set); mutex_unlock(&set->tag_list_lock); - - blk_mq_unfreeze_queue(q); } static void blk_mq_add_queue_tag_set(struct blk_mq_tag_set *set, @@ -1798,7 +1804,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_counter_init(&q->mq_usage_counter, 0)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); @@ -1891,7 +1897,7 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); - percpu_counter_destroy(&q->mq_usage_counter); + percpu_ref_exit(&q->mq_usage_counter); free_percpu(q->queue_ctx); kfree(q->queue_hw_ctx); @@ -1950,6 +1956,60 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, return NOTIFY_OK; } +static int __blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) +{ + int i; + + for (i = 0; i < set->nr_hw_queues; i++) { + set->tags[i] = blk_mq_init_rq_map(set, i); + if (!set->tags[i]) + goto out_unwind; + } + + return 0; + +out_unwind: + while (--i >= 0) + blk_mq_free_rq_map(set, set->tags[i], i); + + return -ENOMEM; +} + +/* + * Allocate the request maps associated with this tag_set. Note that this + * may reduce the depth asked for, if memory is tight. set->queue_depth + * will be updated to reflect the allocated depth. + */ +static int blk_mq_alloc_rq_maps(struct blk_mq_tag_set *set) +{ + unsigned int depth; + int err; + + depth = set->queue_depth; + do { + err = __blk_mq_alloc_rq_maps(set); + if (!err) + break; + + set->queue_depth >>= 1; + if (set->queue_depth < set->reserved_tags + BLK_MQ_TAG_MIN) { + err = -ENOMEM; + break; + } + } while (set->queue_depth); + + if (!set->queue_depth || err) { + pr_err("blk-mq: failed to allocate request map\n"); + return -ENOMEM; + } + + if (depth != set->queue_depth) + pr_info("blk-mq: reduced tag depth (%u -> %u)\n", + depth, set->queue_depth); + + return 0; +} + /* * Alloc a tag set to be associated with one or more request queues. * May fail with EINVAL for various error conditions. May adjust the @@ -1958,8 +2018,6 @@ static int blk_mq_queue_reinit_notify(struct notifier_block *nb, */ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) { - int i; - if (!set->nr_hw_queues) return -EINVAL; if (!set->queue_depth) @@ -1980,23 +2038,18 @@ int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set) sizeof(struct blk_mq_tags *), GFP_KERNEL, set->numa_node); if (!set->tags) - goto out; + return -ENOMEM; - for (i = 0; i < set->nr_hw_queues; i++) { - set->tags[i] = blk_mq_init_rq_map(set, i); - if (!set->tags[i]) - goto out_unwind; - } + if (blk_mq_alloc_rq_maps(set)) + goto enomem; mutex_init(&set->tag_list_lock); INIT_LIST_HEAD(&set->tag_list); return 0; - -out_unwind: - while (--i >= 0) - blk_mq_free_rq_map(set, set->tags[i], i); -out: +enomem: + kfree(set->tags); + set->tags = NULL; return -ENOMEM; } EXPORT_SYMBOL(blk_mq_alloc_tag_set); @@ -2011,6 +2064,7 @@ void blk_mq_free_tag_set(struct blk_mq_tag_set *set) } kfree(set->tags); + set->tags = NULL; } EXPORT_SYMBOL(blk_mq_free_tag_set); @@ -2050,8 +2104,7 @@ static int __init blk_mq_init(void) { blk_mq_cpu_init(); - /* Must be called after percpu_counter_hotcpu_callback() */ - hotcpu_notifier(blk_mq_queue_reinit_notify, -10); + hotcpu_notifier(blk_mq_queue_reinit_notify, 0); return 0; } diff --git a/block/blk-mq.h b/block/blk-mq.h index 26460884c6cd..ca4964a6295d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,7 +28,7 @@ struct blk_mq_ctx { void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); -void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, struct request *orig_rq); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 23321fbab293..17f5c84ce7bf 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -554,8 +554,10 @@ int blk_register_queue(struct gendisk *disk) * Initialization must be complete by now. Finish the initial * bypass from queue allocation. */ - blk_queue_bypass_end(q); - queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + if (!blk_queue_init_done(q)) { + queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + blk_queue_bypass_end(q); + } ret = blk_trace_init_sysfs(dev); if (ret) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index cadc37841744..3f31cf9508e6 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -1272,15 +1272,22 @@ __cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) rb_insert_color(&cfqg->rb_node, &st->rb); } +/* + * This has to be called only on activation of cfqg + */ static void cfq_update_group_weight(struct cfq_group *cfqg) { - BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); - if (cfqg->new_weight) { cfqg->weight = cfqg->new_weight; cfqg->new_weight = 0; } +} + +static void +cfq_update_group_leaf_weight(struct cfq_group *cfqg) +{ + BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); if (cfqg->new_leaf_weight) { cfqg->leaf_weight = cfqg->new_leaf_weight; @@ -1299,7 +1306,12 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) /* add to the service tree */ BUG_ON(!RB_EMPTY_NODE(&cfqg->rb_node)); - cfq_update_group_weight(cfqg); + /* + * Update leaf_weight. We cannot update weight at this point + * because cfqg might already have been activated and is + * contributing its current weight to the parent's child_weight. + */ + cfq_update_group_leaf_weight(cfqg); __cfq_group_service_tree_add(st, cfqg); /* @@ -1323,6 +1335,7 @@ cfq_group_service_tree_add(struct cfq_rb_root *st, struct cfq_group *cfqg) */ while ((parent = cfqg_parent(pos))) { if (propagate) { + cfq_update_group_weight(pos); propagate = !parent->nr_active++; parent->children_weight += pos->weight; } diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index a0926a6094b2..18b282ce361e 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -663,6 +663,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) fmode_t mode = file->f_mode; struct backing_dev_info *bdi; loff_t size; + unsigned int max_sectors; /* * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have @@ -719,8 +720,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKSSZGET: /* get block device hardware sector size */ return compat_put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: - return compat_put_ushort(arg, - queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return compat_put_ushort(arg, max_sectors); case BLKROTATIONAL: return compat_put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); diff --git a/block/genhd.c b/block/genhd.c index 048e2cee43d2..bd3060684ab2 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -28,10 +28,10 @@ struct kobject *block_depr; /* for extended dynamic devt allocation, currently only one major is used */ #define NR_EXT_DEVT (1 << MINORBITS) -/* For extended devt allocation. ext_devt_mutex prevents look up +/* For extended devt allocation. ext_devt_lock prevents look up * results from going away underneath its user. */ -static DEFINE_MUTEX(ext_devt_mutex); +static DEFINE_SPINLOCK(ext_devt_lock); static DEFINE_IDR(ext_devt_idr); static struct device_type disk_type; @@ -420,9 +420,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) } /* allocate ext devt */ - mutex_lock(&ext_devt_mutex); - idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_KERNEL); - mutex_unlock(&ext_devt_mutex); + idr_preload(GFP_KERNEL); + + spin_lock(&ext_devt_lock); + idx = idr_alloc(&ext_devt_idr, part, 0, NR_EXT_DEVT, GFP_NOWAIT); + spin_unlock(&ext_devt_lock); + + idr_preload_end(); if (idx < 0) return idx == -ENOSPC ? -EBUSY : idx; @@ -441,15 +445,13 @@ int blk_alloc_devt(struct hd_struct *part, dev_t *devt) */ void blk_free_devt(dev_t devt) { - might_sleep(); - if (devt == MKDEV(0, 0)) return; if (MAJOR(devt) == BLOCK_EXT_MAJOR) { - mutex_lock(&ext_devt_mutex); + spin_lock(&ext_devt_lock); idr_remove(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); - mutex_unlock(&ext_devt_mutex); + spin_unlock(&ext_devt_lock); } } @@ -665,7 +667,6 @@ void del_gendisk(struct gendisk *disk) sysfs_remove_link(block_depr, dev_name(disk_to_dev(disk))); pm_runtime_set_memalloc_noio(disk_to_dev(disk), false); device_del(disk_to_dev(disk)); - blk_free_devt(disk_to_dev(disk)->devt); } EXPORT_SYMBOL(del_gendisk); @@ -690,13 +691,13 @@ struct gendisk *get_gendisk(dev_t devt, int *partno) } else { struct hd_struct *part; - mutex_lock(&ext_devt_mutex); + spin_lock(&ext_devt_lock); part = idr_find(&ext_devt_idr, blk_mangle_minor(MINOR(devt))); if (part && get_disk(part_to_disk(part))) { *partno = part->partno; disk = part_to_disk(part); } - mutex_unlock(&ext_devt_mutex); + spin_unlock(&ext_devt_lock); } return disk; @@ -1098,6 +1099,7 @@ static void disk_release(struct device *dev) { struct gendisk *disk = dev_to_disk(dev); + blk_free_devt(dev->devt); disk_release_events(disk); kfree(disk->random); disk_replace_part_tbl(disk, NULL); diff --git a/block/ioctl.c b/block/ioctl.c index 7d5c3b20af45..d6cda8147c91 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -278,6 +278,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, struct backing_dev_info *bdi; loff_t size; int ret, n; + unsigned int max_sectors; switch(cmd) { case BLKFLSBUF: @@ -375,7 +376,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKDISCARDZEROES: return put_uint(arg, bdev_discard_zeroes_data(bdev)); case BLKSECTGET: - return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return put_ushort(arg, max_sectors); case BLKROTATIONAL: return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); case BLKRASET: diff --git a/block/partition-generic.c b/block/partition-generic.c index 789cdea05893..0d9e5f97f0a8 100644 --- a/block/partition-generic.c +++ b/block/partition-generic.c @@ -211,6 +211,7 @@ static const struct attribute_group *part_attr_groups[] = { static void part_release(struct device *dev) { struct hd_struct *p = dev_to_part(dev); + blk_free_devt(dev->devt); free_part_stats(p); free_part_info(p); kfree(p); @@ -253,7 +254,6 @@ void delete_partition(struct gendisk *disk, int partno) rcu_assign_pointer(ptbl->last_lookup, NULL); kobject_put(part->holder_dir); device_del(part_to_dev(part)); - blk_free_devt(part_devt(part)); hd_struct_put(part); } diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 43be471d9b1d..f3ed7b2d89bf 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -215,7 +215,7 @@ int aix_partition(struct parsed_partitions *state) numlvs = be16_to_cpu(p->numlvs); put_dev_sector(sect); } - lvip = kzalloc(sizeof(struct lv_info) * state->limit, GFP_KERNEL); + lvip = kcalloc(state->limit, sizeof(struct lv_info), GFP_KERNEL); if (!lvip) return 0; if (numlvs && (d = read_part_sector(state, vgda_sector + 1, §))) { @@ -253,7 +253,7 @@ int aix_partition(struct parsed_partitions *state) continue; } lv_ix = be16_to_cpu(p->lv_ix) - 1; - if (lv_ix > state->limit) { + if (lv_ix >= state->limit) { cur_lv_ix = -1; continue; } diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 70cbf44a1560..2b13533d60a2 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -7,6 +7,8 @@ * Re-organised Feb 1998 Russell King */ +#define pr_fmt(fmt) fmt + #include <linux/types.h> #include <linux/affs_hardblocks.h> @@ -40,7 +42,7 @@ int amiga_partition(struct parsed_partitions *state) data = read_part_sector(state, blk, §); if (!data) { if (warn_no_part) - printk("Dev %s: unable to read RDB block %d\n", + pr_err("Dev %s: unable to read RDB block %d\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; @@ -57,12 +59,12 @@ int amiga_partition(struct parsed_partitions *state) *(__be32 *)(data+0xdc) = 0; if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) { - printk("Warning: Trashed word at 0xd0 in block %d " - "ignored in checksum calculation\n",blk); + pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n", + blk); break; } - printk("Dev %s: RDB in block %d has bad checksum\n", + pr_err("Dev %s: RDB in block %d has bad checksum\n", bdevname(state->bdev, b), blk); } @@ -83,7 +85,7 @@ int amiga_partition(struct parsed_partitions *state) data = read_part_sector(state, blk, §); if (!data) { if (warn_no_part) - printk("Dev %s: unable to read partition block %d\n", + pr_err("Dev %s: unable to read partition block %d\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; diff --git a/block/partitions/efi.c b/block/partitions/efi.c index dc51f467a560..56d08fd75b1a 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -121,7 +121,7 @@ __setup("gpt", force_gpt_fn); /** * efi_crc32() - EFI version of crc32 function * @buf: buffer to calculate crc32 of - * @len - length of buf + * @len: length of buf * * Description: Returns EFI-style CRC32 value for @buf * @@ -240,10 +240,10 @@ done: /** * read_lba(): Read bytes from disk, starting at given LBA - * @state - * @lba - * @buffer - * @size_t + * @state: disk parsed partitions + * @lba: the Logical Block Address of the partition table + * @buffer: destination buffer + * @count: bytes to read * * Description: Reads @count bytes from @state->bdev into @buffer. * Returns number of bytes read on success, 0 on error. @@ -277,8 +277,8 @@ static size_t read_lba(struct parsed_partitions *state, /** * alloc_read_gpt_entries(): reads partition entries from disk - * @state - * @gpt - GPT header + * @state: disk parsed partitions + * @gpt: GPT header * * Description: Returns ptes on success, NULL on error. * Allocates space for PTEs based on information found in @gpt. @@ -312,8 +312,8 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, /** * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk - * @state - * @lba is the Logical Block Address of the partition table + * @state: disk parsed partitions + * @lba: the Logical Block Address of the partition table * * Description: returns GPT header on success, NULL on error. Allocates * and fills a GPT header starting at @ from @state->bdev. @@ -340,10 +340,10 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, /** * is_gpt_valid() - tests one GPT header and PTEs for validity - * @state - * @lba is the logical block address of the GPT header to test - * @gpt is a GPT header ptr, filled on return. - * @ptes is a PTEs ptr, filled on return. + * @state: disk parsed partitions + * @lba: logical block address of the GPT header to test + * @gpt: GPT header ptr, filled on return. + * @ptes: PTEs ptr, filled on return. * * Description: returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. @@ -461,8 +461,8 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, /** * is_pte_valid() - tests one PTE for validity - * @pte is the pte to check - * @lastlba is last lba of the disk + * @pte:pte to check + * @lastlba: last lba of the disk * * Description: returns 1 if valid, 0 on error. */ @@ -478,9 +478,10 @@ is_pte_valid(const gpt_entry *pte, const u64 lastlba) /** * compare_gpts() - Search disk for valid GPT headers and PTEs - * @pgpt is the primary GPT header - * @agpt is the alternate GPT header - * @lastlba is the last LBA number + * @pgpt: primary GPT header + * @agpt: alternate GPT header + * @lastlba: last LBA number + * * Description: Returns nothing. Sanity checks pgpt and agpt fields * and prints warnings on discrepancies. * @@ -572,9 +573,10 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) /** * find_valid_gpt() - Search disk for valid GPT headers and PTEs - * @state - * @gpt is a GPT header ptr, filled on return. - * @ptes is a PTEs ptr, filled on return. + * @state: disk parsed partitions + * @gpt: GPT header ptr, filled on return. + * @ptes: PTEs ptr, filled on return. + * * Description: Returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. * Validity depends on PMBR being valid (or being overridden by the @@ -663,7 +665,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, /** * efi_partition(struct parsed_partitions *state) - * @state + * @state: disk parsed partitions * * Description: called from check.c, if the disk contains GPT * partitions, sets up partition entries in the kernel. diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 9123f250b425..93e7c1b32edd 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -159,8 +159,9 @@ static void parse_extended(struct parsed_partitions *state, /* * First process the data partition(s) */ - for (i=0; i<4; i++, p++) { + for (i = 0; i < 4; i++, p++) { sector_t offs, size, next; + if (!nr_sects(p) || is_extended_partition(p)) continue; @@ -194,7 +195,7 @@ static void parse_extended(struct parsed_partitions *state, * It should be a link to the next logical partition. */ p -= 4; - for (i=0; i<4; i++, p++) + for (i = 0; i < 4; i++, p++) if (nr_sects(p) && is_extended_partition(p)) break; if (i == 4) @@ -243,8 +244,8 @@ static void parse_solaris_x86(struct parsed_partitions *state, return; } /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ - max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; - for (i=0; i<max_nparts && state->next<state->limit; i++) { + max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; + for (i = 0; i < max_nparts && state->next < state->limit; i++) { struct solaris_x86_slice *s = &v->v_slice[i]; char tmp[3 + 10 + 1 + 1]; @@ -409,7 +410,7 @@ static void parse_minix(struct parsed_partitions *state, /* The first sector of a Minix partition can have either * a secondary MBR describing its subpartitions, or * the normal boot sector. */ - if (msdos_magic_present (data + 510) && + if (msdos_magic_present(data + 510) && SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; @@ -527,6 +528,7 @@ int msdos_partition(struct parsed_partitions *state) for (slot = 1 ; slot <= 4 ; slot++, p++) { sector_t start = start_sect(p)*sector_size; sector_t size = nr_sects(p)*sector_size; + if (!size) continue; if (is_extended_partition(p)) { @@ -537,6 +539,7 @@ int msdos_partition(struct parsed_partitions *state) * sector, although it may not be enough/proper. */ sector_t n = 2; + n = min(size, max(sector_size, n)); put_partition(state, slot, start, n); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 14695c6221c8..9b8eaeca6a79 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -82,9 +82,18 @@ static int sg_set_timeout(struct request_queue *q, int __user *p) return err; } +static int max_sectors_bytes(struct request_queue *q) +{ + unsigned int max_sectors = queue_max_sectors(q); + + max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9); + + return max_sectors << 9; +} + static int sg_get_reserved_size(struct request_queue *q, int __user *p) { - unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9); + int val = min_t(int, q->sg_reserved_size, max_sectors_bytes(q)); return put_user(val, p); } @@ -98,10 +107,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p) if (size < 0) return -EINVAL; - if (size > (queue_max_sectors(q) << 9)) - size = queue_max_sectors(q) << 9; - q->sg_reserved_size = size; + q->sg_reserved_size = min(size, max_sectors_bytes(q)); return 0; } @@ -272,7 +279,6 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, r = blk_rq_unmap_user(bio); if (!ret) ret = r; - blk_put_request(rq); return ret; } @@ -283,14 +289,13 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, unsigned long start_time; ssize_t ret = 0; int writing = 0; + int at_head = 0; struct request *rq; char sense[SCSI_SENSE_BUFFERSIZE]; struct bio *bio; if (hdr->interface_id != 'S') return -EINVAL; - if (hdr->cmd_len > BLK_MAX_CDB) - return -EINVAL; if (hdr->dxfer_len > (queue_max_hw_sectors(q) << 9)) return -EIO; @@ -306,17 +311,26 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, case SG_DXFER_FROM_DEV: break; } + if (hdr->flags & SG_FLAG_Q_AT_HEAD) + at_head = 1; + ret = -ENOMEM; rq = blk_get_request(q, writing ? WRITE : READ, GFP_KERNEL); if (!rq) - return -ENOMEM; + goto out; blk_rq_set_block_pc(rq); - if (blk_fill_sghdr_rq(q, rq, hdr, mode)) { - blk_put_request(rq); - return -EFAULT; + if (hdr->cmd_len > BLK_MAX_CDB) { + rq->cmd = kzalloc(hdr->cmd_len, GFP_KERNEL); + if (!rq->cmd) + goto out_put_request; } + ret = -EFAULT; + if (blk_fill_sghdr_rq(q, rq, hdr, mode)) + goto out_free_cdb; + + ret = 0; if (hdr->iovec_count) { size_t iov_data_len; struct iovec *iov = NULL; @@ -325,7 +339,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, 0, NULL, &iov); if (ret < 0) { kfree(iov); - goto out; + goto out_free_cdb; } iov_data_len = ret; @@ -348,7 +362,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, GFP_KERNEL); if (ret) - goto out; + goto out_free_cdb; bio = rq->bio; memset(sense, 0, sizeof(sense)); @@ -362,13 +376,18 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, * (if he doesn't check that is his problem). * N.B. a non-zero SCSI status is _not_ necessarily an error. */ - blk_execute_rq(q, bd_disk, rq, 0); + blk_execute_rq(q, bd_disk, rq, at_head); hdr->duration = jiffies_to_msecs(jiffies - start_time); - return blk_complete_sghdr_rq(rq, hdr, bio); -out: + ret = blk_complete_sghdr_rq(rq, hdr, bio); + +out_free_cdb: + if (rq->cmd != rq->__cmd) + kfree(rq->cmd); +out_put_request: blk_put_request(rq); +out: return ret; } @@ -438,6 +457,11 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, } rq = blk_get_request(q, in_len ? WRITE : READ, __GFP_WAIT); + if (!rq) { + err = -ENOMEM; + goto error; + } + blk_rq_set_block_pc(rq); cmdlen = COMMAND_SIZE(opcode); @@ -491,7 +515,6 @@ int sg_scsi_ioctl(struct request_queue *q, struct gendisk *disk, fmode_t mode, memset(sense, 0, sizeof(sense)); rq->sense = sense; rq->sense_len = 0; - blk_rq_set_block_pc(rq); blk_execute_rq(q, disk, rq, 0); @@ -511,7 +534,8 @@ out: error: kfree(buffer); - blk_put_request(rq); + if (rq) + blk_put_request(rq); return err; } EXPORT_SYMBOL_GPL(sg_scsi_ioctl); |