diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-14 09:07:02 -0600 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-08-14 09:07:02 -0600 |
commit | 4a319a490ca59a746b3d36768c0e29ee19832366 (patch) | |
tree | 41c34e68fdf2477aa9ae40d8c3d77cdec93f568b /block | |
parent | f0094b28f3038936c1985be64dbe83f0e950b671 (diff) | |
parent | d97a86c170b4e432f76db072a827fe30b4d6f659 (diff) |
Merge branch 'for-3.17/core' of git://git.kernel.dk/linux-block
Pull block core bits from Jens Axboe:
"Small round this time, after the massive blk-mq dump for 3.16. This
pull request contains:
- Fixes for max_sectors overflow in ioctls from Akinobu Mita.
- Partition off-by-one bug fix in aix partitions from Dan Carpenter.
- Various small partition cleanups from Fabian Frederick.
- Fix for the block integrity code sometimes returning the wrong
vector count from Gu Zheng.
- Cleanup and re-org of the blk-mq queue enter/exit percpu counters
from Tejun. Dependent on the percpu pull for 3.17 (which was in
the block tree too), that you have already pulled in.
- A blkcg oops fix, also from Tejun"
* 'for-3.17/core' of git://git.kernel.dk/linux-block:
partitions: aix.c: off by one bug
blkcg: don't call into policy draining if root_blkg is already gone
Revert "bio: modify __bio_add_page() to accept pages that don't start a new segment"
bio: modify __bio_add_page() to accept pages that don't start a new segment
block: fix SG_[GS]ET_RESERVED_SIZE ioctl when max_sectors is huge
block: fix BLKSECTGET ioctl when max_sectors is greater than USHRT_MAX
block/partitions/efi.c: kerneldoc fixing
block/partitions/msdos.c: code clean-up
block/partitions/amiga.c: replace nolevel printk by pr_err
block/partitions/aix.c: replace count*size kzalloc by kcalloc
bio-integrity: add "bip_max_vcnt" into struct bio_integrity_payload
blk-mq: use percpu_ref for mq usage count
blk-mq: collapse __blk_mq_drain_queue() into blk_mq_freeze_queue()
blk-mq: decouble blk-mq freezing from generic bypassing
block, blk-mq: draining can't be skipped even if bypass_depth was non-zero
blk-mq: fix a memory ordering bug in blk_mq_queue_enter()
Diffstat (limited to 'block')
-rw-r--r-- | block/bio-integrity.c | 12 | ||||
-rw-r--r-- | block/blk-core.c | 13 | ||||
-rw-r--r-- | block/blk-mq.c | 81 | ||||
-rw-r--r-- | block/blk-mq.h | 2 | ||||
-rw-r--r-- | block/blk-sysfs.c | 2 | ||||
-rw-r--r-- | block/compat_ioctl.c | 6 | ||||
-rw-r--r-- | block/ioctl.c | 5 | ||||
-rw-r--r-- | block/partitions/aix.c | 4 | ||||
-rw-r--r-- | block/partitions/amiga.c | 12 | ||||
-rw-r--r-- | block/partitions/efi.c | 46 | ||||
-rw-r--r-- | block/partitions/msdos.c | 13 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 15 |
12 files changed, 102 insertions, 109 deletions
diff --git a/block/bio-integrity.c b/block/bio-integrity.c index 9e241063a616..bc423f7b02da 100644 --- a/block/bio-integrity.c +++ b/block/bio-integrity.c @@ -70,8 +70,10 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio, bs->bvec_integrity_pool); if (!bip->bip_vec) goto err; + bip->bip_max_vcnt = bvec_nr_vecs(idx); } else { bip->bip_vec = bip->bip_inline_vecs; + bip->bip_max_vcnt = inline_vecs; } bip->bip_slab = idx; @@ -114,14 +116,6 @@ void bio_integrity_free(struct bio *bio) } EXPORT_SYMBOL(bio_integrity_free); -static inline unsigned int bip_integrity_vecs(struct bio_integrity_payload *bip) -{ - if (bip->bip_slab == BIO_POOL_NONE) - return BIP_INLINE_VECS; - - return bvec_nr_vecs(bip->bip_slab); -} - /** * bio_integrity_add_page - Attach integrity metadata * @bio: bio to update @@ -137,7 +131,7 @@ int bio_integrity_add_page(struct bio *bio, struct page *page, struct bio_integrity_payload *bip = bio->bi_integrity; struct bio_vec *iv; - if (bip->bip_vcnt >= bip_integrity_vecs(bip)) { + if (bip->bip_vcnt >= bip->bip_max_vcnt) { printk(KERN_ERR "%s: bip_vec full\n", __func__); return 0; } diff --git a/block/blk-core.c b/block/blk-core.c index 6f8dba161bfe..c359d72e9d76 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -438,14 +438,17 @@ static void __blk_drain_queue(struct request_queue *q, bool drain_all) */ void blk_queue_bypass_start(struct request_queue *q) { - bool drain; - spin_lock_irq(q->queue_lock); - drain = !q->bypass_depth++; + q->bypass_depth++; queue_flag_set(QUEUE_FLAG_BYPASS, q); spin_unlock_irq(q->queue_lock); - if (drain) { + /* + * Queues start drained. Skip actual draining till init is + * complete. This avoids lenghty delays during queue init which + * can happen many times during boot. 
+ */ + if (blk_queue_init_done(q)) { spin_lock_irq(q->queue_lock); __blk_drain_queue(q, false); spin_unlock_irq(q->queue_lock); @@ -511,7 +514,7 @@ void blk_cleanup_queue(struct request_queue *q) * prevent that q->request_fn() gets invoked after draining finished. */ if (q->mq_ops) { - blk_mq_drain_queue(q); + blk_mq_freeze_queue(q); spin_lock_irq(lock); } else { spin_lock_irq(lock); diff --git a/block/blk-mq.c b/block/blk-mq.c index ad69ef657e85..5189cb1e478a 100644 --- a/block/blk-mq.c +++ b/block/blk-mq.c @@ -78,68 +78,47 @@ static void blk_mq_hctx_clear_pending(struct blk_mq_hw_ctx *hctx, static int blk_mq_queue_enter(struct request_queue *q) { - int ret; - - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - smp_wmb(); - - /* we have problems freezing the queue if it's initializing */ - if (!blk_queue_dying(q) && - (!blk_queue_bypass(q) || !blk_queue_init_done(q))) - return 0; - - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + while (true) { + int ret; - spin_lock_irq(q->queue_lock); - ret = wait_event_interruptible_lock_irq(q->mq_freeze_wq, - !blk_queue_bypass(q) || blk_queue_dying(q), - *q->queue_lock); - /* inc usage with lock hold to avoid freeze_queue runs here */ - if (!ret && !blk_queue_dying(q)) - __percpu_counter_add(&q->mq_usage_counter, 1, 1000000); - else if (blk_queue_dying(q)) - ret = -ENODEV; - spin_unlock_irq(q->queue_lock); + if (percpu_ref_tryget_live(&q->mq_usage_counter)) + return 0; - return ret; + ret = wait_event_interruptible(q->mq_freeze_wq, + !q->mq_freeze_depth || blk_queue_dying(q)); + if (blk_queue_dying(q)) + return -ENODEV; + if (ret) + return ret; + } } static void blk_mq_queue_exit(struct request_queue *q) { - __percpu_counter_add(&q->mq_usage_counter, -1, 1000000); + percpu_ref_put(&q->mq_usage_counter); } -void blk_mq_drain_queue(struct request_queue *q) +static void blk_mq_usage_counter_release(struct percpu_ref *ref) { - while (true) { - s64 count; - - spin_lock_irq(q->queue_lock); - count = 
percpu_counter_sum(&q->mq_usage_counter); - spin_unlock_irq(q->queue_lock); + struct request_queue *q = + container_of(ref, struct request_queue, mq_usage_counter); - if (count == 0) - break; - blk_mq_start_hw_queues(q); - msleep(10); - } + wake_up_all(&q->mq_freeze_wq); } /* * Guarantee no request is in use, so we can change any data structure of * the queue afterward. */ -static void blk_mq_freeze_queue(struct request_queue *q) +void blk_mq_freeze_queue(struct request_queue *q) { - bool drain; - spin_lock_irq(q->queue_lock); - drain = !q->bypass_depth++; - queue_flag_set(QUEUE_FLAG_BYPASS, q); + q->mq_freeze_depth++; spin_unlock_irq(q->queue_lock); - if (drain) - blk_mq_drain_queue(q); + percpu_ref_kill(&q->mq_usage_counter); + blk_mq_run_queues(q, false); + wait_event(q->mq_freeze_wq, percpu_ref_is_zero(&q->mq_usage_counter)); } static void blk_mq_unfreeze_queue(struct request_queue *q) @@ -147,14 +126,13 @@ static void blk_mq_unfreeze_queue(struct request_queue *q) bool wake = false; spin_lock_irq(q->queue_lock); - if (!--q->bypass_depth) { - queue_flag_clear(QUEUE_FLAG_BYPASS, q); - wake = true; - } - WARN_ON_ONCE(q->bypass_depth < 0); + wake = !--q->mq_freeze_depth; + WARN_ON_ONCE(q->mq_freeze_depth < 0); spin_unlock_irq(q->queue_lock); - if (wake) + if (wake) { + percpu_ref_reinit(&q->mq_usage_counter); wake_up_all(&q->mq_freeze_wq); + } } bool blk_mq_can_queue(struct blk_mq_hw_ctx *hctx) @@ -1798,7 +1776,7 @@ struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *set) if (!q) goto err_hctxs; - if (percpu_counter_init(&q->mq_usage_counter, 0)) + if (percpu_ref_init(&q->mq_usage_counter, blk_mq_usage_counter_release)) goto err_map; setup_timer(&q->timeout, blk_mq_rq_timer, (unsigned long) q); @@ -1891,7 +1869,7 @@ void blk_mq_free_queue(struct request_queue *q) blk_mq_exit_hw_queues(q, set, set->nr_hw_queues); blk_mq_free_hw_queues(q, set); - percpu_counter_destroy(&q->mq_usage_counter); + percpu_ref_exit(&q->mq_usage_counter); 
free_percpu(q->queue_ctx); kfree(q->queue_hw_ctx); @@ -2050,8 +2028,7 @@ static int __init blk_mq_init(void) { blk_mq_cpu_init(); - /* Must be called after percpu_counter_hotcpu_callback() */ - hotcpu_notifier(blk_mq_queue_reinit_notify, -10); + hotcpu_notifier(blk_mq_queue_reinit_notify, 0); return 0; } diff --git a/block/blk-mq.h b/block/blk-mq.h index 26460884c6cd..ca4964a6295d 100644 --- a/block/blk-mq.h +++ b/block/blk-mq.h @@ -28,7 +28,7 @@ struct blk_mq_ctx { void __blk_mq_complete_request(struct request *rq); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_init_flush(struct request_queue *q); -void blk_mq_drain_queue(struct request_queue *q); +void blk_mq_freeze_queue(struct request_queue *q); void blk_mq_free_queue(struct request_queue *q); void blk_mq_clone_flush_request(struct request *flush_rq, struct request *orig_rq); diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index 23321fbab293..4db5abf96b9e 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -554,8 +554,8 @@ int blk_register_queue(struct gendisk *disk) * Initialization must be complete by now. Finish the initial * bypass from queue allocation. 
*/ - blk_queue_bypass_end(q); queue_flag_set_unlocked(QUEUE_FLAG_INIT_DONE, q); + blk_queue_bypass_end(q); ret = blk_trace_init_sysfs(dev); if (ret) diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index a0926a6094b2..18b282ce361e 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -663,6 +663,7 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) fmode_t mode = file->f_mode; struct backing_dev_info *bdi; loff_t size; + unsigned int max_sectors; /* * O_NDELAY can be altered using fcntl(.., F_SETFL, ..), so we have @@ -719,8 +720,9 @@ long compat_blkdev_ioctl(struct file *file, unsigned cmd, unsigned long arg) case BLKSSZGET: /* get block device hardware sector size */ return compat_put_int(arg, bdev_logical_block_size(bdev)); case BLKSECTGET: - return compat_put_ushort(arg, - queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return compat_put_ushort(arg, max_sectors); case BLKROTATIONAL: return compat_put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); diff --git a/block/ioctl.c b/block/ioctl.c index 7d5c3b20af45..d6cda8147c91 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -278,6 +278,7 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, struct backing_dev_info *bdi; loff_t size; int ret, n; + unsigned int max_sectors; switch(cmd) { case BLKFLSBUF: @@ -375,7 +376,9 @@ int blkdev_ioctl(struct block_device *bdev, fmode_t mode, unsigned cmd, case BLKDISCARDZEROES: return put_uint(arg, bdev_discard_zeroes_data(bdev)); case BLKSECTGET: - return put_ushort(arg, queue_max_sectors(bdev_get_queue(bdev))); + max_sectors = min_t(unsigned int, USHRT_MAX, + queue_max_sectors(bdev_get_queue(bdev))); + return put_ushort(arg, max_sectors); case BLKROTATIONAL: return put_ushort(arg, !blk_queue_nonrot(bdev_get_queue(bdev))); case BLKRASET: diff --git a/block/partitions/aix.c b/block/partitions/aix.c index 
43be471d9b1d..f3ed7b2d89bf 100644 --- a/block/partitions/aix.c +++ b/block/partitions/aix.c @@ -215,7 +215,7 @@ int aix_partition(struct parsed_partitions *state) numlvs = be16_to_cpu(p->numlvs); put_dev_sector(sect); } - lvip = kzalloc(sizeof(struct lv_info) * state->limit, GFP_KERNEL); + lvip = kcalloc(state->limit, sizeof(struct lv_info), GFP_KERNEL); if (!lvip) return 0; if (numlvs && (d = read_part_sector(state, vgda_sector + 1, &sect))) { @@ -253,7 +253,7 @@ int aix_partition(struct parsed_partitions *state) continue; } lv_ix = be16_to_cpu(p->lv_ix) - 1; - if (lv_ix > state->limit) { + if (lv_ix >= state->limit) { cur_lv_ix = -1; continue; } diff --git a/block/partitions/amiga.c b/block/partitions/amiga.c index 70cbf44a1560..2b13533d60a2 100644 --- a/block/partitions/amiga.c +++ b/block/partitions/amiga.c @@ -7,6 +7,8 @@ * Re-organised Feb 1998 Russell King */ +#define pr_fmt(fmt) fmt + #include <linux/types.h> #include <linux/affs_hardblocks.h> @@ -40,7 +42,7 @@ int amiga_partition(struct parsed_partitions *state) data = read_part_sector(state, blk, &sect); if (!data) { if (warn_no_part) - printk("Dev %s: unable to read RDB block %d\n", + pr_err("Dev %s: unable to read RDB block %d\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; @@ -57,12 +59,12 @@ int amiga_partition(struct parsed_partitions *state) *(__be32 *)(data+0xdc) = 0; if (checksum_block((__be32 *)data, be32_to_cpu(rdb->rdb_SummedLongs) & 0x7F)==0) { - printk("Warning: Trashed word at 0xd0 in block %d " - "ignored in checksum calculation\n",blk); + pr_err("Trashed word at 0xd0 in block %d ignored in checksum calculation\n", + blk); break; } - printk("Dev %s: RDB in block %d has bad checksum\n", + pr_err("Dev %s: RDB in block %d has bad checksum\n", bdevname(state->bdev, b), blk); } @@ -83,7 +85,7 @@ int amiga_partition(struct parsed_partitions *state) data = read_part_sector(state, blk, &sect); if (!data) { if (warn_no_part) - printk("Dev %s: unable to read partition block %d\n", + pr_err("Dev %s: 
unable to read partition block %d\n", bdevname(state->bdev, b), blk); res = -1; goto rdb_done; diff --git a/block/partitions/efi.c b/block/partitions/efi.c index dc51f467a560..56d08fd75b1a 100644 --- a/block/partitions/efi.c +++ b/block/partitions/efi.c @@ -121,7 +121,7 @@ __setup("gpt", force_gpt_fn); /** * efi_crc32() - EFI version of crc32 function * @buf: buffer to calculate crc32 of - * @len - length of buf + * @len: length of buf * * Description: Returns EFI-style CRC32 value for @buf * @@ -240,10 +240,10 @@ done: /** * read_lba(): Read bytes from disk, starting at given LBA - * @state - * @lba - * @buffer - * @size_t + * @state: disk parsed partitions + * @lba: the Logical Block Address of the partition table + * @buffer: destination buffer + * @count: bytes to read * * Description: Reads @count bytes from @state->bdev into @buffer. * Returns number of bytes read on success, 0 on error. @@ -277,8 +277,8 @@ static size_t read_lba(struct parsed_partitions *state, /** * alloc_read_gpt_entries(): reads partition entries from disk - * @state - * @gpt - GPT header + * @state: disk parsed partitions + * @gpt: GPT header * * Description: Returns ptes on success, NULL on error. * Allocates space for PTEs based on information found in @gpt. @@ -312,8 +312,8 @@ static gpt_entry *alloc_read_gpt_entries(struct parsed_partitions *state, /** * alloc_read_gpt_header(): Allocates GPT header, reads into it from disk - * @state - * @lba is the Logical Block Address of the partition table + * @state: disk parsed partitions + * @lba: the Logical Block Address of the partition table * * Description: returns GPT header on success, NULL on error. Allocates * and fills a GPT header starting at @ from @state->bdev. 
@@ -340,10 +340,10 @@ static gpt_header *alloc_read_gpt_header(struct parsed_partitions *state, /** * is_gpt_valid() - tests one GPT header and PTEs for validity - * @state - * @lba is the logical block address of the GPT header to test - * @gpt is a GPT header ptr, filled on return. - * @ptes is a PTEs ptr, filled on return. + * @state: disk parsed partitions + * @lba: logical block address of the GPT header to test + * @gpt: GPT header ptr, filled on return. + * @ptes: PTEs ptr, filled on return. * * Description: returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. @@ -461,8 +461,8 @@ static int is_gpt_valid(struct parsed_partitions *state, u64 lba, /** * is_pte_valid() - tests one PTE for validity - * @pte is the pte to check - * @lastlba is last lba of the disk + * @pte:pte to check + * @lastlba: last lba of the disk * * Description: returns 1 if valid, 0 on error. */ @@ -478,9 +478,10 @@ is_pte_valid(const gpt_entry *pte, const u64 lastlba) /** * compare_gpts() - Search disk for valid GPT headers and PTEs - * @pgpt is the primary GPT header - * @agpt is the alternate GPT header - * @lastlba is the last LBA number + * @pgpt: primary GPT header + * @agpt: alternate GPT header + * @lastlba: last LBA number + * * Description: Returns nothing. Sanity checks pgpt and agpt fields * and prints warnings on discrepancies. * @@ -572,9 +573,10 @@ compare_gpts(gpt_header *pgpt, gpt_header *agpt, u64 lastlba) /** * find_valid_gpt() - Search disk for valid GPT headers and PTEs - * @state - * @gpt is a GPT header ptr, filled on return. - * @ptes is a PTEs ptr, filled on return. + * @state: disk parsed partitions + * @gpt: GPT header ptr, filled on return. + * @ptes: PTEs ptr, filled on return. + * * Description: Returns 1 if valid, 0 on error. * If valid, returns pointers to newly allocated GPT header and PTEs. 
* Validity depends on PMBR being valid (or being overridden by the @@ -663,7 +665,7 @@ static int find_valid_gpt(struct parsed_partitions *state, gpt_header **gpt, /** * efi_partition(struct parsed_partitions *state) - * @state + * @state: disk parsed partitions * * Description: called from check.c, if the disk contains GPT * partitions, sets up partition entries in the kernel. diff --git a/block/partitions/msdos.c b/block/partitions/msdos.c index 9123f250b425..93e7c1b32edd 100644 --- a/block/partitions/msdos.c +++ b/block/partitions/msdos.c @@ -159,8 +159,9 @@ static void parse_extended(struct parsed_partitions *state, /* * First process the data partition(s) */ - for (i=0; i<4; i++, p++) { + for (i = 0; i < 4; i++, p++) { sector_t offs, size, next; + if (!nr_sects(p) || is_extended_partition(p)) continue; @@ -194,7 +195,7 @@ static void parse_extended(struct parsed_partitions *state, * It should be a link to the next logical partition. */ p -= 4; - for (i=0; i<4; i++, p++) + for (i = 0; i < 4; i++, p++) if (nr_sects(p) && is_extended_partition(p)) break; if (i == 4) @@ -243,8 +244,8 @@ static void parse_solaris_x86(struct parsed_partitions *state, return; } /* Ensure we can handle previous case of VTOC with 8 entries gracefully */ - max_nparts = le16_to_cpu (v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; - for (i=0; i<max_nparts && state->next<state->limit; i++) { + max_nparts = le16_to_cpu(v->v_nparts) > 8 ? SOLARIS_X86_NUMSLICE : 8; + for (i = 0; i < max_nparts && state->next < state->limit; i++) { struct solaris_x86_slice *s = &v->v_slice[i]; char tmp[3 + 10 + 1 + 1]; @@ -409,7 +410,7 @@ static void parse_minix(struct parsed_partitions *state, /* The first sector of a Minix partition can have either * a secondary MBR describing its subpartitions, or * the normal boot sector. 
*/ - if (msdos_magic_present (data + 510) && + if (msdos_magic_present(data + 510) && SYS_IND(p) == MINIX_PARTITION) { /* subpartition table present */ char tmp[1 + BDEVNAME_SIZE + 10 + 9 + 1]; @@ -527,6 +528,7 @@ int msdos_partition(struct parsed_partitions *state) for (slot = 1 ; slot <= 4 ; slot++, p++) { sector_t start = start_sect(p)*sector_size; sector_t size = nr_sects(p)*sector_size; + if (!size) continue; if (is_extended_partition(p)) { @@ -537,6 +539,7 @@ int msdos_partition(struct parsed_partitions *state) * sector, although it may not be enough/proper. */ sector_t n = 2; + n = min(size, max(sector_size, n)); put_partition(state, slot, start, n); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 14695c6221c8..bda1497add4c 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -82,9 +82,18 @@ static int sg_set_timeout(struct request_queue *q, int __user *p) return err; } +static int max_sectors_bytes(struct request_queue *q) +{ + unsigned int max_sectors = queue_max_sectors(q); + + max_sectors = min_t(unsigned int, max_sectors, INT_MAX >> 9); + + return max_sectors << 9; +} + static int sg_get_reserved_size(struct request_queue *q, int __user *p) { - unsigned val = min(q->sg_reserved_size, queue_max_sectors(q) << 9); + int val = min_t(int, q->sg_reserved_size, max_sectors_bytes(q)); return put_user(val, p); } @@ -98,10 +107,8 @@ static int sg_set_reserved_size(struct request_queue *q, int __user *p) if (size < 0) return -EINVAL; - if (size > (queue_max_sectors(q) << 9)) - size = queue_max_sectors(q) << 9; - q->sg_reserved_size = size; + q->sg_reserved_size = min(size, max_sectors_bytes(q)); return 0; } |