diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 31 | ||||
-rw-r--r-- | block/blk-merge.c | 5 | ||||
-rw-r--r-- | block/blk-settings.c | 20 | ||||
-rw-r--r-- | block/blk-sysfs.c | 11 | ||||
-rw-r--r-- | block/blk-timeout.c | 13 | ||||
-rw-r--r-- | block/blk.h | 7 | ||||
-rw-r--r-- | block/bsg.c | 3 | ||||
-rw-r--r-- | block/cfq-iosched.c | 66 | ||||
-rw-r--r-- | block/compat_ioctl.c | 2 | ||||
-rw-r--r-- | block/elevator.c | 8 | ||||
-rw-r--r-- | block/genhd.c | 12 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 13 |
12 files changed, 116 insertions, 75 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index cce7a88dc612..648f15cb41f1 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -28,22 +28,14 @@ #include <linux/task_io_accounting_ops.h> #include <linux/blktrace_api.h> #include <linux/fault-inject.h> -#include <trace/block.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/block.h> #include "blk.h" -DEFINE_TRACE(block_plug); -DEFINE_TRACE(block_unplug_io); -DEFINE_TRACE(block_unplug_timer); -DEFINE_TRACE(block_getrq); -DEFINE_TRACE(block_sleeprq); -DEFINE_TRACE(block_rq_requeue); -DEFINE_TRACE(block_bio_backmerge); -DEFINE_TRACE(block_bio_frontmerge); -DEFINE_TRACE(block_bio_queue); -DEFINE_TRACE(block_rq_complete); -DEFINE_TRACE(block_remap); /* Also used in drivers/md/dm.c */ EXPORT_TRACEPOINT_SYMBOL_GPL(block_remap); +EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_complete); static int __make_request(struct request_queue *q, struct bio *bio); @@ -643,7 +635,7 @@ static inline void blk_free_request(struct request_queue *q, struct request *rq) } static struct request * -blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) +blk_alloc_request(struct request_queue *q, int flags, int priv, gfp_t gfp_mask) { struct request *rq = mempool_alloc(q->rq.rq_pool, gfp_mask); @@ -652,7 +644,7 @@ blk_alloc_request(struct request_queue *q, int rw, int priv, gfp_t gfp_mask) blk_rq_init(q, rq); - rq->cmd_flags = rw | REQ_ALLOCED; + rq->cmd_flags = flags | REQ_ALLOCED; if (priv) { if (unlikely(elv_set_request(q, rq, gfp_mask))) { @@ -792,6 +784,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags, if (priv) rl->elvpriv++; + if (blk_queue_io_stat(q)) + rw_flags |= REQ_IO_STAT; spin_unlock_irq(q->queue_lock); rq = blk_alloc_request(q, rw_flags, priv, gfp_mask); @@ -1275,7 +1269,7 @@ static inline void blk_partition_remap(struct bio *bio) bio->bi_bdev = bdev->bd_contains; trace_block_remap(bdev_get_queue(bio->bi_bdev), bio, - bdev->bd_dev, bio->bi_sector, + bdev->bd_dev, bio->bi_sector - p->start_sect); } } @@ -1444,8 +1438,7 @@ static inline void __generic_make_request(struct bio *bio) goto end_io; if (old_sector != -1) - trace_block_remap(q, bio, old_dev, bio->bi_sector, - old_sector); + trace_block_remap(q, bio, old_dev, old_sector); trace_block_bio_queue(q, bio); @@ -1770,10 +1763,10 @@ static int __end_that_request_first(struct request *req, int error, } else { int idx = bio->bi_idx + next_idx; - if (unlikely(bio->bi_idx >= bio->bi_vcnt)) { + if (unlikely(idx >= bio->bi_vcnt)) { blk_dump_rq_flags(req, "__end_that"); printk(KERN_ERR "%s: bio idx %d >= vcnt %d\n", - __func__, bio->bi_idx, bio->bi_vcnt); + __func__, idx, bio->bi_vcnt); break; } diff --git a/block/blk-merge.c b/block/blk-merge.c index 63760ca3da0f..23d2a6fe34a3 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -402,7 +402,10 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - blk_account_io_merge(req); + /* + * 'next' is going away, so update stats accordingly + */ + blk_account_io_merge(next); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) diff --git a/block/blk-settings.c b/block/blk-settings.c index 69c42adde52b..57af728d94bb 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -156,26 +156,28 @@ EXPORT_SYMBOL(blk_queue_make_request); /** * blk_queue_bounce_limit - set bounce buffer limit for queue - * @q: the request queue for the device - * @dma_addr: bus address limit + * @q: the request queue for the device + * @dma_mask: the maximum address the device can handle * * Description: * Different hardware can have different requirements as to what pages * it can do I/O directly to. A low level driver can call * blk_queue_bounce_limit to have lower memory pages allocated as bounce - * buffers for doing I/O to pages residing above @dma_addr. + * buffers for doing I/O to pages residing above @dma_mask. **/ -void blk_queue_bounce_limit(struct request_queue *q, u64 dma_addr) +void blk_queue_bounce_limit(struct request_queue *q, u64 dma_mask) { - unsigned long b_pfn = dma_addr >> PAGE_SHIFT; + unsigned long b_pfn = dma_mask >> PAGE_SHIFT; int dma = 0; q->bounce_gfp = GFP_NOIO; #if BITS_PER_LONG == 64 - /* Assume anything <= 4GB can be handled by IOMMU. - Actually some IOMMUs can handle everything, but I don't - know of a way to test this here. */ - if (b_pfn < (min_t(u64, 0x100000000UL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) + /* + * Assume anything <= 4GB can be handled by IOMMU. Actually + * some IOMMUs can handle everything, but I don't know of a + * way to test this here. + */ + if (b_pfn < (min_t(u64, 0xffffffffUL, BLK_BOUNCE_HIGH) >> PAGE_SHIFT)) dma = 1; q->bounce_pfn = max_low_pfn; #else diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index cac4e9febe6a..26f9ec28f56c 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -209,14 +209,10 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); - elv_quiesce_start(q); - if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); - - elv_quiesce_end(q); spin_unlock_irq(q->queue_lock); return ret; @@ -387,16 +383,21 @@ struct kobj_type blk_queue_ktype = { int blk_register_queue(struct gendisk *disk) { int ret; + struct device *dev = disk_to_dev(disk); struct request_queue *q = disk->queue; if (WARN_ON(!q)) return -ENXIO; + ret = blk_trace_init_sysfs(dev); + if (ret) + return ret; + if (!q->request_fn) return 0; - ret = kobject_add(&q->kobj, kobject_get(&disk_to_dev(disk)->kobj), + ret = kobject_add(&q->kobj, kobject_get(&dev->kobj), "%s", "queue"); if (ret < 0) return ret; diff --git a/block/blk-timeout.c b/block/blk-timeout.c index bbbdc4b8ccf2..1ec0d503cacd 100644 --- a/block/blk-timeout.c +++ b/block/blk-timeout.c @@ -211,6 +211,12 @@ void blk_abort_queue(struct request_queue *q) struct request *rq, *tmp; LIST_HEAD(list); + /* + * Not a request based block device, nothing to abort + */ + if (!q->request_fn) + return; + spin_lock_irqsave(q->queue_lock, flags); elv_abort_queue(q); @@ -224,6 +230,13 @@ void blk_abort_queue(struct request_queue *q) list_for_each_entry_safe(rq, tmp, &list, timeout_list) blk_abort_request(rq); + /* + * Occasionally, blk_abort_request() will return without + * deleting the element from the list. Make sure we add those back + * instead of leaving them on the local stack list. + */ + list_splice(&list, &q->timeout_list); + spin_unlock_irqrestore(q->queue_lock, flags); } diff --git a/block/blk.h b/block/blk.h index 5dfc41267a08..79c85f7c9ff5 100644 --- a/block/blk.h +++ b/block/blk.h @@ -114,12 +114,7 @@ static inline int blk_cpu_to_group(int cpu) static inline int blk_do_io_stat(struct request *rq) { - struct gendisk *disk = rq->rq_disk; - - if (!disk || !disk->queue) - return 0; - - return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); + return rq->rq_disk && blk_rq_io_stat(rq); } #endif diff --git a/block/bsg.c b/block/bsg.c index 206060e795da..dd81be455e00 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -315,6 +315,7 @@ out: blk_put_request(rq); if (next_rq) { blk_rq_unmap_user(next_rq->bio); + next_rq->bio = NULL; blk_put_request(next_rq); } return ERR_PTR(ret); @@ -448,6 +449,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, hdr->dout_resid = rq->data_len; hdr->din_resid = rq->next_rq->data_len; blk_rq_unmap_user(bidi_bio); + rq->next_rq->bio = NULL; blk_put_request(rq->next_rq); } else if (rq_data_dir(rq) == READ) hdr->din_resid = rq->data_len; @@ -466,6 +468,7 @@ static int blk_complete_sgv4_hdr_rq(struct request *rq, struct sg_io_v4 *hdr, blk_rq_unmap_user(bio); if (rq->cmd != rq->__cmd) kfree(rq->cmd); + rq->bio = NULL; blk_put_request(rq); return ret; diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 0d3b70de3d80..a55a9bd75bd1 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -154,6 +154,8 @@ struct cfq_queue { unsigned long rb_key; /* prio tree member */ struct rb_node p_node; + /* prio tree root we belong to, if any */ + struct rb_root *p_root; /* sorted list of pending requests */ struct rb_root sort_list; /* if fifo isn't expired, next request to serve */ @@ -558,10 +560,10 @@ static void cfq_service_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq, } static struct cfq_queue * -cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector, - struct rb_node **ret_parent, struct rb_node ***rb_link) +cfq_prio_tree_lookup(struct cfq_data *cfqd, struct rb_root *root, + sector_t sector, struct rb_node **ret_parent, + struct rb_node ***rb_link) { - struct rb_root *root = &cfqd->prio_trees[ioprio]; struct rb_node **p, *parent; struct cfq_queue *cfqq = NULL; @@ -584,34 +586,38 @@ cfq_prio_tree_lookup(struct cfq_data *cfqd, int ioprio, sector_t sector, else break; p = n; + cfqq = NULL; } *ret_parent = parent; if (rb_link) *rb_link = p; - return NULL; + return cfqq; } static void cfq_prio_tree_add(struct cfq_data *cfqd, struct cfq_queue *cfqq) { - struct rb_root *root = &cfqd->prio_trees[cfqq->ioprio]; struct rb_node **p, *parent; struct cfq_queue *__cfqq; - if (!RB_EMPTY_NODE(&cfqq->p_node)) - rb_erase_init(&cfqq->p_node, root); + if (cfqq->p_root) { + rb_erase(&cfqq->p_node, cfqq->p_root); + cfqq->p_root = NULL; + } if (cfq_class_idle(cfqq)) return; if (!cfqq->next_rq) return; - __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->ioprio, cfqq->next_rq->sector, + cfqq->p_root = &cfqd->prio_trees[cfqq->org_ioprio]; + __cfqq = cfq_prio_tree_lookup(cfqd, cfqq->p_root, cfqq->next_rq->sector, &parent, &p); - BUG_ON(__cfqq); - - rb_link_node(&cfqq->p_node, parent, p); - rb_insert_color(&cfqq->p_node, root); + if (!__cfqq) { + rb_link_node(&cfqq->p_node, parent, p); + rb_insert_color(&cfqq->p_node, cfqq->p_root); + } else + cfqq->p_root = NULL; } /* @@ -656,8 +662,10 @@ static void cfq_del_cfqq_rr(struct cfq_data *cfqd, struct cfq_queue *cfqq) if (!RB_EMPTY_NODE(&cfqq->rb_node)) cfq_rb_erase(&cfqq->rb_node, &cfqd->service_tree); - if (!RB_EMPTY_NODE(&cfqq->p_node)) - rb_erase_init(&cfqq->p_node, &cfqd->prio_trees[cfqq->ioprio]); + if (cfqq->p_root) { + rb_erase(&cfqq->p_node, cfqq->p_root); + cfqq->p_root = NULL; + } BUG_ON(!cfqd->busy_queues); cfqd->busy_queues--; @@ -947,20 +955,24 @@ static inline sector_t cfq_dist_from_last(struct cfq_data *cfqd, return cfqd->last_position - rq->sector; } +#define CIC_SEEK_THR 8 * 1024 +#define CIC_SEEKY(cic) ((cic)->seek_mean > CIC_SEEK_THR) + static inline int cfq_rq_close(struct cfq_data *cfqd, struct request *rq) { struct cfq_io_context *cic = cfqd->active_cic; + sector_t sdist = cic->seek_mean; if (!sample_valid(cic->seek_samples)) - return 0; + sdist = CIC_SEEK_THR; - return cfq_dist_from_last(cfqd, rq) <= cic->seek_mean; + return cfq_dist_from_last(cfqd, rq) <= sdist; } static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, struct cfq_queue *cur_cfqq) { - struct rb_root *root = &cfqd->prio_trees[cur_cfqq->ioprio]; + struct rb_root *root = &cfqd->prio_trees[cur_cfqq->org_ioprio]; struct rb_node *parent, *node; struct cfq_queue *__cfqq; sector_t sector = cfqd->last_position; @@ -972,8 +984,7 @@ static struct cfq_queue *cfqq_close(struct cfq_data *cfqd, * First, if we find a request starting at the end of the last * request, choose it. */ - __cfqq = cfq_prio_tree_lookup(cfqd, cur_cfqq->ioprio, - sector, &parent, NULL); + __cfqq = cfq_prio_tree_lookup(cfqd, root, sector, &parent, NULL); if (__cfqq) return __cfqq; @@ -1039,9 +1050,6 @@ static struct cfq_queue *cfq_close_cooperator(struct cfq_data *cfqd, return cfqq; } - -#define CIC_SEEKY(cic) ((cic)->seek_mean > (8 * 1024)) - static void cfq_arm_slice_timer(struct cfq_data *cfqd) { struct cfq_queue *cfqq = cfqd->active_queue; @@ -1908,7 +1916,9 @@ cfq_update_io_seektime(struct cfq_data *cfqd, struct cfq_io_context *cic, sector_t sdist; u64 total; - if (cic->last_request_pos < rq->sector) + if (!cic->last_request_pos) + sdist = 0; + else if (cic->last_request_pos < rq->sector) sdist = rq->sector - cic->last_request_pos; else sdist = cic->last_request_pos - rq->sector; @@ -2443,12 +2453,22 @@ static void cfq_exit_queue(struct elevator_queue *e) static void *cfq_init_queue(struct request_queue *q) { struct cfq_data *cfqd; + int i; cfqd = kmalloc_node(sizeof(*cfqd), GFP_KERNEL | __GFP_ZERO, q->node); if (!cfqd) return NULL; cfqd->service_tree = CFQ_RB_ROOT; + + /* + * Not strictly needed (since RB_ROOT just clears the node and we + * zeroed cfqd on alloc), but better be safe in case someone decides + * to add magic to the rb code + */ + for (i = 0; i < CFQ_PRIO_LISTS; i++) + cfqd->prio_trees[i] = RB_ROOT; + INIT_LIST_HEAD(&cfqd->cic_list); cfqd->queue = q; diff --git a/block/compat_ioctl.c b/block/compat_ioctl.c index f87615dea46b..f8c218cd08e1 100644 --- a/block/compat_ioctl.c +++ b/block/compat_ioctl.c @@ -568,7 +568,7 @@ static int compat_blk_trace_setup(struct block_device *bdev, char __user *arg) memcpy(&buts.name, &cbuts.name, 32); mutex_lock(&bdev->bd_mutex); - ret = do_blk_trace_setup(q, b, bdev->bd_dev, &buts); + ret = do_blk_trace_setup(q, b, bdev->bd_dev, bdev, &buts); mutex_unlock(&bdev->bd_mutex); if (ret) return ret; diff --git a/block/elevator.c b/block/elevator.c index 7073a9072577..e220f0c543e3 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -33,17 +33,16 @@ #include <linux/compiler.h> #include <linux/delay.h> #include <linux/blktrace_api.h> -#include <trace/block.h> #include <linux/hash.h> #include <linux/uaccess.h> +#include <trace/events/block.h> + #include "blk.h" static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); -DEFINE_TRACE(block_rq_abort); - /* * Merge hash stuff. */ @@ -55,9 +54,6 @@ static const int elv_hash_shift = 6; #define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -DEFINE_TRACE(block_rq_insert); -DEFINE_TRACE(block_rq_issue); - /* * Query io scheduler to see if the current process issuing bio may be * merged with rq. diff --git a/block/genhd.c b/block/genhd.c index a9ec910974c1..1a4916e01732 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -98,7 +98,7 @@ void disk_part_iter_init(struct disk_part_iter *piter, struct gendisk *disk, if (flags & DISK_PITER_REVERSE) piter->idx = ptbl->len - 1; - else if (flags & DISK_PITER_INCL_PART0) + else if (flags & (DISK_PITER_INCL_PART0 | DISK_PITER_INCL_EMPTY_PART0)) piter->idx = 0; else piter->idx = 1; @@ -134,7 +134,8 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) /* determine iteration parameters */ if (piter->flags & DISK_PITER_REVERSE) { inc = -1; - if (piter->flags & DISK_PITER_INCL_PART0) + if (piter->flags & (DISK_PITER_INCL_PART0 | + DISK_PITER_INCL_EMPTY_PART0)) end = -1; else end = 0; @@ -150,7 +151,10 @@ struct hd_struct *disk_part_iter_next(struct disk_part_iter *piter) part = rcu_dereference(ptbl->part[piter->idx]); if (!part) continue; - if (!(piter->flags & DISK_PITER_INCL_EMPTY) && !part->nr_sects) + if (!part->nr_sects && + !(piter->flags & DISK_PITER_INCL_EMPTY) && + !(piter->flags & DISK_PITER_INCL_EMPTY_PART0 && + piter->idx == 0)) continue; get_device(part_to_dev(part)); @@ -1011,7 +1015,7 @@ static int diskstats_show(struct seq_file *seqf, void *v) "\n\n"); */ - disk_part_iter_init(&piter, gp, DISK_PITER_INCL_PART0); + disk_part_iter_init(&piter, gp, DISK_PITER_INCL_EMPTY_PART0); while ((hd = disk_part_iter_next(&piter))) { cpu = part_stat_lock(); part_round_stats(cpu, hd); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index 84b7f8709f41..82a0ca2f6729 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -290,6 +290,7 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, if (hdr->iovec_count) { const int size = sizeof(struct sg_iovec) * hdr->iovec_count; + size_t iov_data_len; struct sg_iovec *iov; iov = kmalloc(size, GFP_KERNEL); @@ -304,8 +305,18 @@ static int sg_io(struct request_queue *q, struct gendisk *bd_disk, goto out; } + /* SG_IO howto says that the shorter of the two wins */ + iov_data_len = iov_length((struct iovec *)iov, + hdr->iovec_count); + if (hdr->dxfer_len < iov_data_len) { + hdr->iovec_count = iov_shorten((struct iovec *)iov, + hdr->iovec_count, + hdr->dxfer_len); + iov_data_len = hdr->dxfer_len; + } + ret = blk_rq_map_user_iov(q, rq, NULL, iov, hdr->iovec_count, - hdr->dxfer_len, GFP_KERNEL); + iov_data_len, GFP_KERNEL); kfree(iov); } else if (hdr->dxfer_len) ret = blk_rq_map_user(q, rq, NULL, hdr->dxferp, hdr->dxfer_len, |