diff options
author | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2011-03-25 17:41:20 +0200 |
---|---|---|
committer | Artem Bityutskiy <Artem.Bityutskiy@nokia.com> | 2011-03-25 17:41:20 +0200 |
commit | 7bf7e370d5919112c223a269462cd0b546903829 (patch) | |
tree | 03ccc715239df14ae168277dbccc9d9cf4d8a2c8 /block/blk-throttle.c | |
parent | 68b1a1e786f29c900fa1c516a402e24f0ece622a (diff) | |
parent | d39dd11c3e6a7af5c20bfac40594db36cf270f42 (diff) |
Merge branch 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6 into for-linus-1
* 'master' of git://git.kernel.org/pub/scm/linux/kernel/git/torvalds/linux-2.6: (9356 commits)
[media] rc: update for bitop name changes
fs: simplify iget & friends
fs: pull inode->i_lock up out of writeback_single_inode
fs: rename inode_lock to inode_hash_lock
fs: move i_wb_list out from under inode_lock
fs: move i_sb_list out from under inode_lock
fs: remove inode_lock from iput_final and prune_icache
fs: Lock the inode LRU list separately
fs: factor inode disposal
fs: protect inode->i_state with inode->i_lock
lib, arch: add filter argument to show_mem and fix private implementations
SLUB: Write to per cpu data when allocating it
slub: Fix debugobjects with lockless fastpath
autofs4: Do not potentially dereference NULL pointer returned by fget() in autofs_dev_ioctl_setpipefd()
autofs4 - remove autofs4_lock
autofs4 - fix d_manage() return on rcu-walk
autofs4 - fix autofs4_expire_indirect() traversal
autofs4 - fix dentry leak in autofs4_expire_direct()
autofs4 - reinstate last used update on access
vfs - check non-mountpoint dentry might block in __follow_mount_rcu()
...
NOTE!
This merge commit was created to fix a compilation error. The block
tree was merged upstream and removed the 'elv_queue_empty()'
function which the new 'mtdswap' driver is using. So a simple
merge of the mtd tree with upstream does not compile. And the
mtd tree has already been published, so re-basing it is not an option.
To fix this unfortunate situation, I had to merge upstream into the
mtd-2.6.git tree without committing, put the fixup patch on top of
this, and then commit this. The result is that we do not have commits
which do not compile.
In other words, this merge commit "merges" 3 things: the MTD tree, the
upstream tree, and the fixup patch.
Diffstat (limited to 'block/blk-throttle.c')
-rw-r--r-- | block/blk-throttle.c | 170 |
1 files changed, 94 insertions, 76 deletions
diff --git a/block/blk-throttle.c b/block/blk-throttle.c index 381b09bb562b..5352bdafbcf0 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -20,6 +20,11 @@ static int throtl_quantum = 32; /* Throttling is performed over 100ms slice and after that slice is renewed */ static unsigned long throtl_slice = HZ/10; /* 100 ms */ +/* A workqueue to queue throttle related work */ +static struct workqueue_struct *kthrotld_workqueue; +static void throtl_schedule_delayed_work(struct throtl_data *td, + unsigned long delay); + struct throtl_rb_root { struct rb_root rb; struct rb_node *left; @@ -97,7 +102,7 @@ struct throtl_data /* Work for dispatching throttled bios */ struct delayed_work throtl_work; - atomic_t limits_changed; + bool limits_changed; }; enum tg_state_flags { @@ -168,7 +173,15 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, * tree of blkg (instead of traversing through hash list all * the time. */ - tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); + + /* + * This is the common case when there are no blkio cgroups. 
+ * Avoid lookup in this case + */ + if (blkcg == &blkio_root_cgroup) + tg = &td->root_tg; + else + tg = tg_of_blkg(blkiocg_lookup_group(blkcg, key)); /* Fill in device details for root group */ if (tg && !tg->blkg.dev && bdi->dev && dev_name(bdi->dev)) { @@ -188,6 +201,7 @@ static struct throtl_grp * throtl_find_alloc_tg(struct throtl_data *td, RB_CLEAR_NODE(&tg->rb_node); bio_list_init(&tg->bio_lists[0]); bio_list_init(&tg->bio_lists[1]); + td->limits_changed = false; /* * Take the initial reference that will be released on destroy @@ -337,10 +351,9 @@ static void throtl_schedule_next_dispatch(struct throtl_data *td) update_min_dispatch_time(st); if (time_before_eq(st->min_disptime, jiffies)) - throtl_schedule_delayed_work(td->queue, 0); + throtl_schedule_delayed_work(td, 0); else - throtl_schedule_delayed_work(td->queue, - (st->min_disptime - jiffies)); + throtl_schedule_delayed_work(td, (st->min_disptime - jiffies)); } static inline void @@ -725,34 +738,36 @@ static void throtl_process_limit_change(struct throtl_data *td) struct throtl_grp *tg; struct hlist_node *pos, *n; - if (!atomic_read(&td->limits_changed)) + if (!td->limits_changed) return; - throtl_log(td, "limit changed =%d", atomic_read(&td->limits_changed)); + xchg(&td->limits_changed, false); - /* - * Make sure updates from throtl_update_blkio_group_read_bps() group - * of functions to tg->limits_changed are visible. We do not - * want update td->limits_changed to be visible but update to - * tg->limits_changed not being visible yet on this cpu. Hence - * the read barrier. 
- */ - smp_rmb(); + throtl_log(td, "limits changed"); hlist_for_each_entry_safe(tg, pos, n, &td->tg_list, tg_node) { - if (throtl_tg_on_rr(tg) && tg->limits_changed) { - throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" - " riops=%u wiops=%u", tg->bps[READ], - tg->bps[WRITE], tg->iops[READ], - tg->iops[WRITE]); + if (!tg->limits_changed) + continue; + + if (!xchg(&tg->limits_changed, false)) + continue; + + throtl_log_tg(td, tg, "limit change rbps=%llu wbps=%llu" + " riops=%u wiops=%u", tg->bps[READ], tg->bps[WRITE], + tg->iops[READ], tg->iops[WRITE]); + + /* + * Restart the slices for both READ and WRITES. It + * might happen that a group's limit are dropped + * suddenly and we don't want to account recently + * dispatched IO with new low rate + */ + throtl_start_new_slice(td, tg, 0); + throtl_start_new_slice(td, tg, 1); + + if (throtl_tg_on_rr(tg)) tg_update_disptime(td, tg); - tg->limits_changed = false; - } } - - smp_mb__before_atomic_dec(); - atomic_dec(&td->limits_changed); - smp_mb__after_atomic_dec(); } /* Dispatch throttled bios. Should be called without queue lock held. 
*/ @@ -762,6 +777,7 @@ static int throtl_dispatch(struct request_queue *q) unsigned int nr_disp = 0; struct bio_list bio_list_on_stack; struct bio *bio; + struct blk_plug plug; spin_lock_irq(q->queue_lock); @@ -790,9 +806,10 @@ out: * immediate dispatch */ if (nr_disp) { + blk_start_plug(&plug); while((bio = bio_list_pop(&bio_list_on_stack))) generic_make_request(bio); - blk_unplug(q); + blk_finish_plug(&plug); } return nr_disp; } @@ -807,24 +824,24 @@ void blk_throtl_work(struct work_struct *work) } /* Call with queue lock held */ -void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) +static void +throtl_schedule_delayed_work(struct throtl_data *td, unsigned long delay) { - struct throtl_data *td = q->td; struct delayed_work *dwork = &td->throtl_work; - if (total_nr_queued(td) > 0) { + /* schedule work if limits changed even if no bio is queued */ + if (total_nr_queued(td) > 0 || td->limits_changed) { /* * We might have a work scheduled to be executed in future. * Cancel that and schedule a new one. */ __cancel_delayed_work(dwork); - kblockd_schedule_delayed_work(q, dwork, delay); + queue_delayed_work(kthrotld_workqueue, dwork, delay); throtl_log(td, "schedule work. 
delay=%lu jiffies=%lu", delay, jiffies); } } -EXPORT_SYMBOL(throtl_schedule_delayed_work); static void throtl_destroy_tg(struct throtl_data *td, struct throtl_grp *tg) @@ -887,6 +904,15 @@ void throtl_unlink_blkio_group(void *key, struct blkio_group *blkg) spin_unlock_irqrestore(td->queue->queue_lock, flags); } +static void throtl_update_blkio_group_common(struct throtl_data *td, + struct throtl_grp *tg) +{ + xchg(&tg->limits_changed, true); + xchg(&td->limits_changed, true); + /* Schedule a work now to process the limit change */ + throtl_schedule_delayed_work(td, 0); +} + /* * For all update functions, key should be a valid pointer because these * update functions are called under blkcg_lock, that means, blkg is @@ -900,64 +926,43 @@ static void throtl_update_blkio_group_read_bps(void *key, struct blkio_group *blkg, u64 read_bps) { struct throtl_data *td = key; + struct throtl_grp *tg = tg_of_blkg(blkg); - tg_of_blkg(blkg)->bps[READ] = read_bps; - /* Make sure read_bps is updated before setting limits_changed */ - smp_wmb(); - tg_of_blkg(blkg)->limits_changed = true; - - /* Make sure tg->limits_changed is updated before td->limits_changed */ - smp_mb__before_atomic_inc(); - atomic_inc(&td->limits_changed); - smp_mb__after_atomic_inc(); - - /* Schedule a work now to process the limit change */ - throtl_schedule_delayed_work(td->queue, 0); + tg->bps[READ] = read_bps; + throtl_update_blkio_group_common(td, tg); } static void throtl_update_blkio_group_write_bps(void *key, struct blkio_group *blkg, u64 write_bps) { struct throtl_data *td = key; + struct throtl_grp *tg = tg_of_blkg(blkg); - tg_of_blkg(blkg)->bps[WRITE] = write_bps; - smp_wmb(); - tg_of_blkg(blkg)->limits_changed = true; - smp_mb__before_atomic_inc(); - atomic_inc(&td->limits_changed); - smp_mb__after_atomic_inc(); - throtl_schedule_delayed_work(td->queue, 0); + tg->bps[WRITE] = write_bps; + throtl_update_blkio_group_common(td, tg); } static void throtl_update_blkio_group_read_iops(void *key, struct 
blkio_group *blkg, unsigned int read_iops) { struct throtl_data *td = key; + struct throtl_grp *tg = tg_of_blkg(blkg); - tg_of_blkg(blkg)->iops[READ] = read_iops; - smp_wmb(); - tg_of_blkg(blkg)->limits_changed = true; - smp_mb__before_atomic_inc(); - atomic_inc(&td->limits_changed); - smp_mb__after_atomic_inc(); - throtl_schedule_delayed_work(td->queue, 0); + tg->iops[READ] = read_iops; + throtl_update_blkio_group_common(td, tg); } static void throtl_update_blkio_group_write_iops(void *key, struct blkio_group *blkg, unsigned int write_iops) { struct throtl_data *td = key; + struct throtl_grp *tg = tg_of_blkg(blkg); - tg_of_blkg(blkg)->iops[WRITE] = write_iops; - smp_wmb(); - tg_of_blkg(blkg)->limits_changed = true; - smp_mb__before_atomic_inc(); - atomic_inc(&td->limits_changed); - smp_mb__after_atomic_inc(); - throtl_schedule_delayed_work(td->queue, 0); + tg->iops[WRITE] = write_iops; + throtl_update_blkio_group_common(td, tg); } -void throtl_shutdown_timer_wq(struct request_queue *q) +static void throtl_shutdown_wq(struct request_queue *q) { struct throtl_data *td = q->td; @@ -998,20 +1003,28 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) /* * There is already another bio queued in same dir. No * need to update dispatch time. - * Still update the disptime if rate limits on this group - * were changed. */ - if (!tg->limits_changed) - update_disptime = false; - else - tg->limits_changed = false; - + update_disptime = false; goto queue_bio; + } /* Bio is with-in rate limit of group */ if (tg_may_dispatch(td, tg, bio, NULL)) { throtl_charge_bio(tg, bio); + + /* + * We need to trim slice even when bios are not being queued + * otherwise it might happen that a bio is not queued for + * a long time and slice keeps on extending and trim is not + * called for a long time. Now if limits are reduced suddenly + * we take into account all the IO dispatched so far at new + * low rate and * newly queued IO gets a really long dispatch + * time. 
+ * + * So keep on trimming slice even if bio is not queued. + */ + throtl_trim_slice(td, tg, rw); goto out; } @@ -1047,7 +1060,7 @@ int blk_throtl_init(struct request_queue *q) INIT_HLIST_HEAD(&td->tg_list); td->tg_service_tree = THROTL_RB_ROOT; - atomic_set(&td->limits_changed, 0); + td->limits_changed = false; /* Init root group */ tg = &td->root_tg; @@ -1059,6 +1072,7 @@ int blk_throtl_init(struct request_queue *q) /* Practically unlimited BW */ tg->bps[0] = tg->bps[1] = -1; tg->iops[0] = tg->iops[1] = -1; + td->limits_changed = false; /* * Set root group reference to 2. One reference will be dropped when @@ -1091,7 +1105,7 @@ void blk_throtl_exit(struct request_queue *q) BUG_ON(!td); - throtl_shutdown_timer_wq(q); + throtl_shutdown_wq(q); spin_lock_irq(q->queue_lock); throtl_release_tgs(td); @@ -1121,12 +1135,16 @@ void blk_throtl_exit(struct request_queue *q) * update limits through cgroup and another work got queued, cancel * it. */ - throtl_shutdown_timer_wq(q); + throtl_shutdown_wq(q); throtl_td_free(td); } static int __init throtl_init(void) { + kthrotld_workqueue = alloc_workqueue("kthrotld", WQ_MEM_RECLAIM, 0); + if (!kthrotld_workqueue) + panic("Failed to create kthrotld\n"); + blkio_policy_register(&blkio_policy_throtl); return 0; } |