From eea8f41cc58849e354ecf8b95bd7f806e1d1f703 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 22 May 2015 17:13:17 -0400
Subject: blkcg: move block/blk-cgroup.h to include/linux/blk-cgroup.h

cgroup aware writeback support will require exposing some blkcg
details.  In preparation, move block/blk-cgroup.h to
include/linux/blk-cgroup.h.  This patch is a pure file move.

Signed-off-by: Tejun Heo
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index aa819a58ea24..4afac14d4499 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -32,12 +32,12 @@
 #include <linux/delay.h>
 #include <linux/ratelimit.h>
 #include <linux/pm_runtime.h>
+#include <linux/blk-cgroup.h>
 
 #define CREATE_TRACE_POINTS
 #include <trace/events/block.h>
 
 #include "blk.h"
-#include "blk-cgroup.h"
 #include "blk-mq.h"
 
 EXPORT_TRACEPOINT_SYMBOL_GPL(block_bio_remap);
-- cgit v1.2.3

From 4452226ea276e74fc3e252c88d9bb7e8f8e44bf0 Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 22 May 2015 17:13:26 -0400
Subject: writeback: move backing_dev_info->state into bdi_writeback

Currently, a bdi (backing_dev_info) embeds a single wb (bdi_writeback)
and the role of the separation is unclear.  For cgroup support for
writeback IOs, a bdi will be updated to host multiple wb's, where each
wb serves writeback IOs of a different cgroup on the bdi.  To achieve
that, a wb should carry all states necessary for servicing writeback
IOs for a cgroup independently.

This patch moves bdi->state into wb.

* enum bdi_state is renamed to wb_state and the prefix of all enums is
  changed from BDI_ to WB_.

* Explicit zeroing of bdi->state is removed without adding zeroing of
  wb->state as the whole data structure is zeroed on init anyway.

* As there's still only one bdi_writeback per backing_dev_info, all
  uses of bdi->state are mechanically replaced with bdi->wb.state,
  introducing no behavior changes.

Signed-off-by: Tejun Heo
Reviewed-by: Jan Kara
Cc: Jens Axboe
Cc: Wu Fengguang
Cc: drbd-dev@lists.linbit.com
Cc: Neil Brown
Cc: Alasdair Kergon
Cc: Mike Snitzer
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 1 -
 1 file changed, 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index 4afac14d4499..c3ba9c3b08d4 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -621,7 +621,6 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-	q->backing_dev_info.state = 0;
 	q->backing_dev_info.capabilities = 0;
 	q->backing_dev_info.name = "block";
 	q->node = node_id;
-- cgit v1.2.3

From 89e9b9e07a390c50980d10aa37a04631db5a23ab Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 22 May 2015 17:13:36 -0400
Subject: writeback: add {CONFIG|BDI_CAP|FS}_CGROUP_WRITEBACK

cgroup writeback requires support from both the bdi and filesystem
sides.  Add BDI_CAP_CGROUP_WRITEBACK and FS_CGROUP_WRITEBACK to
indicate support and enable BDI_CAP_CGROUP_WRITEBACK on block based
bdi's by default.  Also, define CONFIG_CGROUP_WRITEBACK, which is
enabled if both MEMCG and BLK_CGROUP are enabled.

inode_cgwb_enabled(), which determines whether both the bdi and the
filesystem of a given inode support cgroup writeback, is added.
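[Editor's illustration, not part of the patch.]  The check described
above combines a bdi-side capability with a filesystem-side flag.  The
stand-alone C sketch below only models that "both sides must opt in"
rule from the commit message; the struct layouts, field names (i_bdi,
s_type, fs_flags) and flag values are simplified assumptions, not the
kernel's definitions.

/*
 * Editor's sketch (not kernel source): a user-space model of an
 * inode_cgwb_enabled()-style test.  Both the bdi and the filesystem
 * must declare cgroup writeback support.
 */
#include <stdbool.h>
#include <stdio.h>

#define BDI_CAP_CGROUP_WRITEBACK  (1u << 0)   /* bdi side opts in */
#define FS_CGROUP_WRITEBACK       (1u << 0)   /* fs side opts in */

struct backing_dev_info { unsigned int capabilities; };
struct file_system_type { unsigned int fs_flags; };
struct super_block      { struct file_system_type *s_type; };
struct inode            { struct super_block *i_sb;
                          struct backing_dev_info *i_bdi; };

/* cgroup writeback is usable only if bdi and fs both support it */
static bool inode_cgwb_enabled(const struct inode *inode)
{
        return (inode->i_bdi->capabilities & BDI_CAP_CGROUP_WRITEBACK) &&
               (inode->i_sb->s_type->fs_flags & FS_CGROUP_WRITEBACK);
}

int main(void)
{
        struct backing_dev_info blk_bdi = { .capabilities = BDI_CAP_CGROUP_WRITEBACK };
        struct file_system_type fs_type = { .fs_flags = 0 };
        struct super_block sb = { .s_type = &fs_type };
        struct inode ino = { .i_sb = &sb, .i_bdi = &blk_bdi };

        printf("cgroup writeback enabled: %d\n", inode_cgwb_enabled(&ino));
        fs_type.fs_flags |= FS_CGROUP_WRITEBACK;
        printf("after fs opts in:         %d\n", inode_cgwb_enabled(&ino));
        return 0;
}

Requiring an explicit opt-in from both layers lets cgroup writeback be
enabled per filesystem while unconverted filesystems keep the old
root-only writeback behavior.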
Signed-off-by: Tejun Heo
Cc: Jens Axboe
Cc: Jan Kara
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index c3ba9c3b08d4..c114a61590bc 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -621,7 +621,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id)
 
 	q->backing_dev_info.ra_pages =
 			(VM_MAX_READAHEAD * 1024) / PAGE_CACHE_SIZE;
-	q->backing_dev_info.capabilities = 0;
+	q->backing_dev_info.capabilities = BDI_CAP_CGROUP_WRITEBACK;
 	q->backing_dev_info.name = "block";
 	q->node = node_id;
-- cgit v1.2.3

From d40f75a06dd675808eed385d490ba9468200b23f Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 22 May 2015 17:13:42 -0400
Subject: writeback, blkcg: restructure blk_{set|clear}_queue_congested()

blk_{set|clear}_queue_congested() take @q and set or clear,
respectively, the congestion state of its bdi's root wb.  Because bdi
used to be able to handle congestion state only on the root wb, the
callers of those functions tested whether the congestion was on the
root blkcg and skipped the call if not.

This is cumbersome and makes implementation of per-cgroup
bdi_writeback congestion state propagation difficult.  This patch
renames blk_{set|clear}_queue_congested() to blk_{set|clear}_congested()
and makes them take a request_list instead of a request_queue and test
whether the specified request_list is the root one before updating
bdi_writeback congestion state.  This makes the tests in the callers
unnecessary and simplifies them.

As there are no external users of these functions, the definitions are
moved from include/linux/blkdev.h to block/blk-core.c.

This patch doesn't introduce any noticeable behavior difference.

Signed-off-by: Tejun Heo
Cc: Jens Axboe
Cc: Jan Kara
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 62 +++++++++++++++++++++++++++++++++-----------------------
 1 file changed, 37 insertions(+), 25 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index c114a61590bc..f46a43ef1a94 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -63,6 +63,28 @@ struct kmem_cache *blk_requestq_cachep;
  */
 static struct workqueue_struct *kblockd_workqueue;
 
+static void blk_clear_congested(struct request_list *rl, int sync)
+{
+	if (rl != &rl->q->root_rl)
+		return;
+#ifdef CONFIG_CGROUP_WRITEBACK
+	clear_wb_congested(rl->blkg->wb_congested, sync);
+#else
+	clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
+#endif
+}
+
+static void blk_set_congested(struct request_list *rl, int sync)
+{
+	if (rl != &rl->q->root_rl)
+		return;
+#ifdef CONFIG_CGROUP_WRITEBACK
+	set_wb_congested(rl->blkg->wb_congested, sync);
+#else
+	set_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
+#endif
+}
+
 void blk_queue_congestion_threshold(struct request_queue *q)
 {
 	int nr;
@@ -842,13 +864,8 @@ static void __freed_request(struct request_list *rl, int sync)
 {
 	struct request_queue *q = rl->q;
 
-	/*
-	 * bdi isn't aware of blkcg yet.  As all async IOs end up root
-	 * blkcg anyway, just use root blkcg state.
-	 */
-	if (rl == &q->root_rl &&
-	    rl->count[sync] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, sync);
+	if (rl->count[sync] < queue_congestion_off_threshold(q))
+		blk_clear_congested(rl, sync);
 
 	if (rl->count[sync] + 1 <= q->nr_requests) {
 		if (waitqueue_active(&rl->wait[sync]))
@@ -881,25 +898,25 @@ static void freed_request(struct request_list *rl, unsigned int flags)
 int blk_update_nr_requests(struct request_queue *q, unsigned int nr)
 {
 	struct request_list *rl;
+	int on_thresh, off_thresh;
 
 	spin_lock_irq(q->queue_lock);
 	q->nr_requests = nr;
 	blk_queue_congestion_threshold(q);
+	on_thresh = queue_congestion_on_threshold(q);
+	off_thresh = queue_congestion_off_threshold(q);
 
-	/* congestion isn't cgroup aware and follows root blkcg for now */
-	rl = &q->root_rl;
-
-	if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q))
-		blk_set_queue_congested(q, BLK_RW_SYNC);
-	else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, BLK_RW_SYNC);
+	blk_queue_for_each_rl(rl, q) {
+		if (rl->count[BLK_RW_SYNC] >= on_thresh)
+			blk_set_congested(rl, BLK_RW_SYNC);
+		else if (rl->count[BLK_RW_SYNC] < off_thresh)
+			blk_clear_congested(rl, BLK_RW_SYNC);
 
-	if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q))
-		blk_set_queue_congested(q, BLK_RW_ASYNC);
-	else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q))
-		blk_clear_queue_congested(q, BLK_RW_ASYNC);
+		if (rl->count[BLK_RW_ASYNC] >= on_thresh)
+			blk_set_congested(rl, BLK_RW_ASYNC);
+		else if (rl->count[BLK_RW_ASYNC] < off_thresh)
+			blk_clear_congested(rl, BLK_RW_ASYNC);
 
-	blk_queue_for_each_rl(rl, q) {
 		if (rl->count[BLK_RW_SYNC] >= q->nr_requests) {
 			blk_set_rl_full(rl, BLK_RW_SYNC);
 		} else {
@@ -1009,12 +1026,7 @@ static struct request *__get_request(struct request_list *rl, int rw_flags,
 				}
 			}
 		}
-		/*
-		 * bdi isn't aware of blkcg yet.  As all async IOs end up
-		 * root blkcg anyway, just use root blkcg state.
-		 */
-		if (rl == &q->root_rl)
-			blk_set_queue_congested(q, is_sync);
+		blk_set_congested(rl, is_sync);
 	}
 
 	/*
-- cgit v1.2.3

From 482cf79cdf6669667a914ffd4cbc57a762b55fef Mon Sep 17 00:00:00 2001
From: Tejun Heo
Date: Fri, 22 May 2015 17:13:43 -0400
Subject: writeback, blkcg: propagate non-root blkcg congestion state

Now that the bdi layer can handle per-blkcg bdi_writeback_congested
state, blk_{set|clear}_congested() can propagate non-root blkcg
congestion state to them.

This can be easily achieved by disabling the root_rl tests in
blk_{set|clear}_congested().  Note that we still need those tests when
!CONFIG_CGROUP_WRITEBACK as otherwise we'll end up flipping the root
blkcg wb's congestion state for events happening on other blkcgs.

v2: Updated for bdi_writeback_congested.
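[Editor's illustration, not part of the patch.]  To make the control
flow of blk_{set|clear}_congested() after this change easier to follow,
here is a stand-alone C model.  The types, the CGROUP_WRITEBACK switch
and the wb_congested bitmask are simplified assumptions, not the kernel
definitions; only the control flow reflects the patches above: with
cgroup writeback each request_list pokes its own blkg congestion state,
without it only the root request_list may touch the single per-bdi
state.

/*
 * Editor's sketch (not kernel source): per-request_list congestion
 * propagation with and without cgroup writeback support.
 */
#include <stdio.h>

#define CGROUP_WRITEBACK 1     /* flip to 0 to model !CONFIG_CGROUP_WRITEBACK */

enum { BLK_RW_ASYNC, BLK_RW_SYNC };

struct wb_congested { unsigned int state; };       /* one bit per sync/async */
struct request_list;
struct request_queue {
        struct request_list *root_rl;
        struct wb_congested  root_wb;              /* stands in for bdi->wb.congested */
};
struct request_list {
        struct request_queue *q;
        struct wb_congested   blkg_wb;             /* stands in for rl->blkg->wb_congested */
};

static void set_wb_congested(struct wb_congested *wb, int sync)   { wb->state |=  (1u << sync); }
static void clear_wb_congested(struct wb_congested *wb, int sync) { wb->state &= ~(1u << sync); }

static void blk_set_congested(struct request_list *rl, int sync)
{
#if CGROUP_WRITEBACK
        set_wb_congested(&rl->blkg_wb, sync);      /* per-cgroup state */
#else
        /* only one wb exists; don't let foreign blkcgs flip its state */
        if (rl == rl->q->root_rl)
                set_wb_congested(&rl->q->root_wb, sync);
#endif
}

static void blk_clear_congested(struct request_list *rl, int sync)
{
#if CGROUP_WRITEBACK
        clear_wb_congested(&rl->blkg_wb, sync);
#else
        if (rl == rl->q->root_rl)
                clear_wb_congested(&rl->q->root_wb, sync);
#endif
}

int main(void)
{
        struct request_queue q = { 0 };
        struct request_list root = { .q = &q }, child = { .q = &q };

        q.root_rl = &root;
        blk_set_congested(&child, BLK_RW_SYNC);
        printf("child blkg state: %u, root bdi state: %u\n",
               child.blkg_wb.state, q.root_wb.state);
        blk_clear_congested(&child, BLK_RW_SYNC);
        printf("child blkg state after clear: %u\n", child.blkg_wb.state);
        return 0;
}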
Signed-off-by: Tejun Heo
Cc: Jens Axboe
Cc: Jan Kara
Cc: Vivek Goyal
Signed-off-by: Jens Axboe
---
 block/blk-core.c | 15 +++++++++------
 1 file changed, 9 insertions(+), 6 deletions(-)

(limited to 'block/blk-core.c')

diff --git a/block/blk-core.c b/block/blk-core.c
index f46a43ef1a94..a4a2dbe46fe3 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -65,23 +65,26 @@ static struct workqueue_struct *kblockd_workqueue;
 
 static void blk_clear_congested(struct request_list *rl, int sync)
 {
-	if (rl != &rl->q->root_rl)
-		return;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	clear_wb_congested(rl->blkg->wb_congested, sync);
 #else
-	clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
+	/*
+	 * If !CGROUP_WRITEBACK, all blkg's map to bdi->wb and we shouldn't
+	 * flip its congestion state for events on other blkcgs.
+	 */
+	if (rl == &rl->q->root_rl)
+		clear_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
 #endif
 }
 
 static void blk_set_congested(struct request_list *rl, int sync)
 {
-	if (rl != &rl->q->root_rl)
-		return;
 #ifdef CONFIG_CGROUP_WRITEBACK
 	set_wb_congested(rl->blkg->wb_congested, sync);
 #else
-	set_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
+	/* see blk_clear_congested() */
+	if (rl == &rl->q->root_rl)
+		set_wb_congested(rl->q->backing_dev_info.wb.congested, sync);
 #endif
 }
 
-- cgit v1.2.3
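[Editor's illustration, not part of the series.]  The
blk_update_nr_requests() hunk earlier in the series applies a
hysteresis: congestion is set at a higher watermark than the one at
which it is cleared, so the state does not flap around a single
boundary.  The stand-alone C sketch below shows only that pattern; the
threshold formula and names are made-up placeholders, not the kernel's
queue_congestion_{on|off}_threshold() values.

/*
 * Editor's sketch (not kernel source): set-high/clear-low congestion
 * hysteresis applied per request list, reduced to plain integers.
 */
#include <stdbool.h>
#include <stdio.h>

struct request_list_model {
        int  count;        /* allocated requests in one class, e.g. BLK_RW_SYNC */
        bool congested;
};

/* set when count reaches on_thresh, clear only when it drops below off_thresh */
static void update_congestion(struct request_list_model *rl,
                              int on_thresh, int off_thresh)
{
        if (rl->count >= on_thresh)
                rl->congested = true;
        else if (rl->count < off_thresh)
                rl->congested = false;
        /* between off_thresh and on_thresh the previous state is kept */
}

int main(void)
{
        struct request_list_model rl = { .count = 0, .congested = false };
        const int nr_requests = 128;
        const int on_thresh  = nr_requests + nr_requests / 8;   /* illustrative */
        const int off_thresh = nr_requests - nr_requests / 8;   /* illustrative */
        int counts[] = { 100, 150, 130, 118, 110 };

        for (unsigned int i = 0; i < sizeof(counts) / sizeof(counts[0]); i++) {
                rl.count = counts[i];
                update_congestion(&rl, on_thresh, off_thresh);
                printf("count=%3d congested=%d\n", rl.count, rl.congested);
        }
        return 0;
}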