From 119ba0f82839cd80eaef3e6991988f1403965d5b Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 24 Apr 2013 19:01:12 -0700 Subject: bcache: Convert allocator thread to kthread Using a workqueue when we just want a single thread is a bit silly. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 17 +++++++++++------ 1 file changed, 11 insertions(+), 6 deletions(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d3e15b42a4ab..166c8ddc0be4 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -565,8 +565,7 @@ struct cache { unsigned watermark[WATERMARK_MAX]; - struct closure alloc; - struct workqueue_struct *alloc_workqueue; + struct task_struct *alloc_thread; struct closure prio; struct prio_set *disk_buckets; @@ -703,9 +702,6 @@ struct cache_set { /* For the btree cache */ struct shrinker shrink; - /* For the allocator itself */ - wait_queue_head_t alloc_wait; - /* For the btree cache and anything allocation related */ struct mutex bucket_lock; @@ -1173,6 +1169,15 @@ static inline uint8_t bucket_disk_gen(struct bucket *b) static struct kobj_attribute ksysfs_##n = \ __ATTR(n, S_IWUSR|S_IRUSR, show, store) +static inline void wake_up_allocators(struct cache_set *c) +{ + struct cache *ca; + unsigned i; + + for_each_cache(ca, c, i) + wake_up_process(ca->alloc_thread); +} + /* Forward declarations */ void bch_writeback_queue(struct cached_dev *); @@ -1193,7 +1198,6 @@ void bch_submit_bbio(struct bio *, struct cache_set *, struct bkey *, unsigned); uint8_t bch_inc_gen(struct cache *, struct bucket *); void bch_rescale_priorities(struct cache_set *, int); bool bch_bucket_add_unused(struct cache *, struct bucket *); -void bch_allocator_thread(struct closure *); long bch_bucket_alloc(struct cache *, unsigned, struct closure *); void bch_bucket_free(struct cache_set *, struct bkey *); @@ -1244,6 +1248,7 @@ int bch_btree_cache_alloc(struct cache_set *); void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); +int bch_cache_allocator_start(struct cache *ca); void bch_cache_allocator_exit(struct cache *ca); int bch_cache_allocator_init(struct cache *ca); -- cgit v1.2.3 From 5794351146199b9ac67a5ab1beab82be8bfd7b5d Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Thu, 25 Apr 2013 13:58:35 -0700 Subject: bcache: Refactor btree io The most significant change is that btree reads are now done synchronously, instead of asynchronously and doing the post read stuff from a workqueue. This was originally done because we can't block on IO under generic_make_request(). But - we already have a mechanism to punt cache lookups to workqueue if needed, so if we just use that we don't have to deal with the complexity of doing things asynchronously. The main benefit is this makes the locking situation saner; we can hold our write lock on the btree node until we're finished reading it, and we don't need that btree_node_read_done() flag anymore. Also, for writes, btree_write() was broken out into btree_node_write() and btree_leaf_dirty() - the old code with the boolean argument was dumb and confusing. The prio_blocked mechanism was improved a bit too, now the only counter is in struct btree_write, we don't mess with transfering a count from struct btree anymore. This required changing garbage collection to block prios at the start and unblock when it finishes, which is cleaner than what it was doing anyways (the old code had mostly the same effect, but was doing it in a convoluted way) And the btree iter btree_node_read_done() uses was converted to a real mempool. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 166c8ddc0be4..ad4957b52f10 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -819,10 +819,9 @@ struct cache_set { /* * A btree node on disk could have too many bsets for an iterator to fit - * on the stack - this is a single element mempool for btree_read_work() + * on the stack - have to dynamically allocate them */ - struct mutex fill_lock; - struct btree_iter *fill_iter; + mempool_t *fill_iter; /* * btree_sort() is a merge sort and requires temporary space - single -- cgit v1.2.3 From c37511b863f36c1cc6e18440717fd4cc0e881b8a Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Fri, 26 Apr 2013 15:39:55 -0700 Subject: bcache: Fix/revamp tracepoints The tracepoints were reworked to be more sensible, and fixed a null pointer deref in one of the tracepoints. Converted some of the pr_debug()s to tracepoints - this is partly a performance optimization; it used to be that with DEBUG or CONFIG_DYNAMIC_DEBUG pr_debug() was an empty macro; but at some point it was changed to an empty inline function. Some of the pr_debug() statements had rather expensive function calls as part of the arguments, so this code was getting run unnecessarily even on non debug kernels - in some fast paths, too. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 20 -------------------- 1 file changed, 20 deletions(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index ad4957b52f10..59c15e09e4dd 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -178,7 +178,6 @@ #define pr_fmt(fmt) "bcache: %s() " fmt "\n", __func__ #include -#include #include #include #include @@ -901,8 +900,6 @@ static inline unsigned local_clock_us(void) return local_clock() >> 10; } -#define MAX_BSETS 4U - #define BTREE_PRIO USHRT_MAX #define INITIAL_PRIO 32768 @@ -1107,23 +1104,6 @@ static inline void __bkey_put(struct cache_set *c, struct bkey *k) atomic_dec_bug(&PTR_BUCKET(c, k, i)->pin); } -/* Blktrace macros */ - -#define blktrace_msg(c, fmt, ...) \ -do { \ - struct request_queue *q = bdev_get_queue(c->bdev); \ - if (q) \ - blk_add_trace_msg(q, fmt, ##__VA_ARGS__); \ -} while (0) - -#define blktrace_msg_all(s, fmt, ...) \ -do { \ - struct cache *_c; \ - unsigned i; \ - for_each_cache(_c, (s), i) \ - blktrace_msg(_c, fmt, ##__VA_ARGS__); \ -} while (0) - static inline void cached_dev_put(struct cached_dev *dc) { if (atomic_dec_and_test(&dc->count)) -- cgit v1.2.3 From 6ded34d1a54c046a45db071d3cb7b37bd0a4a31f Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 May 2013 15:59:37 -0700 Subject: bcache: Improve lazy sorting The old lazy sorting code was kind of hacky - rewrite in a way that mathematically makes more sense; the idea is that the size of the sets of keys in a btree node should increase by a more or less fixed ratio from smallest to biggest. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 59c15e09e4dd..6fa5a1e33c49 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -828,6 +828,7 @@ struct cache_set { */ struct mutex sort_lock; struct bset *sort; + unsigned sort_crit_factor; /* List of buckets we're currently writing data to */ struct list_head data_buckets; -- cgit v1.2.3 From 444fc0b6b167ed164e7436621a9d095e042644dd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Sat, 11 May 2013 17:07:26 -0700 Subject: bcache: Initialize sectors_dirty when attaching Previously, dirty_data wouldn't get initialized until the first garbage collection... which was a bit of a problem for background writeback (as the PD controller keys off of it) and also confusing for users. This is also prep work for making background writeback aware of raid5/6 stripes. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 6fa5a1e33c49..d099d8894c2f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -438,7 +438,6 @@ struct bcache_device { atomic_t detaching; atomic_long_t sectors_dirty; - unsigned long sectors_dirty_gc; unsigned long sectors_dirty_last; long sectors_dirty_derivative; @@ -1225,6 +1224,7 @@ void bch_cache_set_stop(struct cache_set *); struct cache_set *bch_cache_set_alloc(struct cache_sb *); void bch_btree_cache_free(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *); +void bch_sectors_dirty_init(struct cached_dev *); void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); -- cgit v1.2.3 From 279afbad4e54acbd61bf88a54a73af3bbfdeb5dd Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2013 06:21:07 -0700 Subject: bcache: Track dirty data by stripe To make background writeback aware of raid5/6 stripes, we first need to track the amount of dirty data within each stripe - we do this by breaking up the existing sectors_dirty into per stripe atomic_ts Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index d099d8894c2f..dbddef0cdb59 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -437,7 +437,10 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; - atomic_long_t sectors_dirty; + uint64_t nr_stripes; + unsigned stripe_size_bits; + atomic_t *stripe_sectors_dirty; + unsigned long sectors_dirty_last; long sectors_dirty_derivative; @@ -1159,9 +1162,6 @@ static inline void wake_up_allocators(struct cache_set *c) /* Forward declarations */ -void bch_writeback_queue(struct cached_dev *); -void bch_writeback_add(struct cached_dev *, unsigned); - void bch_count_io_errors(struct cache *, int, const char *); void bch_bbio_count_io_errors(struct cache_set *, struct bio *, int, const char *); @@ -1224,8 +1224,6 @@ void bch_cache_set_stop(struct cache_set *); struct cache_set *bch_cache_set_alloc(struct cache_sb *); void bch_btree_cache_free(struct cache_set *); int bch_btree_cache_alloc(struct cache_set *); -void bch_sectors_dirty_init(struct cached_dev *); -void bch_cached_dev_writeback_init(struct cached_dev *); void bch_moving_init_cache_set(struct cache_set *); int bch_cache_allocator_start(struct cache *ca); -- cgit v1.2.3 From 72c270612bd33192fa836ad0f2939af1ca218292 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 5 Jun 2013 06:24:39 -0700 Subject: bcache: Write out full stripes Now that we're tracking dirty data per stripe, we can add two optimizations for raid5/6: * If a stripe is already dirty, force writes to that stripe to writeback mode - to help build up full stripes of dirty data * When flushing dirty data, preferentially write out full stripes first if there are any. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index dbddef0cdb59..342ba86c6e4f 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -387,8 +387,6 @@ struct keybuf_key { typedef bool (keybuf_pred_fn)(struct keybuf *, struct bkey *); struct keybuf { - keybuf_pred_fn *key_predicate; - struct bkey last_scanned; spinlock_t lock; @@ -532,6 +530,7 @@ struct cached_dev { unsigned sequential_merge:1; unsigned verify:1; + unsigned partial_stripes_expensive:1; unsigned writeback_metadata:1; unsigned writeback_running:1; unsigned char writeback_percent; -- cgit v1.2.3 From c9502ea4424b31728703d113fc6b30bfead14633 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 21:25:02 -0700 Subject: bcache: Fix a sysfs splat on shutdown If we stopped a bcache device when we were already detaching (or something like that), bcache_device_unlink() would try to remove a symlink from sysfs that was already gone because the bcache dev kobject had already been removed from sysfs. So keep track of whether we've removed stuff from sysfs. Signed-off-by: Kent Overstreet Cc: linux-stable # >= v3.10 --- drivers/md/bcache/bcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 342ba86c6e4f..68f1ded81ae0 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -434,6 +434,7 @@ struct bcache_device { /* If nonzero, we're detaching/unregistering from cache set */ atomic_t detaching; + int flush_done; uint64_t nr_stripes; unsigned stripe_size_bits; -- cgit v1.2.3 From 79826c35eb99cd3c0873b8396f45fa26c87fb0b0 Mon Sep 17 00:00:00 2001 From: Kent Overstreet Date: Wed, 10 Jul 2013 18:31:58 -0700 Subject: bcache: Allocation kthread fixes The alloc kthread should've been using try_to_freeze() - and also there was the potential for the alloc kthread to get woken up after it had shut down, which would have been bad. Signed-off-by: Kent Overstreet --- drivers/md/bcache/bcache.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'drivers/md/bcache/bcache.h') diff --git a/drivers/md/bcache/bcache.h b/drivers/md/bcache/bcache.h index 68f1ded81ae0..b39f6f0b45f2 100644 --- a/drivers/md/bcache/bcache.h +++ b/drivers/md/bcache/bcache.h @@ -664,13 +664,9 @@ struct gc_stat { * CACHE_SET_STOPPING always gets set first when we're closing down a cache set; * we'll continue to run normally for awhile with CACHE_SET_STOPPING set (i.e. * flushing dirty data). - * - * CACHE_SET_STOPPING_2 gets set at the last phase, when it's time to shut down - * the allocation thread. */ #define CACHE_SET_UNREGISTERING 0 #define CACHE_SET_STOPPING 1 -#define CACHE_SET_STOPPING_2 2 struct cache_set { struct closure cl; -- cgit v1.2.3