From f72b8792d180948b4b3898374998f5ac8c02e539 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:51:50 -0600 Subject: workqueue: add cancel_work() Like cancel_delayed_work(), but for regular work. Signed-off-by: Jens Axboe Mehed-by: Tejun Heo Acked-by: Tejun Heo --- include/linux/workqueue.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/workqueue.h b/include/linux/workqueue.h index 26cc1df280d6..fc6e22186405 100644 --- a/include/linux/workqueue.h +++ b/include/linux/workqueue.h @@ -442,6 +442,7 @@ extern int schedule_on_each_cpu(work_func_t func); int execute_in_process_context(work_func_t fn, struct execute_work *); extern bool flush_work(struct work_struct *work); +extern bool cancel_work(struct work_struct *work); extern bool cancel_work_sync(struct work_struct *work); extern bool flush_delayed_work(struct delayed_work *dwork); -- cgit v1.2.3 From ee63cfa7fc197b63669623721b8009cce5b0659b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:52:48 -0600 Subject: block: add kblockd_schedule_work_on() Add a helper to schedule a regular struct work on a particular CPU. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e79055c8b577..69aae720f4ef 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1440,8 +1440,8 @@ static inline bool req_gap_front_merge(struct request *req, struct bio *bio) return bio_will_gap(req->q, bio, req->bio); } -struct work_struct; int kblockd_schedule_work(struct work_struct *work); +int kblockd_schedule_work_on(int cpu, struct work_struct *work); int kblockd_schedule_delayed_work(struct delayed_work *dwork, unsigned long delay); int kblockd_schedule_delayed_work_on(int cpu, struct delayed_work *dwork, unsigned long delay); -- cgit v1.2.3 From 27489a3c827b7eebba26eda0320bb0f100bef167 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Aug 2016 15:54:25 -0600 Subject: blk-mq: turn hctx->run_work into a regular work struct We don't need the larger delayed work struct, since we always run it immediately. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e43bbffb5b7a..d579252e6463 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -25,7 +25,7 @@ struct blk_mq_hw_ctx { } ____cacheline_aligned_in_smp; unsigned long state; /* BLK_MQ_S_* flags */ - struct delayed_work run_work; + struct work_struct run_work; struct delayed_work delay_work; cpumask_var_t cpumask; int next_cpu; -- cgit v1.2.3 From 8d354f133e86dd03ea7885a91df398c55ff699ff Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 25 Aug 2016 08:00:28 -0600 Subject: blk-mq: improve layout of blk_mq_hw_ctx Various cache line optimizations: - Move delay_work towards the end. It's huge, and we don't use it a lot (only SCSI). - Move the atomic state into the same cacheline as the the dispatch list and lock. - Rearrange a few members to pack it better. - Shrink the max-order for dispatch accounting from 10 to 7. This means that ->dispatched[] and ->run now take up their own cacheline. This shrinks struct blk_mq_hw_ctx down to 8 cachelines. Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d579252e6463..e1544f0f8c21 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -22,11 +22,10 @@ struct blk_mq_hw_ctx { struct { spinlock_t lock; struct list_head dispatch; + unsigned long state; /* BLK_MQ_S_* flags */ } ____cacheline_aligned_in_smp; - unsigned long state; /* BLK_MQ_S_* flags */ struct work_struct run_work; - struct delayed_work delay_work; cpumask_var_t cpumask; int next_cpu; int next_cpu_batch; @@ -40,8 +39,8 @@ struct blk_mq_hw_ctx { struct blk_mq_ctxmap ctx_map; - unsigned int nr_ctx; struct blk_mq_ctx **ctxs; + unsigned int nr_ctx; atomic_t wait_index; @@ -49,7 +48,7 @@ struct blk_mq_hw_ctx { unsigned long queued; unsigned long run; -#define BLK_MQ_MAX_DISPATCH_ORDER 10 +#define BLK_MQ_MAX_DISPATCH_ORDER 7 unsigned long dispatched[BLK_MQ_MAX_DISPATCH_ORDER]; unsigned int numa_node; @@ -57,6 +56,8 @@ struct blk_mq_hw_ctx { atomic_t nr_active; + struct delayed_work delay_work; + struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; -- cgit v1.2.3 From 6e219353afa1f67f453141f7462b01708ebf5574 Mon Sep 17 00:00:00 2001 From: Stephen Bates Date: Tue, 13 Sep 2016 12:23:15 -0600 Subject: block: add poll_considered statistic In order to help determine the effectiveness of polling in a running system it is usful to determine the ratio of how often the poll function is called vs how often the completion is checked. For this reason we add a poll_considered variable and add it to the sysfs entry for io_poll. Signed-off-by: Stephen Bates Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index e1544f0f8c21..7710f795d7c2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -61,6 +61,7 @@ struct blk_mq_hw_ctx { struct blk_mq_cpu_notifier cpu_notifier; struct kobject kobj; + unsigned long poll_considered; unsigned long poll_invoked; unsigned long poll_success; }; -- cgit v1.2.3 From abe47114b192a9e0167905a3418d815b4fcf87de Mon Sep 17 00:00:00 2001 From: Linus Walleij Date: Wed, 14 Sep 2016 14:33:15 +0200 Subject: block: remove blk_mq_alloc_single_hw_queue() prototype The blk_mq_alloc_single_hw_queue() is a prototype artifact that should have been removed with commit cdef54dd85ad66e77262ea57796a3e81683dd5d6 "blk-mq: remove alloc_hctx and free_hctx methods" where the last users of it were deleted. Fixes: cdef54dd85ad ("blk-mq: remove alloc_hctx and free_hctx methods") Signed-off-by: Linus Walleij Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 7710f795d7c2..ff14f68067aa 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -223,7 +223,6 @@ static inline u16 blk_mq_unique_tag_to_tag(u32 unique_tag) } struct blk_mq_hw_ctx *blk_mq_map_queue(struct request_queue *, const int ctx_index); -struct blk_mq_hw_ctx *blk_mq_alloc_single_hw_queue(struct blk_mq_tag_set *, unsigned int, int); int blk_mq_request_started(struct request *rq); void blk_mq_start_request(struct request *rq); -- cgit v1.2.3 From 637ca77bd1f7950538956c61dcd0c2e559905dbf Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:44:12 +0200 Subject: block: Document that bio_op() uses the data type of bio.bi_opf Make it clear that the sizeof(unsigned int) expression in BIO_OP_SHIFT refers to the bi_opf member of struct bio. Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 436f43f87da9..1e1ef210ae91 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -89,7 +89,7 @@ struct bio { struct bio_vec bi_inline_vecs[0]; }; -#define BIO_OP_SHIFT (8 * sizeof(unsigned int) - REQ_OP_BITS) +#define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) #define bio_set_op_attrs(bio, op, op_flags) do { \ -- cgit v1.2.3 From 4382e33ad374862eacf62003bb02c750391ada05 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:45:36 +0200 Subject: block, dm-crypt, btrfs: Introduce bio_flags() Introduce the bio_flags() macro. Ensure that the second argument of bio_set_op_attrs() only contains flags and no operation. This patch does not change any functionality. Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Chris Mason (maintainer:BTRFS FILE SYSTEM) Cc: Josef Bacik (maintainer:BTRFS FILE SYSTEM) Cc: Mike Snitzer Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1e1ef210ae91..311fa2f478b8 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -90,11 +90,12 @@ struct bio { }; #define BIO_OP_SHIFT (8 * FIELD_SIZEOF(struct bio, bi_opf) - REQ_OP_BITS) +#define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) #define bio_set_op_attrs(bio, op, op_flags) do { \ WARN_ON(op >= (1 << REQ_OP_BITS)); \ - (bio)->bi_opf &= ((1 << BIO_OP_SHIFT) - 1); \ + (bio)->bi_opf = bio_flags(bio); \ (bio)->bi_opf |= ((unsigned int) (op) << BIO_OP_SHIFT); \ (bio)->bi_opf |= op_flags; \ } while (0) -- cgit v1.2.3 From 3e1de31b9bf608c5b35e2d0d134eb87f2a9ba4ae Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Wed, 14 Sep 2016 10:46:22 +0200 Subject: block: Improve bio_set_op_attrs() robustness Since REQ_OP_BITS == 3 and __REQ_NR_BITS == 30 it is not that hard to pass an op_flags argument to bio_set_op_attrs() that is larger than the number of bits reserved for the op_flags argument. Complain if this happens. Additionally, ensure that negative arguments trigger a complaint (1 << ... is signed while 1U << ... is unsigned; adding 0U to an integer expression causes it to be promoted to an unsigned type). Signed-off-by: Bart Van Assche Cc: Mike Christie Cc: Christoph Hellwig Cc: Hannes Reinecke Cc: Damien Le Moal Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 17 ++++++++++++----- 1 file changed, 12 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 311fa2f478b8..53ee1a2acd4f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -93,11 +93,18 @@ struct bio { #define bio_flags(bio) ((bio)->bi_opf & ((1 << BIO_OP_SHIFT) - 1)) #define bio_op(bio) ((bio)->bi_opf >> BIO_OP_SHIFT) -#define bio_set_op_attrs(bio, op, op_flags) do { \ - WARN_ON(op >= (1 << REQ_OP_BITS)); \ - (bio)->bi_opf = bio_flags(bio); \ - (bio)->bi_opf |= ((unsigned int) (op) << BIO_OP_SHIFT); \ - (bio)->bi_opf |= op_flags; \ +#define bio_set_op_attrs(bio, op, op_flags) do { \ + if (__builtin_constant_p(op)) \ + BUILD_BUG_ON((op) + 0U >= (1U << REQ_OP_BITS)); \ + else \ + WARN_ON_ONCE((op) + 0U >= (1U << REQ_OP_BITS)); \ + if (__builtin_constant_p(op_flags)) \ + BUILD_BUG_ON((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ + else \ + WARN_ON_ONCE((op_flags) + 0U >= (1U << BIO_OP_SHIFT)); \ + (bio)->bi_opf = bio_flags(bio); \ + (bio)->bi_opf |= (((op) + 0U) << BIO_OP_SHIFT); \ + (bio)->bi_opf |= (op_flags); \ } while (0) #define BIO_RESET_BYTES offsetof(struct bio, bi_max_vecs) -- cgit v1.2.3 From 3f7c624aa58f769e0313ca3310704c5d88ac99ce Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 16:03:02 +0200 Subject: block: remove bio_destructor_t Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 53ee1a2acd4f..cd395ecec99d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -16,7 +16,6 @@ struct block_device; struct io_context; struct cgroup_subsys_state; typedef void (bio_end_io_t) (struct bio *); -typedef void (bio_destructor_t) (struct bio *); #ifdef CONFIG_BLOCK /* -- cgit v1.2.3 From fc95db3edeaf924e9ad16592d9c1b06c730a49c9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 16:03:03 +0200 Subject: bio.h: remove a very outdated comment Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 23ddf4b46a9b..e00721a2dce1 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -1,6 +1,4 @@ /* - * 2.5 block I/O model - * * Copyright (C) 2001 Jens Axboe * * This program is free software; you can redistribute it and/or modify -- cgit v1.2.3 From c5c5ca777469f0ff854f1da0aff9b3a9051b3ef7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 11 Sep 2016 16:03:04 +0200 Subject: block: remove IOPRIO_BITS Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index beb9ce1c2c23..8c1239020d79 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -7,7 +7,6 @@ /* * Gives us 8 prio classes with 13-bits of data for each class */ -#define IOPRIO_BITS (16) #define IOPRIO_CLASS_SHIFT (13) #define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) -- cgit v1.2.3 From 2849450ad39d2e699fda2d5c6f41e05d87fd7004 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Wed, 14 Sep 2016 13:28:30 -0400 Subject: blk-mq: introduce blk_mq_delay_kick_requeue_list() blk_mq_delay_kick_requeue_list() provides the ability to kick the q->requeue_list after a specified time. To do this the request_queue's 'requeue_work' member was changed to a delayed_work. blk_mq_delay_kick_requeue_list() allows DM to defer processing requeued requests while it doesn't make sense to immediately requeue them (e.g. when all paths in a DM multipath have failed). Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 + include/linux/blkdev.h | 2 +- 2 files changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff14f68067aa..60ef14cbcd2d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -233,6 +233,7 @@ void blk_mq_requeue_request(struct request *rq); void blk_mq_add_to_requeue_list(struct request *rq, bool at_head); void blk_mq_cancel_requeue_work(struct request_queue *q); void blk_mq_kick_requeue_list(struct request_queue *q); +void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); void blk_mq_abort_requeue_list(struct request_queue *q); void blk_mq_complete_request(struct request *rq, int error); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 69aae720f4ef..c47c358ba052 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -449,7 +449,7 @@ struct request_queue { struct list_head requeue_list; spinlock_t requeue_lock; - struct work_struct requeue_work; + struct delayed_work requeue_work; struct mutex sysfs_lock; -- cgit v1.2.3 From 88459642cba452630326b9cab1c651e09577d4e4 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 17 Sep 2016 08:38:44 -0600 Subject: blk-mq: abstract tag allocation out into sbitmap library This is a generally useful data structure, so make it available to anyone else who might want to use it. It's also a nice cleanup separating the allocation logic from the rest of the tag handling logic. The code is behind a new Kconfig option, CONFIG_SBITMAP, which is only selected by CONFIG_BLOCK for now. This should be a complete noop functionality-wise. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 9 +- include/linux/sbitmap.h | 327 ++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 329 insertions(+), 7 deletions(-) create mode 100644 include/linux/sbitmap.h (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 60ef14cbcd2d..2575779cf13f 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -2,6 +2,7 @@ #define BLK_MQ_H #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -12,12 +13,6 @@ struct blk_mq_cpu_notifier { int (*notify)(void *data, unsigned long action, unsigned int cpu); }; -struct blk_mq_ctxmap { - unsigned int size; - unsigned int bits_per_word; - struct blk_align_bitmap *map; -}; - struct blk_mq_hw_ctx { struct { spinlock_t lock; @@ -37,7 +32,7 @@ struct blk_mq_hw_ctx { void *driver_data; - struct blk_mq_ctxmap ctx_map; + struct sbitmap ctx_map; struct blk_mq_ctx **ctxs; unsigned int nr_ctx; diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h new file mode 100644 index 000000000000..1a3b836042e1 --- /dev/null +++ b/include/linux/sbitmap.h @@ -0,0 +1,327 @@ +/* + * Fast and scalable bitmaps. + * + * Copyright (C) 2016 Facebook + * Copyright (C) 2013-2014 Jens Axboe + * + * This program is free software; you can redistribute it and/or + * modify it under the terms of the GNU General Public + * License v2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . + */ + +#ifndef __LINUX_SCALE_BITMAP_H +#define __LINUX_SCALE_BITMAP_H + +#include +#include + +/** + * struct sbitmap_word - Word in a &struct sbitmap. + */ +struct sbitmap_word { + /** + * @word: The bitmap word itself. + */ + unsigned long word; + + /** + * @depth: Number of bits being used in @word. + */ + unsigned long depth; +} ____cacheline_aligned_in_smp; + +/** + * struct sbitmap - Scalable bitmap. + * + * A &struct sbitmap is spread over multiple cachelines to avoid ping-pong. This + * trades off higher memory usage for better scalability. + */ +struct sbitmap { + /** + * @depth: Number of bits used in the whole bitmap. + */ + unsigned int depth; + + /** + * @shift: log2(number of bits used per word) + */ + unsigned int shift; + + /** + * @map_nr: Number of words (cachelines) being used for the bitmap. + */ + unsigned int map_nr; + + /** + * @map: Allocated bitmap. + */ + struct sbitmap_word *map; +}; + +#define SBQ_WAIT_QUEUES 8 +#define SBQ_WAKE_BATCH 8 + +/** + * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. + */ +struct sbq_wait_state { + /** + * @wait_cnt: Number of frees remaining before we wake up. + */ + atomic_t wait_cnt; + + /** + * @wait: Wait queue. + */ + wait_queue_head_t wait; +} ____cacheline_aligned_in_smp; + +/** + * struct sbitmap_queue - Scalable bitmap with the added ability to wait on free + * bits. + * + * A &struct sbitmap_queue uses multiple wait queues and rolling wakeups to + * avoid contention on the wait queue spinlock. This ensures that we don't hit a + * scalability wall when we run out of free bits and have to start putting tasks + * to sleep. + */ +struct sbitmap_queue { + /** + * @sb: Scalable bitmap. + */ + struct sbitmap sb; + + /** + * @wake_batch: Number of bits which must be freed before we wake up any + * waiters. + */ + unsigned int wake_batch; + + /** + * @wake_index: Next wait queue in @ws to wake up. + */ + atomic_t wake_index; + + /** + * @ws: Wait queues. + */ + struct sbq_wait_state *ws; +}; + +/** + * sbitmap_init_node() - Initialize a &struct sbitmap on a specific memory node. + * @sb: Bitmap to initialize. + * @depth: Number of bits to allocate. + * @shift: Use 2^@shift bits per word in the bitmap; if a negative number if + * given, a good default is chosen. + * @flags: Allocation flags. + * @node: Memory node to allocate on. + * + * Return: Zero on success or negative errno on failure. + */ +int sbitmap_init_node(struct sbitmap *sb, unsigned int depth, int shift, + gfp_t flags, int node); + +/** + * sbitmap_free() - Free memory used by a &struct sbitmap. + * @sb: Bitmap to free. + */ +static inline void sbitmap_free(struct sbitmap *sb) +{ + kfree(sb->map); + sb->map = NULL; +} + +/** + * sbitmap_resize() - Resize a &struct sbitmap. + * @sb: Bitmap to resize. + * @depth: New number of bits to resize to. + * + * Doesn't reallocate anything. It's up to the caller to ensure that the new + * depth doesn't exceed the depth that the sb was initialized with. + */ +void sbitmap_resize(struct sbitmap *sb, unsigned int depth); + +/** + * sbitmap_get() - Try to allocate a free bit from a &struct sbitmap. + * @sb: Bitmap to allocate from. + * @alloc_hint: Hint for where to start searching for a free bit. + * @round_robin: If true, be stricter about allocation order; always allocate + * starting from the last allocated bit. This is less efficient + * than the default behavior (false). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int sbitmap_get(struct sbitmap *sb, unsigned int alloc_hint, bool round_robin); + +/** + * sbitmap_any_bit_set() - Check for a set bit in a &struct sbitmap. + * @sb: Bitmap to check. + * + * Return: true if any bit in the bitmap is set, false otherwise. + */ +bool sbitmap_any_bit_set(const struct sbitmap *sb); + +/** + * sbitmap_any_bit_clear() - Check for an unset bit in a &struct + * sbitmap. + * @sb: Bitmap to check. + * + * Return: true if any bit in the bitmap is clear, false otherwise. + */ +bool sbitmap_any_bit_clear(const struct sbitmap *sb); + +typedef bool (*sb_for_each_fn)(struct sbitmap *, unsigned int, void *); + +/** + * sbitmap_for_each_set() - Iterate over each set bit in a &struct sbitmap. + * @sb: Bitmap to iterate over. + * @fn: Callback. Should return true to continue or false to break early. + * @data: Pointer to pass to callback. + * + * This is inline even though it's non-trivial so that the function calls to the + * callback will hopefully get optimized away. + */ +static inline void sbitmap_for_each_set(struct sbitmap *sb, sb_for_each_fn fn, + void *data) +{ + unsigned int i; + + for (i = 0; i < sb->map_nr; i++) { + struct sbitmap_word *word = &sb->map[i]; + unsigned int off, nr; + + if (!word->word) + continue; + + nr = 0; + off = i << sb->shift; + while (1) { + nr = find_next_bit(&word->word, word->depth, nr); + if (nr >= word->depth) + break; + + if (!fn(sb, off + nr, data)) + return; + + nr++; + } + } +} + +#define SB_NR_TO_INDEX(sb, bitnr) ((bitnr) >> (sb)->shift) +#define SB_NR_TO_BIT(sb, bitnr) ((bitnr) & ((1U << (sb)->shift) - 1U)) + +static inline unsigned long *__sbitmap_word(struct sbitmap *sb, + unsigned int bitnr) +{ + return &sb->map[SB_NR_TO_INDEX(sb, bitnr)].word; +} + +/* Helpers equivalent to the operations in asm/bitops.h and linux/bitmap.h */ + +static inline void sbitmap_set_bit(struct sbitmap *sb, unsigned int bitnr) +{ + set_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + +static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) +{ + clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + +static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) +{ + return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); +} + +unsigned int sbitmap_weight(const struct sbitmap *sb); + +/** + * sbitmap_queue_init_node() - Initialize a &struct sbitmap_queue on a specific + * memory node. + * @sbq: Bitmap queue to initialize. + * @depth: See sbitmap_init_node(). + * @shift: See sbitmap_init_node(). + * @flags: Allocation flags. + * @node: Memory node to allocate on. + * + * Return: Zero on success or negative errno on failure. + */ +int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, + int shift, gfp_t flags, int node); + +/** + * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. + * + * @sbq: Bitmap queue to free. + */ +static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) +{ + kfree(sbq->ws); + sbitmap_free(&sbq->sb); +} + +/** + * sbitmap_queue_resize() - Resize a &struct sbitmap_queue. + * @sbq: Bitmap queue to resize. + * @depth: New number of bits to resize to. + * + * Like sbitmap_resize(), this doesn't reallocate anything. It has to do + * some extra work on the &struct sbitmap_queue, so it's not safe to just + * resize the underlying &struct sbitmap. + */ +void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); + +/** + * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a + * &struct sbitmap_queue. + * @sbq: Bitmap to free from. + * @nr: Bit number to free. + */ +void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr); + +static inline int sbq_index_inc(int index) +{ + return (index + 1) & (SBQ_WAIT_QUEUES - 1); +} + +static inline void sbq_index_atomic_inc(atomic_t *index) +{ + int old = atomic_read(index); + int new = sbq_index_inc(old); + atomic_cmpxchg(index, old, new); +} + +/** + * sbq_wait_ptr() - Get the next wait queue to use for a &struct + * sbitmap_queue. + * @sbq: Bitmap queue to wait on. + * @wait_index: A counter per "user" of @sbq. + */ +static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq, + atomic_t *wait_index) +{ + struct sbq_wait_state *ws; + + ws = &sbq->ws[atomic_read(wait_index)]; + sbq_index_atomic_inc(wait_index); + return ws; +} + +/** + * sbitmap_queue_wake_all() - Wake up everything waiting on a &struct + * sbitmap_queue. + * @sbq: Bitmap queue to wake up. + */ +void sbitmap_queue_wake_all(struct sbitmap_queue *sbq); + +#endif /* __LINUX_SCALE_BITMAP_H */ -- cgit v1.2.3 From 40aabb67464d5aad9ca3d2a5fedee56e2ff45aa0 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 17 Sep 2016 01:28:23 -0700 Subject: sbitmap: push per-cpu last_tag into sbitmap_queue Allocating your own per-cpu allocation hint separately makes for an awkward API. Instead, allocate the per-cpu hint as part of the struct sbitmap_queue. There's no point for a struct sbitmap_queue without the cache, but you can still use a bare struct sbitmap. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 45 ++++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 44 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 1a3b836042e1..6745545e0b22 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -99,6 +99,14 @@ struct sbitmap_queue { */ struct sbitmap sb; + /* + * @alloc_hint: Cache of last successfully allocated or freed bit. + * + * This is per-cpu, which allows multiple users to stick to different + * cachelines until the map is exhausted. + */ + unsigned int __percpu *alloc_hint; + /** * @wake_batch: Number of bits which must be freed before we wake up any * waiters. @@ -267,6 +275,7 @@ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) { kfree(sbq->ws); + free_percpu(sbq->alloc_hint); sbitmap_free(&sbq->sb); } @@ -281,13 +290,47 @@ static inline void sbitmap_queue_free(struct sbitmap_queue *sbq) */ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); +/** + * __sbitmap_queue_get() - Try to allocate a free bit from a &struct + * sbitmap_queue with preemption already disabled. + * @sbq: Bitmap queue to allocate from. + * @round_robin: See sbitmap_get(). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +int __sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin); + +/** + * sbitmap_queue_get() - Try to allocate a free bit from a &struct + * sbitmap_queue. + * @sbq: Bitmap queue to allocate from. + * @round_robin: See sbitmap_get(). + * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to + * sbitmap_queue_clear()). + * + * Return: Non-negative allocated bit number if successful, -1 otherwise. + */ +static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin, + unsigned int *cpu) +{ + int nr; + + *cpu = get_cpu(); + nr = __sbitmap_queue_get(sbq, round_robin); + put_cpu(); + return nr; +} + /** * sbitmap_queue_clear() - Free an allocated bit and wake up waiters on a * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. + * @round_robin: See sbitmap_get(). + * @cpu: CPU the bit was allocated on. */ -void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr); +void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, + bool round_robin, unsigned int cpu); static inline int sbq_index_inc(int index) { -- cgit v1.2.3 From f4a644db86669d938c71f19560aebf69d4720d63 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Sat, 17 Sep 2016 01:28:24 -0700 Subject: sbitmap: push alloc policy into sbitmap_queue Again, there's no point in passing this in every time. Make it part of struct sbitmap_queue and clean up the API. Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 6745545e0b22..f017fd6e69c4 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -122,6 +122,11 @@ struct sbitmap_queue { * @ws: Wait queues. */ struct sbq_wait_state *ws; + + /** + * @round_robin: Allocate bits in strict round-robin order. + */ + bool round_robin; }; /** @@ -259,13 +264,14 @@ unsigned int sbitmap_weight(const struct sbitmap *sb); * @sbq: Bitmap queue to initialize. * @depth: See sbitmap_init_node(). * @shift: See sbitmap_init_node(). + * @round_robin: See sbitmap_get(). * @flags: Allocation flags. * @node: Memory node to allocate on. * * Return: Zero on success or negative errno on failure. */ int sbitmap_queue_init_node(struct sbitmap_queue *sbq, unsigned int depth, - int shift, gfp_t flags, int node); + int shift, bool round_robin, gfp_t flags, int node); /** * sbitmap_queue_free() - Free memory used by a &struct sbitmap_queue. @@ -294,29 +300,27 @@ void sbitmap_queue_resize(struct sbitmap_queue *sbq, unsigned int depth); * __sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue with preemption already disabled. * @sbq: Bitmap queue to allocate from. - * @round_robin: See sbitmap_get(). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -int __sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin); +int __sbitmap_queue_get(struct sbitmap_queue *sbq); /** * sbitmap_queue_get() - Try to allocate a free bit from a &struct * sbitmap_queue. * @sbq: Bitmap queue to allocate from. - * @round_robin: See sbitmap_get(). * @cpu: Output parameter; will contain the CPU we ran on (e.g., to be passed to * sbitmap_queue_clear()). * * Return: Non-negative allocated bit number if successful, -1 otherwise. */ -static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin, +static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, unsigned int *cpu) { int nr; *cpu = get_cpu(); - nr = __sbitmap_queue_get(sbq, round_robin); + nr = __sbitmap_queue_get(sbq); put_cpu(); return nr; } @@ -326,11 +330,10 @@ static inline int sbitmap_queue_get(struct sbitmap_queue *sbq, bool round_robin, * &struct sbitmap_queue. * @sbq: Bitmap to free from. * @nr: Bit number to free. - * @round_robin: See sbitmap_get(). * @cpu: CPU the bit was allocated on. */ void sbitmap_queue_clear(struct sbitmap_queue *sbq, unsigned int nr, - bool round_robin, unsigned int cpu); + unsigned int cpu); static inline int sbq_index_inc(int index) { -- cgit v1.2.3 From b21d5b301794ae332eaa6e177d71fe8b77d3664c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 16 Sep 2016 14:25:06 +0200 Subject: blk-mq: register device instead of disk MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Enable devices without a gendisk instance to register itself with blk-mq and expose the associated multi-queue sysfs entries. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2575779cf13f..fbcfdf323243 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -175,8 +175,8 @@ enum { struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); struct request_queue *blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); -int blk_mq_register_disk(struct gendisk *); -void blk_mq_unregister_disk(struct gendisk *); +int blk_mq_register_dev(struct device *, struct request_queue *); +void blk_mq_unregister_dev(struct device *, struct request_queue *); int blk_mq_alloc_tag_set(struct blk_mq_tag_set *set); void blk_mq_free_tag_set(struct blk_mq_tag_set *set); -- cgit v1.2.3 From b0b4e09c1ae71c4ec33df0616b830ae050006e9b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Matias=20Bj=C3=B8rling?= Date: Fri, 16 Sep 2016 14:25:07 +0200 Subject: lightnvm: control life of nvm_dev in driver MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit LightNVM compatible device drivers does not have a method to expose LightNVM specific sysfs entries. To enable LightNVM sysfs entries to be exposed, lightnvm device drivers require a struct device to attach it to. To allow both the actual device driver and lightnvm sysfs entries to coexist, the device driver tracks the lifetime of the nvm_dev structure. This patch refactors NVMe and null_blk to handle the lifetime of struct nvm_dev, which eliminates the need for struct gendisk when a lightnvm compatible device is provided. Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index ba78b8306674..5afc2634f332 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -524,9 +524,9 @@ extern struct nvm_block *nvm_get_blk(struct nvm_dev *, struct nvm_lun *, unsigned long); extern void nvm_put_blk(struct nvm_dev *, struct nvm_block *); -extern int nvm_register(struct request_queue *, char *, - struct nvm_dev_ops *); -extern void nvm_unregister(char *); +extern struct nvm_dev *nvm_alloc_dev(int); +extern int nvm_register(struct nvm_dev *); +extern void nvm_unregister(struct nvm_dev *); void nvm_mark_blk(struct nvm_dev *dev, struct ppa_addr ppa, int type); @@ -575,11 +575,14 @@ extern int nvm_dev_factory(struct nvm_dev *, int flags); #else /* CONFIG_NVM */ struct nvm_dev_ops; -static inline int nvm_register(struct request_queue *q, char *disk_name, - struct nvm_dev_ops *ops) +static inline struct nvm_dev *nvm_alloc_dev(int node) +{ + return ERR_PTR(-EINVAL); +} +static inline int nvm_register(struct nvm_dev *dev) { return -EINVAL; } -static inline void nvm_unregister(char *disk_name) {} +static inline void nvm_unregister(struct nvm_dev *dev) {} #endif /* CONFIG_NVM */ #endif /* LIGHTNVM.H */ -- cgit v1.2.3 From 40267efddc296190d50c61d96daf277151447cf6 Mon Sep 17 00:00:00 2001 From: "Simon A. F. Lund" Date: Fri, 16 Sep 2016 14:25:08 +0200 Subject: lightnvm: expose device geometry through sysfs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit For a host to access an Open-Channel SSD, it has to know its geometry, so that it writes and reads at the appropriate device bounds. Currently, the geometry information is kept within the kernel, and not exported to user-space for consumption. This patch exposes the configuration through sysfs and enables user-space libraries, such as liblightnvm, to use the sysfs implementation to get the geometry of an Open-Channel SSD. The sysfs entries are stored within the device hierarchy, and can be found using the "lightnvm" device type. An example configuration looks like this: /sys/class/nvme/ └── nvme0n1 ├── capabilities: 3 ├── device_mode: 1 ├── erase_max: 1000000 ├── erase_typ: 1000000 ├── flash_media_type: 0 ├── media_capabilities: 0x00000001 ├── media_type: 0 ├── multiplane: 0x00010101 ├── num_blocks: 1022 ├── num_channels: 1 ├── num_luns: 4 ├── num_pages: 64 ├── num_planes: 1 ├── page_size: 4096 ├── prog_max: 100000 ├── prog_typ: 100000 ├── read_max: 10000 ├── read_typ: 10000 ├── sector_oob_size: 0 ├── sector_size: 4096 ├── media_manager: gennvm ├── ppa_format: 0x380830082808001010102008 ├── vendor_opcode: 0 ├── max_phys_secs: 64 └── version: 1 Signed-off-by: Simon A. F. Lund Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 5afc2634f332..d190786e4ad8 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -352,7 +352,10 @@ struct nvm_dev { /* Backend device */ struct request_queue *q; + struct device dev; + struct device *parent_dev; char name[DISK_NAME_LEN]; + void *private_data; struct mutex mlock; spinlock_t lock; -- cgit v1.2.3 From 491221f88d00651e449c9caf7415b6453c8a77b7 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Thu, 22 Sep 2016 03:10:01 -0400 Subject: block: export bio_free_pages to other modules bio_free_pages is introduced in commit 1dfa0f68c040 ("block: add a helper to free bio bounce buffer pages"), we can reuse the func in other modules after it was imported. Cc: Christoph Hellwig Cc: Jens Axboe Cc: Mike Snitzer Cc: Shaohua Li Signed-off-by: Guoqing Jiang Acked-by: Kent Overstreet Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index e00721a2dce1..97cb48f03dc7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -459,6 +459,7 @@ static inline void bio_flush_dcache_pages(struct bio *bi) extern void bio_copy_data(struct bio *dst, struct bio *src); extern int bio_alloc_pages(struct bio *bio, gfp_t gfp); +extern void bio_free_pages(struct bio *bio); extern struct bio *bio_copy_user_iov(struct request_queue *, struct rq_map_data *, -- cgit v1.2.3 From 1b792f2f92784c00db2e6431496e437855d6f12a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 21 Sep 2016 10:12:13 -0600 Subject: blk-mq: add flag for drivers wanting blocking ->queue_rq() If a driver sets BLK_MQ_F_BLOCKING, it is allowed to block in its ->queue_rq() handler. For that case, blk-mq ensures that we always calls it from a safe context. Signed-off-by: Jens Axboe Tested-by: Josef Bacik --- include/linux/blk-mq.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index fbcfdf323243..5daa0ef756dd 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -155,6 +155,7 @@ enum { BLK_MQ_F_TAG_SHARED = 1 << 1, BLK_MQ_F_SG_MERGE = 1 << 2, BLK_MQ_F_DEFER_ISSUE = 1 << 4, + BLK_MQ_F_BLOCKING = 1 << 5, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, -- cgit v1.2.3 From 55679c8d23d191c24ad133abc5647e3054ca8de1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Fri, 23 Sep 2016 09:07:56 -0700 Subject: blkcg: Annotate blkg_hint correctly Avoid that sparse complains about blkg_hint manipulations. Fixes: a637120e4902 ("blkcg: use radix tree to index blkgs from blkcg") Signed-off-by: Bart Van Assche Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 10648e300c93..cbdbf34de5b6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -45,7 +45,7 @@ struct blkcg { spinlock_t lock; struct radix_tree_root blkg_tree; - struct blkcg_gq *blkg_hint; + struct blkcg_gq __rcu *blkg_hint; struct hlist_head blkg_list; struct blkcg_policy_data *cpd[BLKCG_MAX_POLS]; -- cgit v1.2.3