From 190470871ae28da7bdb3909f6124385c8472fc97 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Sun, 17 Mar 2019 18:01:08 +0800 Subject: block: put the same page when adding it to bio When the added page is merged to last same page in bio_add_pc_page(), the user may need to put this page for avoiding page leak. bio_map_user_iov() needs this kind of handling, and now it deals with it by itself in hack style. Moves the handling of put page into __bio_add_pc_page(), so bio_map_user_iov() may be simplified a bit, and maybe more users can benefit from this change. Cc: Omar Sandoval Cc: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index bb6090aa165d..bb915591557b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -432,6 +432,9 @@ void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); +extern int __bio_add_pc_page(struct request_queue *, struct bio *, + struct page *, unsigned int, unsigned int, + bool); bool __bio_try_merge_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off, bool same_page); void __bio_add_page(struct bio *bio, struct page *page, -- cgit v1.2.3 From 2b24e6f63ac9e817630424c6d8f008256348dfc4 Mon Sep 17 00:00:00 2001 From: Johannes Thumshirn Date: Wed, 3 Apr 2019 11:15:19 +0200 Subject: block: bio: ensure newly added bio flags don't override BVEC_POOL_IDX With the introduction of BIO_NO_PAGE_REF we've used up all available bits in bio::bi_flags. Convert the defines of the flags to an enum and add a BUILD_BUG_ON() call to make sure no-one adds a new one and thus overrides the BVEC_POOL_IDX causing crashes. Reviewed-by: Ming Lei Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Christoph Hellwig Signed-off-by: Johannes Thumshirn Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 29 ++++++++++++++++------------- 1 file changed, 16 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 791fee35df88..be418275763c 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -215,21 +215,24 @@ struct bio { /* * bio flags */ -#define BIO_NO_PAGE_REF 0 /* don't put release vec pages */ -#define BIO_SEG_VALID 1 /* bi_phys_segments valid */ -#define BIO_CLONED 2 /* doesn't own data */ -#define BIO_BOUNCED 3 /* bio is a bounce bio */ -#define BIO_USER_MAPPED 4 /* contains user pages */ -#define BIO_NULL_MAPPED 5 /* contains invalid user pages */ -#define BIO_QUIET 6 /* Make BIO Quiet */ -#define BIO_CHAIN 7 /* chained bio, ->bi_remaining in effect */ -#define BIO_REFFED 8 /* bio has elevated ->bi_cnt */ -#define BIO_THROTTLED 9 /* This bio has already been subjected to +enum { + BIO_NO_PAGE_REF, /* don't put release vec pages */ + BIO_SEG_VALID, /* bi_phys_segments valid */ + BIO_CLONED, /* doesn't own data */ + BIO_BOUNCED, /* bio is a bounce bio */ + BIO_USER_MAPPED, /* contains user pages */ + BIO_NULL_MAPPED, /* contains invalid user pages */ + BIO_QUIET, /* Make BIO Quiet */ + BIO_CHAIN, /* chained bio, ->bi_remaining in effect */ + BIO_REFFED, /* bio has elevated ->bi_cnt */ + BIO_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ -#define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion + BIO_TRACE_COMPLETION, /* bio_endio() should trace the final completion * of this bio. */ -#define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ -#define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */ + BIO_QUEUE_ENTERED, /* can use blk_queue_enter_live() */ + BIO_TRACKED, /* set if bio goes through the rq_qos path */ + BIO_FLAG_LAST +}; /* See BVEC_POOL_OFFSET below before adding new flags */ -- cgit v1.2.3 From 3aef3cae4342c1d8137a1c0782cbb66f1be3943c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 09:14:01 -0700 Subject: block: add a req_bvec helper Return the currently active bvec segment, potentially spanning multiple pages. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5c58a3b2bf00..84ce76f92d83 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -932,6 +932,17 @@ static inline unsigned int blk_rq_payload_bytes(struct request *rq) return blk_rq_bytes(rq); } +/* + * Return the first full biovec in the request. The caller needs to check that + * there are any bvecs before calling this helper. + */ +static inline struct bio_vec req_bvec(struct request *rq) +{ + if (rq->rq_flags & RQF_SPECIAL_PAYLOAD) + return rq->special_vec; + return mp_bvec_iter_bvec(rq->bio->bi_io_vec, rq->bio->bi_iter); +} + static inline unsigned int blk_queue_get_max_sectors(struct request_queue *q, int op) { -- cgit v1.2.3 From 2a876f5e25e8ec9fa5777d36e5695ee33dd63f6f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:38:29 -0700 Subject: block: add a rq_integrity_vec helper This provides a nice little shortcut to get the integrity data for drivers like NVMe that only support a single integrity segment. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 84ce76f92d83..3a13fbe13e08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1559,6 +1559,17 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, return bio_integrity_intervals(bi, sectors) * bi->tuple_size; } +/* + * Return the first bvec that contains integrity data. Only drivers that are + * limited to a single integrity segment should use this helper. + */ +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + if (WARN_ON_ONCE(queue_max_integrity_segments(rq->q) > 1)) + return NULL; + return rq->bio->bi_integrity->bip_vec; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ struct bio; @@ -1633,6 +1644,11 @@ static inline unsigned int bio_integrity_bytes(struct blk_integrity *bi, return 0; } +static inline struct bio_vec *rq_integrity_vec(struct request *rq) +{ + return NULL; +} + #endif /* CONFIG_BLK_DEV_INTEGRITY */ struct block_device_operations { -- cgit v1.2.3 From 9d9de535f385a8b3ba0e88ca0abf386c5704bbfc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:18:30 -0700 Subject: block: add a rq_dma_dir helper In a lot of places we want to know the DMA direction for a given struct request. Add a little helper to make it a littler easier. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a13fbe13e08..74469a4dc0a1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -641,6 +641,9 @@ static inline bool blk_account_rq(struct request *rq) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) +#define rq_dma_dir(rq) \ + (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) + static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; -- cgit v1.2.3 From 3ab3a0313cb8c50391d74e40fd46a3408d8e4de9 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 3 Mar 2019 08:40:36 -0700 Subject: block: add dma_map_bvec helper Provide a nice little shortcut for mapping a single bvec. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 74469a4dc0a1..4b85dc066264 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -644,6 +644,10 @@ static inline bool blk_account_rq(struct request *rq) #define rq_dma_dir(rq) \ (op_is_write(req_op(rq)) ? DMA_TO_DEVICE : DMA_FROM_DEVICE) +#define dma_map_bvec(dev, bv, dir, attrs) \ + dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ + (dir), (attrs)) + static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; -- cgit v1.2.3 From 72deb455b5ec619ff043c30bc90025aa3de3cdda Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 5 Apr 2019 18:08:59 +0200 Subject: block: remove CONFIG_LBDAF Currently support for 64-bit sector_t and blkcnt_t is optional on 32-bit architectures. These types are required to support block device and/or file sizes larger than 2 TiB, and have generally defaulted to on for a long time. Enabling the option only increases the i386 tinyconfig size by 145 bytes, and many data structures already always use 64-bit values for their in-core and on-disk data structures anyway, so there should not be a large change in dynamic memory usage either. Dropping this option removes a somewhat weird non-default config that has cause various bugs or compiler warnings when actually used. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/genhd.h | 8 ++++---- include/linux/kernel.h | 14 ++------------ include/linux/types.h | 5 ----- 3 files changed, 6 insertions(+), 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 06c0fd594097..98076b1b5e48 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -714,7 +714,7 @@ static inline void hd_free_part(struct hd_struct *part) */ static inline sector_t part_nr_sects_read(struct hd_struct *part) { -#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) sector_t nr_sects; unsigned seq; do { @@ -722,7 +722,7 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) nr_sects = part->nr_sects; } while (read_seqcount_retry(&part->nr_sects_seq, seq)); return nr_sects; -#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) sector_t nr_sects; preempt_disable(); @@ -741,11 +741,11 @@ static inline sector_t part_nr_sects_read(struct hd_struct *part) */ static inline void part_nr_sects_write(struct hd_struct *part, sector_t size) { -#if BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_SMP) +#if BITS_PER_LONG==32 && defined(CONFIG_SMP) write_seqcount_begin(&part->nr_sects_seq); part->nr_sects = size; write_seqcount_end(&part->nr_sects_seq); -#elif BITS_PER_LONG==32 && defined(CONFIG_LBDAF) && defined(CONFIG_PREEMPT) +#elif BITS_PER_LONG==32 && defined(CONFIG_PREEMPT) preempt_disable(); part->nr_sects = size; preempt_enable(); diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 34a5036debd3..24ef5a018a5e 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -17,6 +17,7 @@ #include #include #include +#include #define STACK_MAGIC 0xdeadbeef @@ -175,18 +176,7 @@ #define _RET_IP_ (unsigned long)__builtin_return_address(0) #define _THIS_IP_ ({ __label__ __here; __here: (unsigned long)&&__here; }) -#ifdef CONFIG_LBDAF -# define sector_div(a, b) do_div(a, b) -#else -# define sector_div(n, b)( \ -{ \ - int _res; \ - _res = (n) % (b); \ - (n) /= (b); \ - _res; \ -} \ -) -#endif +#define sector_div(a, b) do_div(a, b) /** * upper_32_bits - return bits 32-63 of a number diff --git a/include/linux/types.h b/include/linux/types.h index cc0dbbe551d5..231114ae38f4 100644 --- a/include/linux/types.h +++ b/include/linux/types.h @@ -127,13 +127,8 @@ typedef s64 int64_t; * * blkcnt_t is the type of the inode's block count. */ -#ifdef CONFIG_LBDAF typedef u64 sector_t; typedef u64 blkcnt_t; -#else -typedef unsigned long sector_t; -typedef unsigned long blkcnt_t; -#endif /* * The type of an index into the pagecache. -- cgit v1.2.3 From 7321ecbfc7cf85211460a1dc6bb0ccfc3dcf9df0 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Apr 2019 08:23:30 +0200 Subject: block: change how we get page references in bio_iov_iter_get_pages Instead of needing a special macro to iterate over all pages in a bvec just do a second passs over the whole bio. This also matches what we do on the release side. The release side helper is moved up to where we need the get helper to clearly express the symmetry. Reviewed-by: Ming Lei Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f6275c4da13a..307bbda62b7b 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -189,9 +189,4 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec, } } -#define mp_bvec_for_each_page(pg, bv, i) \ - for (i = (bv)->bv_offset / PAGE_SIZE; \ - (i <= (((bv)->bv_offset + (bv)->bv_len - 1) / PAGE_SIZE)) && \ - (pg = bvec_nth_page((bv)->bv_page, i)); i += 1) - #endif /* __LINUX_BVEC_ITER_H */ -- cgit v1.2.3 From 52d52d1c98a90cfe860b83498e4b6074aad95c15 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 11 Apr 2019 08:23:31 +0200 Subject: block: only allow contiguous page structs in a bio_vec We currently have to call nth_page when iterating over pages inside a bio_vec. Jens complained a while ago that this is fairly expensive. To mitigate this we can check that that the actual page structures are contiguous when adding them to the bio, and just do check pointer arithmetics later on. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 307bbda62b7b..44b0f4684190 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -51,11 +51,6 @@ struct bvec_iter_all { unsigned done; }; -static inline struct page *bvec_nth_page(struct page *page, int idx) -{ - return idx == 0 ? page : nth_page(page, idx); -} - /* * various member access, note that bio_data should of course not be used * on highmem page vectors @@ -92,8 +87,8 @@ static inline struct page *bvec_nth_page(struct page *page, int idx) PAGE_SIZE - bvec_iter_offset((bvec), (iter))) #define bvec_iter_page(bvec, iter) \ - bvec_nth_page(mp_bvec_iter_page((bvec), (iter)), \ - mp_bvec_iter_page_idx((bvec), (iter))) + (mp_bvec_iter_page((bvec), (iter)) + \ + mp_bvec_iter_page_idx((bvec), (iter))) #define bvec_iter_bvec(bvec, iter) \ ((struct bio_vec) { \ @@ -157,7 +152,7 @@ static inline void mp_bvec_next_segment(const struct bio_vec *bvec, struct bio_vec *bv = &iter_all->bv; if (bv->bv_page) { - bv->bv_page = nth_page(bv->bv_page, 1); + bv->bv_page++; bv->bv_offset = 0; } else { bv->bv_page = bvec->bv_page; @@ -177,7 +172,7 @@ static inline void mp_bvec_last_segment(const struct bio_vec *bvec, unsigned total = bvec->bv_offset + bvec->bv_len; unsigned last_page = (total - 1) / PAGE_SIZE; - seg->bv_page = bvec_nth_page(bvec->bv_page, last_page); + seg->bv_page = bvec->bv_page + last_page; /* the whole segment is inside the last page */ if (bvec->bv_offset >= last_page * PAGE_SIZE) { -- cgit v1.2.3 From 673387a930059fc4ad8060847a1d46f94e702281 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 27 Mar 2019 14:51:01 +0100 Subject: block: genhd: remove async_events field The async_events field, intended to be used for drivers that support asynchronous notifications about disk events (aka media change events), isn't currently used by any driver, and apparently that has been that way for a long time (if not forever). Remove it. Reviewed-by: Hannes Reinecke Reviewed-by: Christoph Hellwig Signed-off-by: Martin Wilck Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 98076b1b5e48..a65d9fc17369 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -185,7 +185,6 @@ struct gendisk { char *(*devnode)(struct gendisk *gd, umode_t *mode); unsigned int events; /* supported events */ - unsigned int async_events; /* async events, subset of all */ /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other -- cgit v1.2.3 From c92e2f04b35938da23eb9a7f7101cbdd5ac7cdc4 Mon Sep 17 00:00:00 2001 From: Martin Wilck Date: Wed, 27 Mar 2019 14:51:02 +0100 Subject: block: disk_events: introduce event flags Currently, an empty disk->events field tells the block layer not to forward media change events to user space. This was done in commit 7c88a168da80 ("block: don't propagate unlisted DISK_EVENTs to userland") in order to avoid events from "fringe" drivers to be forwarded to user space. By doing so, the block layer lost the information which events were supported by a particular block device, and most importantly, whether or not a given device supports media change events at all. Prepare for not interpreting the "events" field this way in the future any more. This is done by adding an additional field "event_flags" to struct gendisk, and two flag bits that can be set to have the device treated like one that had the "events" field set to a non-zero value before. This applies only to the sd and sr drivers, which are changed to set the new flags. The new flags are DISK_EVENT_FLAG_POLL to enforce polling of the device for synchronous events, and DISK_EVENT_FLAG_UEVENT to tell the blocklayer to generate udev events from kernel events. In order to add the event_flags field to struct gendisk, the events field is converted to an "unsigned short"; it doesn't need to hold values bigger than 2 anyway. This patch doesn't change behavior. Reviewed-by: Christoph Hellwig Signed-off-by: Martin Wilck Signed-off-by: Jens Axboe --- include/linux/genhd.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index a65d9fc17369..6547c9256d5c 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -150,6 +150,13 @@ enum { DISK_EVENT_EJECT_REQUEST = 1 << 1, /* eject requested */ }; +enum { + /* Poll even if events_poll_msecs is unset */ + DISK_EVENT_FLAG_POLL = 1 << 0, + /* Forward events to udev */ + DISK_EVENT_FLAG_UEVENT = 1 << 1, +}; + struct disk_part_tbl { struct rcu_head rcu_head; int len; @@ -184,7 +191,8 @@ struct gendisk { char disk_name[DISK_NAME_LEN]; /* name of major driver */ char *(*devnode)(struct gendisk *gd, umode_t *mode); - unsigned int events; /* supported events */ + unsigned short events; /* supported events */ + unsigned short event_flags; /* flags related to event processing */ /* Array of pointers to partitions indexed by partno. * Protected with matching bdev lock but stat and other -- cgit v1.2.3 From 6fcc44d1d77fea3c7230e4d109b37f6977aa675a Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Tue, 2 Apr 2019 20:06:34 +0800 Subject: block: fix use-after-free on gendisk commit 2da78092dda "block: Fix dev_t minor allocation lifetime" specifically moved blk_free_devt(dev->devt) call to part_release() to avoid reallocating device number before the device is fully shutdown. However, it can cause use-after-free on gendisk in get_gendisk(). We use md device as example to show the race scenes: Process1 Worker Process2 md_free blkdev_open del_gendisk add delete_partition_work_fn() to wq __blkdev_get get_gendisk put_disk disk_release kfree(disk) find part from ext_devt_idr get_disk_and_module(disk) cause use after free delete_partition_work_fn put_device(part) part_release remove part from ext_devt_idr Before is removed from ext_devt_idr by delete_partition_work_fn(), we can find the devt and then access gendisk by hd_struct pointer. But, if we access the gendisk after it have been freed, it can cause in use-after-freeon gendisk in get_gendisk(). We fix this by adding a new helper blk_invalidate_devt() in delete_partition() and del_gendisk(). It replaces hd_struct pointer in idr with value 'NULL', and deletes the entry from idr in part_release() as we do now. Thanks to Jan Kara for providing the solution and more clear comments for the code. Fixes: 2da78092dda1 ("block: Fix dev_t minor allocation lifetime") Cc: Al Viro Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Reviewed-by: Jan Kara Suggested-by: Jan Kara Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 6547c9256d5c..8b5330dd5ac0 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -617,6 +617,7 @@ struct unixware_disklabel { extern int blk_alloc_devt(struct hd_struct *part, dev_t *devt); extern void blk_free_devt(dev_t devt); +extern void blk_invalidate_devt(dev_t devt); extern dev_t blk_lookup_devt(const char *name, int partno); extern char *disk_name (struct gendisk *hd, int partno, char *buf); -- cgit v1.2.3 From 82bebbde02e24ad7b641eca25e632f32579ed52f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 10 Apr 2019 23:48:59 +0900 Subject: nvme-rdma: fix typo in struct comment struct nvme_rdma_cm_rej has two different attributes: recfmt and sts. And sts will have value what this comment wanted to show. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme-rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme-rdma.h b/include/linux/nvme-rdma.h index 3aa97b98dc89..3ec8e50efa16 100644 --- a/include/linux/nvme-rdma.h +++ b/include/linux/nvme-rdma.h @@ -77,7 +77,7 @@ struct nvme_rdma_cm_rep { * struct nvme_rdma_cm_rej - rdma connect reject * * @recfmt: format of the RDMA Private Data - * @fsts: error status for the associated connect request + * @sts: error status for the associated connect request */ struct nvme_rdma_cm_rej { __le16 recfmt; -- cgit v1.2.3 From 2b070cfe582b8e99fec6ada57d2e59e194aae202 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Apr 2019 09:03:00 +0200 Subject: block: remove the i argument to bio_for_each_segment_all We only have two callers that need the integer loop iterator, and they can easily maintain it themselves. Suggested-by: Matthew Wilcox Reviewed-by: Johannes Thumshirn Acked-by: David Sterba Reviewed-by: Hannes Reinecke Acked-by: Coly Li Reviewed-by: Matthew Wilcox Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 9577ad8f6e28..186b2723c61b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -134,9 +134,8 @@ static inline bool bio_next_segment(const struct bio *bio, * drivers should _never_ use the all version - the bio may have been split * before it got to the driver and the driver won't own all of it */ -#define bio_for_each_segment_all(bvl, bio, i, iter) \ - for (i = 0, bvl = bvec_init_iter_all(&iter); \ - bio_next_segment((bio), &iter); i++) +#define bio_for_each_segment_all(bvl, bio, iter) \ + for (bvl = bvec_init_iter_all(&iter); bio_next_segment((bio), &iter); ) static inline void bio_advance_iter(struct bio *bio, struct bvec_iter *iter, unsigned bytes) -- cgit v1.2.3 From 4713839dfe8269d27d83a33d1e39f9c2970eb31a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 25 Apr 2019 09:04:33 +0200 Subject: block: remove the __bio_add_pc_page export The same page optimization is a rather odd corner case, which is not used outside bio.c and which really should not be used outside of bio.c either - we have better highlevel helpers like the rq/bio mapping helpers. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 186b2723c61b..077cecdf9437 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -435,9 +435,6 @@ void bio_chain(struct bio *, struct bio *); extern int bio_add_page(struct bio *, struct page *, unsigned int,unsigned int); extern int bio_add_pc_page(struct request_queue *, struct bio *, struct page *, unsigned int, unsigned int); -extern int __bio_add_pc_page(struct request_queue *, struct bio *, - struct page *, unsigned int, unsigned int, - bool); bool __bio_try_merge_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off, bool same_page); void __bio_add_page(struct bio *bio, struct page *page, -- cgit v1.2.3 From 8c16567d867ed3185a67d8560e051090486d3ff1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 14:42:39 -0400 Subject: block: switch all files cleared marked as GPLv2 to SPDX tags All these files have some form of the usual GPLv2 boilerplate. Switch them to use SPDX tags instead. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 15 +-------------- include/linux/bvec.h | 15 +-------------- include/linux/sed-opal.h | 10 +--------- 3 files changed, 3 insertions(+), 37 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 077cecdf9437..ea73df36529a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -1,19 +1,6 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright (C) 2001 Jens Axboe - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ #ifndef __LINUX_BIO_H #define __LINUX_BIO_H diff --git a/include/linux/bvec.h b/include/linux/bvec.h index a4811410e4fc..545a480528e0 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -1,21 +1,8 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * bvec iterator * * Copyright (C) 2001 Ming Lei - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License version 2 as - * published by the Free Software Foundation. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public Licens - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111- */ #ifndef __LINUX_BVEC_ITER_H #define __LINUX_BVEC_ITER_H diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 04b124fca51e..3e76b6d7d97f 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -1,18 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0 */ /* * Copyright © 2016 Intel Corporation * * Authors: * Rafael Antognolli * Scott Bauer - * - * This program is free software; you can redistribute it and/or modify it - * under the terms and conditions of the GNU General Public License, - * version 2, as published by the Free Software Foundation. - * - * This program is distributed in the hope it will be useful, but WITHOUT - * ANY WARRANTY; without even the implied warranty of MERCHANTABILITY or - * FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public License for - * more details. */ #ifndef LINUX_OPAL_H -- cgit v1.2.3 From a497ee34a45d58e9b978d0fa5c4b25d4813eb350 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 14:42:40 -0400 Subject: block: switch all files cleared marked as GPLv2 or later to SPDX tags All these files have some form of the usual GPLv2 or later boilerplate. Switch them to use SPDX tags instead. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 16 +--------------- 1 file changed, 1 insertion(+), 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 7f14517a559b..960988d42f77 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -1,24 +1,10 @@ +/* SPDX-License-Identifier: GPL-2.0-or-later */ /* * BSG helper library * * Copyright (C) 2008 James Smart, Emulex Corporation * Copyright (C) 2011 Red Hat, Inc. All rights reserved. * Copyright (C) 2011 Mike Christie - * - * This program is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * This program is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with this program; if not, write to the Free Software - * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - * */ #ifndef _BLK_BSG_ #define _BLK_BSG_ -- cgit v1.2.3 From 6353599813158c02227d8086acb20e07f3306452 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 14:42:42 -0400 Subject: block: add a SPDX tag to blk-mq-rdma.h This file has no copyright notice, but was added as part of a commit adding another file using the default kernel GPLv2 license. Add a matching SPDX tag. Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq-rdma.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index 7b6ecf9ac4c3..5cc5f0f36218 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h @@ -1,3 +1,4 @@ +/* SPDX-License-Identifier: GPL-2.0 */ #ifndef _LINUX_BLK_MQ_RDMA_H #define _LINUX_BLK_MQ_RDMA_H -- cgit v1.2.3 From 12adb7a013e318de553ccee4a006a718667972b3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 30 Apr 2019 13:56:16 -0400 Subject: block: remove the unused blk_queue_dma_pad function Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 99aa98f60b9e..bd3e3f09bfa0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1069,7 +1069,6 @@ extern int bdev_stack_limits(struct queue_limits *t, struct block_device *bdev, extern void disk_stack_limits(struct gendisk *disk, struct block_device *bdev, sector_t offset); extern void blk_queue_stack_limits(struct request_queue *t, struct request_queue *b); -extern void blk_queue_dma_pad(struct request_queue *, unsigned int); extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, -- cgit v1.2.3 From 2f8f1336a48bd5186de3476da0a3e2ec06d0533a Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 30 Apr 2019 09:52:27 +0800 Subject: blk-mq: always free hctx after request queue is freed In normal queue cleanup path, hctx is released after request queue is freed, see blk_mq_release(). However, in __blk_mq_update_nr_hw_queues(), hctx may be freed because of hw queues shrinking. This way is easy to cause use-after-free, because: one implicit rule is that it is safe to call almost all block layer APIs if the request queue is alive; and one hctx may be retrieved by one API, then the hctx can be freed by blk_mq_update_nr_hw_queues(); finally use-after-free is triggered. Fixes this issue by always freeing hctx after releasing request queue. If some hctxs are removed in blk_mq_update_nr_hw_queues(), introduce a per-queue list to hold them, then try to resuse these hctxs if numa node is matched. Cc: Dongli Zhang Cc: James Smart Cc: Bart Van Assche Cc: linux-scsi@vger.kernel.org, Cc: Martin K . Petersen , Cc: Christoph Hellwig , Cc: James E . J . Bottomley , Reviewed-by: Hannes Reinecke Tested-by: James Smart Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 7 +++++++ 2 files changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index db29928de467..15d1aa53d96c 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -70,6 +70,8 @@ struct blk_mq_hw_ctx { struct dentry *sched_debugfs_dir; #endif + struct list_head hctx_list; + /* Must be the last member - see also blk_mq_hw_ctx_size(). */ struct srcu_struct srcu[0]; }; diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index bd3e3f09bfa0..1aafeb923e7b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -535,6 +535,13 @@ struct request_queue { struct mutex sysfs_lock; + /* + * for reusing dead hctx instance in case of updating + * nr_hw_queues + */ + struct list_head unused_hctx_list; + spinlock_t unused_hctx_lock; + atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) -- cgit v1.2.3 From b8753433fc611e23e31300e1d099001a08955c88 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 May 2019 08:53:35 +0200 Subject: block: fix mismerge in bvec_advance When Jens merged my commit to only allow contiguous page structs in a bio_vec with Ming's 5.1 fix to ensue the bvec length didn't overflow we failed to keep the removal of the expensive nth_page calls. This commits adds them back as intended. Fixes: 5c61ee2cd586 ("Merge tag 'v5.1-rc6' into for-5.2/block") Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bvec.h | 8 +------- 1 file changed, 1 insertion(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 545a480528e0..a032f01e928c 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -133,11 +133,6 @@ static inline struct bio_vec *bvec_init_iter_all(struct bvec_iter_all *iter_all) return &iter_all->bv; } -static inline struct page *bvec_nth_page(struct page *page, int idx) -{ - return idx == 0 ? page : nth_page(page, idx); -} - static inline void bvec_advance(const struct bio_vec *bvec, struct bvec_iter_all *iter_all) { @@ -147,8 +142,7 @@ static inline void bvec_advance(const struct bio_vec *bvec, bv->bv_page++; bv->bv_offset = 0; } else { - bv->bv_page = bvec_nth_page(bvec->bv_page, bvec->bv_offset / - PAGE_SIZE); + bv->bv_page = bvec->bv_page + (bvec->bv_offset >> PAGE_SHIFT); bv->bv_offset = bvec->bv_offset & ~PAGE_MASK; } bv->bv_len = min_t(unsigned int, PAGE_SIZE - bv->bv_offset, -- cgit v1.2.3