From 06447ae5e33bfbc5a777cc06d9854a31f3912833 Mon Sep 17 00:00:00 2001 From: Oliver Hartkopp Date: Wed, 14 Jul 2021 21:56:55 +0200 Subject: ioprio: move user space relevant ioprio bits to UAPI includes systemd added a modified copy of include/linux/ioprio.h into its code to get the relevant content definitions for the exposed ioprio_[get|set] system calls. Move the user space relevant ioprio bits to the UAPI includes to be able to use the ioprio_[get|set] syscalls as intended. Cc: Kay Sievers Cc: Greg Kroah-Hartman Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Oliver Hartkopp Link: https://lore.kernel.org/r/20210714195655.181943-1-socketcan@hartkopp.net Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 41 +---------------------------------------- 1 file changed, 1 insertion(+), 40 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index e9bfe6972aed..ef9ad4fb245f 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -6,46 +6,7 @@ #include #include -/* - * Gives us 8 prio classes with 13-bits of data for each class - */ -#define IOPRIO_CLASS_SHIFT (13) -#define IOPRIO_PRIO_MASK ((1UL << IOPRIO_CLASS_SHIFT) - 1) - -#define IOPRIO_PRIO_CLASS(mask) ((mask) >> IOPRIO_CLASS_SHIFT) -#define IOPRIO_PRIO_DATA(mask) ((mask) & IOPRIO_PRIO_MASK) -#define IOPRIO_PRIO_VALUE(class, data) (((class) << IOPRIO_CLASS_SHIFT) | data) - -#define ioprio_valid(mask) (IOPRIO_PRIO_CLASS((mask)) != IOPRIO_CLASS_NONE) - -/* - * These are the io priority groups as implemented by CFQ. RT is the realtime - * class, it always gets premium service. BE is the best-effort scheduling - * class, the default for any process. IDLE is the idle scheduling class, it - * is only served when no one else is using the disk. - */ -enum { - IOPRIO_CLASS_NONE, - IOPRIO_CLASS_RT, - IOPRIO_CLASS_BE, - IOPRIO_CLASS_IDLE, -}; - -/* - * 8 best effort priority levels are supported - */ -#define IOPRIO_BE_NR (8) - -enum { - IOPRIO_WHO_PROCESS = 1, - IOPRIO_WHO_PGRP, - IOPRIO_WHO_USER, -}; - -/* - * Fallback BE priority - */ -#define IOPRIO_NORM (4) +#include /* * if process has set io priority explicitly, use that. if not, convert -- cgit v1.2.3 From e45cef51dba9765a6e1df1be724f3d26323512c8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jul 2021 07:56:33 +0200 Subject: bvec: fix the include guards for bvec.h Fix the include guards to match the file naming. Signed-off-by: Christoph Hellwig Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20210727055646.118787-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ff832e698efb..883faf5f1523 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -4,8 +4,8 @@ * * Copyright (C) 2001 Ming Lei */ -#ifndef __LINUX_BVEC_ITER_H -#define __LINUX_BVEC_ITER_H +#ifndef __LINUX_BVEC_H +#define __LINUX_BVEC_H #include #include @@ -183,4 +183,4 @@ static inline void bvec_advance(const struct bio_vec *bvec, } } -#endif /* __LINUX_BVEC_ITER_H */ +#endif /* __LINUX_BVEC_H */ -- cgit v1.2.3 From e6e7471706dc42cbe0e01278540c0730138d43e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jul 2021 07:56:34 +0200 Subject: bvec: add a bvec_kmap_local helper Add a helper to call kmap_local_page on a bvec. There is no need for an unmap helper given that kunmap_local accept any address in the mapped page. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20210727055646.118787-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index 883faf5f1523..f8710af18eef 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -7,6 +7,7 @@ #ifndef __LINUX_BVEC_H #define __LINUX_BVEC_H +#include #include #include #include @@ -183,4 +184,16 @@ static inline void bvec_advance(const struct bio_vec *bvec, } } +/** + * bvec_kmap_local - map a bvec into the kernel virtual address space + * @bvec: bvec to map + * + * Must be called on single-page bvecs only. Call kunmap_local on the returned + * address to unmap. + */ +static inline void *bvec_kmap_local(struct bio_vec *bvec) +{ + return kmap_local_page(bvec->bv_page) + bvec->bv_offset; +} + #endif /* __LINUX_BVEC_H */ -- cgit v1.2.3 From f93a181af40b159aabea2ccf1a0496e9280be2d5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jul 2021 07:56:35 +0200 Subject: bvec: add memcpy_{from,to}_bvec and memzero_bvec helper Add helpers to perform common memory operation on a bvec. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Reviewed-by: Ira Weiny Link: https://lore.kernel.org/r/20210727055646.118787-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 33 +++++++++++++++++++++++++++++++++ 1 file changed, 33 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f8710af18eef..f9fa43b940ff 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -196,4 +196,37 @@ static inline void *bvec_kmap_local(struct bio_vec *bvec) return kmap_local_page(bvec->bv_page) + bvec->bv_offset; } +/** + * memcpy_from_bvec - copy data from a bvec + * @bvec: bvec to copy from + * + * Must be called on single-page bvecs only. + */ +static inline void memcpy_from_bvec(char *to, struct bio_vec *bvec) +{ + memcpy_from_page(to, bvec->bv_page, bvec->bv_offset, bvec->bv_len); +} + +/** + * memcpy_to_bvec - copy data to a bvec + * @bvec: bvec to copy to + * + * Must be called on single-page bvecs only. + */ +static inline void memcpy_to_bvec(struct bio_vec *bvec, const char *from) +{ + memcpy_to_page(bvec->bv_page, bvec->bv_offset, from, bvec->bv_len); +} + +/** + * memzero_bvec - zero all data in a bvec + * @bvec: bvec to zero + * + * Must be called on single-page bvecs only. + */ +static inline void memzero_bvec(struct bio_vec *bvec) +{ + memzero_page(bvec->bv_page, bvec->bv_offset, bvec->bv_len); +} + #endif /* __LINUX_BVEC_H */ -- cgit v1.2.3 From bda135d9c03fae64c910a8c8d751eccd8408f400 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 27 Jul 2021 07:56:40 +0200 Subject: block: remove bvec_kmap_irq and bvec_kunmap_irq These two helpers are entirely unused now. Signed-off-by: Christoph Hellwig Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20210727055646.118787-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 42 ------------------------------------------ 1 file changed, 42 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2203b686e1f0..7b5f65a81f2b 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -5,7 +5,6 @@ #ifndef __LINUX_BIO_H #define __LINUX_BIO_H -#include #include #include /* struct bio, bio_vec and BIO_* flags are defined in blk_types.h */ @@ -519,47 +518,6 @@ static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ -#ifdef CONFIG_HIGHMEM -/* - * remember never ever reenable interrupts between a bvec_kmap_irq and - * bvec_kunmap_irq! - */ -static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) -{ - unsigned long addr; - - /* - * might not be a highmem page, but the preempt/irq count - * balancing is a lot nicer this way - */ - local_irq_save(*flags); - addr = (unsigned long) kmap_atomic(bvec->bv_page); - - BUG_ON(addr & ~PAGE_MASK); - - return (char *) addr + bvec->bv_offset; -} - -static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) -{ - unsigned long ptr = (unsigned long) buffer & PAGE_MASK; - - kunmap_atomic((void *) ptr); - local_irq_restore(*flags); -} - -#else -static inline char *bvec_kmap_irq(struct bio_vec *bvec, unsigned long *flags) -{ - return page_address(bvec->bv_page) + bvec->bv_offset; -} - -static inline void bvec_kunmap_irq(char *buffer, unsigned long *flags) -{ - *flags = 0; -} -#endif - /* * BIO list management for use by remapping drivers (e.g. DM or MD) and loop. * -- cgit v1.2.3 From 14cf1dbb55bb07427babee425fd2a8a9300737cc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Jul 2021 09:54:01 +0200 Subject: block: remove bdgrab All callers are gone, and no one should grab a pure inode reference to a block device anymore. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Link: https://lore.kernel.org/r/20210722075402.983367-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d3afea47ade6..eb1289a58917 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1984,7 +1984,6 @@ void blkdev_put_no_open(struct block_device *bdev); struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -struct block_device *bdgrab(struct block_device *bdev); void bdput(struct block_device *); int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend); -- cgit v1.2.3 From 2f4731dcd0bb73379fbb9e3eb07ae7324125caef Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 22 Jul 2021 09:54:02 +0200 Subject: block: remove bdput Now that we've stopped using inode references for anything meaninful in the block layer get rid of the helper to put it and just open code the call to iput on the block_device inode. Signed-off-by: Christoph Hellwig Reviewed-by: Josef Bacik Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20210722075402.983367-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index eb1289a58917..b5c033cf5f26 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1984,7 +1984,6 @@ void blkdev_put_no_open(struct block_device *bdev); struct block_device *bdev_alloc(struct gendisk *disk, u8 partno); void bdev_add(struct block_device *bdev, dev_t dev); struct block_device *I_BDEV(struct inode *inode); -void bdput(struct block_device *); int truncate_bdev_range(struct block_device *bdev, fmode_t mode, loff_t lstart, loff_t lend); -- cgit v1.2.3 From 2164877c7f373e14e55fca20b7c4a9c436fe4462 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 28 Jul 2021 07:37:56 +0200 Subject: block: remove cmdline-parser.c cmdline-parser.c is only used by the cmdline faux partition format, so merge the code into that and avoid an indirect call. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210728053756.409654-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/cmdline-parser.h | 46 ------------------------------------------ 1 file changed, 46 deletions(-) delete mode 100644 include/linux/cmdline-parser.h (limited to 'include/linux') diff --git a/include/linux/cmdline-parser.h b/include/linux/cmdline-parser.h deleted file mode 100644 index 68a541807bdf..000000000000 --- a/include/linux/cmdline-parser.h +++ /dev/null @@ -1,46 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 */ -/* - * Parsing command line, get the partitions information. - * - * Written by Cai Zhiyong - * - */ -#ifndef CMDLINEPARSEH -#define CMDLINEPARSEH - -#include -#include -#include - -/* partition flags */ -#define PF_RDONLY 0x01 /* Device is read only */ -#define PF_POWERUP_LOCK 0x02 /* Always locked after reset */ - -struct cmdline_subpart { - char name[BDEVNAME_SIZE]; /* partition name, such as 'rootfs' */ - sector_t from; - sector_t size; - int flags; - struct cmdline_subpart *next_subpart; -}; - -struct cmdline_parts { - char name[BDEVNAME_SIZE]; /* block device, such as 'mmcblk0' */ - unsigned int nr_subparts; - struct cmdline_subpart *subpart; - struct cmdline_parts *next_parts; -}; - -void cmdline_parts_free(struct cmdline_parts **parts); - -int cmdline_parts_parse(struct cmdline_parts **parts, const char *cmdline); - -struct cmdline_parts *cmdline_parts_find(struct cmdline_parts *parts, - const char *bdev); - -int cmdline_parts_set(struct cmdline_parts *parts, sector_t disk_size, - int slot, - int (*add_part)(int, struct cmdline_subpart *, void *), - void *param); - -#endif /* CMDLINEPARSEH */ -- cgit v1.2.3 From cf179948554a2e0d2b622317bf6bf33138ac36e5 Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 13 Jul 2021 01:05:25 +0200 Subject: block: add disk sequence number Associating uevents with block devices in userspace is difficult and racy: the uevent netlink socket is lossy, and on slow and overloaded systems has a very high latency. Block devices do not have exclusive owners in userspace, any process can set one up (e.g. loop devices). Moreover, device names can be reused (e.g. loop0 can be reused again and again). A userspace process setting up a block device and watching for its events cannot thus reliably tell whether an event relates to the device it just set up or another earlier instance with the same name. Being able to set a UUID on a loop device would solve the race conditions. But it does not allow to derive orderings from uevents: if you see a uevent with a UUID that does not match the device you are waiting for, you cannot tell whether it's because the right uevent has not arrived yet, or it was already sent and you missed it. So you cannot tell whether you should wait for it or not. Associating a unique, monotonically increasing sequential number to the lifetime of each block device, which can be retrieved with an ioctl immediately upon setting it up, allows to solve the race conditions with uevents, and also allows userspace processes to know whether they should wait for the uevent they need or if it was dropped and thus they should move on. Additionally, increment the disk sequence number when the media change, i.e. on DISK_EVENT_MEDIA_CHANGE event. Reviewed-by: Christoph Hellwig Signed-off-by: Matteo Croce Tested-by: Luca Boccassi Link: https://lore.kernel.org/r/20210712230530.29323-2-mcroce@linux.microsoft.com Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 13b34177cc85..140c028845af 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -172,6 +172,7 @@ struct gendisk { int node_id; struct badblocks *bb; struct lockdep_map lockdep_map; + u64 diskseq; }; /* @@ -332,6 +333,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, #endif /* CONFIG_SYSFS */ dev_t part_devt(struct gendisk *disk, u8 partno); +void inc_diskseq(struct gendisk *disk); dev_t blk_lookup_devt(const char *name, int partno); void blk_request_module(dev_t devt); #ifdef CONFIG_BLOCK -- cgit v1.2.3 From e6138dc12de9df17cbda9c40314d69592855ac5e Mon Sep 17 00:00:00 2001 From: Matteo Croce Date: Tue, 13 Jul 2021 01:05:29 +0200 Subject: block: add a helper to raise a media changed event Refactor disk_check_events() and move some code into disk_event_uevent(). Then add disk_force_media_change(), a helper which will be used by devices to force issuing a DISK_EVENT_MEDIA_CHANGE event. Co-developed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Matteo Croce Tested-by: Luca Boccassi Link: https://lore.kernel.org/r/20210712230530.29323-6-mcroce@linux.microsoft.com Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 140c028845af..849486de81c6 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -237,6 +237,7 @@ extern void disk_block_events(struct gendisk *disk); extern void disk_unblock_events(struct gendisk *disk); extern void disk_flush_events(struct gendisk *disk, unsigned int mask); bool set_capacity_and_notify(struct gendisk *disk, sector_t size); +bool disk_force_media_change(struct gendisk *disk, unsigned int events); /* drivers/char/random.c */ extern void add_disk_randomness(struct gendisk *disk) __latent_entropy; -- cgit v1.2.3 From 90b7198001f23ea37d3b46dc631bdaa2357a20b1 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Thu, 5 Aug 2021 10:41:59 -0700 Subject: blk-mq: Introduce the BLK_MQ_F_NO_SCHED_BY_DEFAULT flag elevator_get_default() uses the following algorithm to select an I/O scheduler from inside add_disk(): - In case of a single hardware queue or if sharing hardware queues across multiple request queues (BLK_MQ_F_TAG_HCTX_SHARED), use mq-deadline. - Otherwise, use 'none'. This is a good choice for most but not for all block drivers. Make it possible to override the selection of mq-deadline with a new flag, namely BLK_MQ_F_NO_SCHED_BY_DEFAULT. Cc: Christoph Hellwig Cc: Ming Lei Cc: Tetsuo Handa Cc: Martijn Coenen Cc: Jaegeuk Kim Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20210805174200.3250718-2-bvanassche@acm.org Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 1d18447ebebc..22215db36122 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -404,7 +404,13 @@ enum { BLK_MQ_F_STACKING = 1 << 2, BLK_MQ_F_TAG_HCTX_SHARED = 1 << 3, BLK_MQ_F_BLOCKING = 1 << 5, + /* Do not allow an I/O scheduler to be configured. */ BLK_MQ_F_NO_SCHED = 1 << 6, + /* + * Select 'none' during queue registration in case of a single hwq + * or shared hwqs instead of 'mq-deadline'. + */ + BLK_MQ_F_NO_SCHED_BY_DEFAULT = 1 << 7, BLK_MQ_F_ALLOC_POLICY_START_BIT = 8, BLK_MQ_F_ALLOC_POLICY_BITS = 1, -- cgit v1.2.3 From c66fd019713e9cf7d6f1243c378cd177d01fe18a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:41:40 +0200 Subject: block: make the block holder code optional Move the block holder code into a separate file as it is not in any way related to the other block_dev.c code, and add a new selectable config option for it so that we don't have to build it without any remapped drivers selected. The Kconfig symbol contains a _DEPRECATED suffix to match the comments added in commit 49731baa41df ("block: restore multiple bd_link_disk_holder() support"). Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20210804094147.459763-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 +- include/linux/genhd.h | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 290f9061b29a..7a4e139d24ef 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -34,7 +34,7 @@ struct block_device { void * bd_holder; int bd_holders; bool bd_write_holder; -#ifdef CONFIG_SYSFS +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED struct list_head bd_holder_disks; #endif struct kobject *bd_holder_dir; diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 849486de81c6..e21a91c16a79 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -318,7 +318,7 @@ void set_capacity(struct gendisk *disk, sector_t size); int blkdev_ioctl(struct block_device *, fmode_t, unsigned, unsigned long); long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); -#ifdef CONFIG_SYSFS +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); #else @@ -331,7 +331,7 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { } -#endif /* CONFIG_SYSFS */ +#endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ dev_t part_devt(struct gendisk *disk, u8 partno); void inc_diskseq(struct gendisk *disk); -- cgit v1.2.3 From 0dbcfe247f22a6d73302dfa691c48b3c14d31c4c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:41:42 +0200 Subject: block: look up holders by bdev Invert they way the holder relations are tracked. This very slightly reduces the memory overhead for partitioned devices. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210804094147.459763-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 --- include/linux/genhd.h | 4 +++- 2 files changed, 3 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 7a4e139d24ef..e92735655684 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -34,9 +34,6 @@ struct block_device { void * bd_holder; int bd_holders; bool bd_write_holder; -#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED - struct list_head bd_holder_disks; -#endif struct kobject *bd_holder_dir; u8 bd_partno; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ diff --git a/include/linux/genhd.h b/include/linux/genhd.h index e21a91c16a79..0721807d76ee 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -159,7 +159,9 @@ struct gendisk { unsigned open_partitions; /* number of open partitions */ struct kobject *slave_dir; - +#ifdef CONFIG_BLOCK_HOLDER_DEPRECATED + struct list_head slave_bdevs; +#endif struct timer_rand_state *random; atomic_t sync_io; /* RAID */ struct disk_events *ev; -- cgit v1.2.3 From d626338735909bc2b2e7cafc332f44ed41cfdeee Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:41:43 +0200 Subject: block: support delayed holder registration device mapper needs to register holders before it is ready to do I/O. Currently it does so by registering the disk early, which can leave the disk and queue in a weird half state where the queue is registered with the disk, except for sysfs and the elevator. And this state has been a bit promlematic before, and will get more so when sorting out the responsibilities between the queue and the disk. Support registering holders on an initialized but not registered disk instead by delaying the sysfs registration until the disk is registered. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20210804094147.459763-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0721807d76ee..80952f038d79 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -323,6 +323,7 @@ long compat_blkdev_ioctl(struct file *, unsigned, unsigned long); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); +int bd_register_pending_holders(struct gendisk *disk); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) @@ -333,6 +334,10 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { } +static inline int bd_register_pending_holders(struct gendisk *disk) +{ + return 0; +} #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ dev_t part_devt(struct gendisk *disk, u8 partno); -- cgit v1.2.3 From d1254a8749711e0d7441036a74ce592341f89697 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:41:47 +0200 Subject: block: remove support for delayed queue registrations Now that device mapper has been changed to register the disk once it is fully ready all this code is unused. Signed-off-by: Christoph Hellwig Reviewed-by: Mike Snitzer Link: https://lore.kernel.org/r/20210804094147.459763-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 80952f038d79..473d93c6ebda 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -219,12 +219,6 @@ static inline void add_disk(struct gendisk *disk) { device_add_disk(NULL, disk, NULL); } -extern void device_add_disk_no_queue_reg(struct device *parent, struct gendisk *disk); -static inline void add_disk_no_queue_reg(struct gendisk *disk) -{ - device_add_disk_no_queue_reg(NULL, disk); -} - extern void del_gendisk(struct gendisk *gp); void set_disk_ro(struct gendisk *disk, bool read_only); -- cgit v1.2.3 From 471aa704db4904f7af5a50019ca3b5b018c0cf62 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Aug 2021 16:17:41 +0200 Subject: block: pass a gendisk to blk_queue_update_readahead .. and rename the function to disk_update_readahead. This is in preparation for moving the BDI from the request_queue to the gendisk. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210809141744.1203023-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b5c033cf5f26..ac3642c88a4d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1139,7 +1139,7 @@ void blk_queue_zone_write_granularity(struct request_queue *q, unsigned int size); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); -void blk_queue_update_readahead(struct request_queue *q); +void disk_update_readahead(struct gendisk *disk); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); extern void blk_queue_io_min(struct request_queue *q, unsigned int min); extern void blk_limits_io_opt(struct queue_limits *limits, unsigned int opt); -- cgit v1.2.3 From 1008162b2782a3624d12b0aee8da58bc75d12e19 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Aug 2021 16:17:42 +0200 Subject: block: add a queue_has_disk helper Add a helper to check if a gendisk is associated with a request_queue. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210809141744.1203023-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ac3642c88a4d..96f3d9617cd8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -664,6 +664,7 @@ extern void blk_clear_pm_only(struct request_queue *q); dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) +#define queue_has_disk(q) ((q)->kobj.parent != NULL) #define queue_to_disk(q) (dev_to_disk(kobj_to_dev((q)->kobj.parent))) static inline bool queue_is_mq(struct request_queue *q) -- cgit v1.2.3 From edb0872f44ec9976ea6d052cb4b93cd2d23ac2ba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Aug 2021 16:17:43 +0200 Subject: block: move the bdi from the request_queue to the gendisk The backing device information only makes sense for file system I/O, and thus belongs into the gendisk and not the lower level request_queue structure. Move it there. Signed-off-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210809141744.1203023-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- include/linux/genhd.h | 1 + 2 files changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 96f3d9617cd8..23e1253a8d88 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -11,7 +11,6 @@ #include #include #include -#include #include #include #include @@ -398,8 +397,6 @@ struct request_queue { struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; - struct backing_dev_info *backing_dev_info; - /* * The queue owner gets to use this for whatever they like. * ll_rw_blk doesn't touch it. diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 473d93c6ebda..b3bab578f03a 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -158,6 +158,7 @@ struct gendisk { struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ + struct backing_dev_info *bdi; struct kobject *slave_dir; #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED struct list_head slave_bdevs; -- cgit v1.2.3 From a11d7fc2d05fb509cd9e33d4093507d6eda3ad53 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Aug 2021 16:17:44 +0200 Subject: block: remove the bd_bdi in struct block_device Just retrieve the bdi from the disk. Signed-off-by: Christoph Hellwig Reviewed-by: Jan Kara Reviewed-by: Johannes Thumshirn Link: https://lore.kernel.org/r/20210809141744.1203023-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 2 +- include/linux/blk_types.h | 1 - 2 files changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 44df4fcef65c..29530859d9ff 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -143,7 +143,7 @@ static inline struct backing_dev_info *inode_to_bdi(struct inode *inode) sb = inode->i_sb; #ifdef CONFIG_BLOCK if (sb_is_blkdev_sb(sb)) - return I_BDEV(inode)->bd_bdi; + return I_BDEV(inode)->bd_disk->bdi; #endif return sb->s_bdi; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e92735655684..1335efa8a1db 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -38,7 +38,6 @@ struct block_device { u8 bd_partno; spinlock_t bd_size_lock; /* for bd_inode->i_size updates */ struct gendisk * bd_disk; - struct backing_dev_info *bd_bdi; /* The counter of freeze processes */ int bd_fsfreeze_count; -- cgit v1.2.3 From 866663b7b52d2da267b28e12eed89ee781b8fed1 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Thu, 29 Jul 2021 11:42:26 +0800 Subject: block: return ELEVATOR_DISCARD_MERGE if possible When merging one bio to request, if they are discard IO and the queue supports multi-range discard, we need to return ELEVATOR_DISCARD_MERGE because both block core and related drivers(nvme, virtio-blk) doesn't handle mixed discard io merge(traditional IO merge together with discard merge) well. Fix the issue by returning ELEVATOR_DISCARD_MERGE in this situation, so both blk-mq and drivers just need to handle multi-range discard. Reported-by: Oleksandr Natalenko Signed-off-by: Ming Lei Tested-by: Oleksandr Natalenko Fixes: 2705dfb20947 ("block: fix discard request merge") Link: https://lore.kernel.org/r/20210729034226.1591070-1-ming.lei@redhat.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 23e1253a8d88..07eef02325b4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1519,6 +1519,22 @@ static inline int queue_limit_discard_alignment(struct queue_limits *lim, sector return offset << SECTOR_SHIFT; } +/* + * Two cases of handling DISCARD merge: + * If max_discard_segments > 1, the driver takes every bio + * as a range and send them to controller together. The ranges + * needn't to be contiguous. + * Otherwise, the bios/requests will be handled as same as + * others which should be contiguous. + */ +static inline bool blk_discard_mergable(struct request *req) +{ + if (req_op(req) == REQ_OP_DISCARD && + queue_max_discard_segments(req->q) > 1) + return true; + return false; +} + static inline int bdev_discard_alignment(struct block_device *bdev) { struct request_queue *q = bdev_get_queue(bdev); -- cgit v1.2.3 From 99d26de2f6d79badc80f55b54bd90d4cb9d1ad90 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Aug 2021 14:39:24 +0200 Subject: writeback: make the laptop_mode prototypes available unconditionally Fix the !CONFIG_BLOCK build after the recent cleanup. Fixes: 5ed964f8e54e ("mm: hide laptop_mode_wb_timer entirely behind the BDI API") Reported-by: Stephen Rothwell Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/writeback.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/writeback.h b/include/linux/writeback.h index 667e86cfbdcf..270677dc4f36 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -336,14 +336,9 @@ static inline void cgroup_writeback_umount(void) /* * mm/page-writeback.c */ -#ifdef CONFIG_BLOCK void laptop_io_completion(struct backing_dev_info *info); void laptop_sync_completion(void); -void laptop_mode_sync(struct work_struct *work); void laptop_mode_timer_fn(struct timer_list *t); -#else -static inline void laptop_sync_completion(void) { } -#endif bool node_dirty_ok(struct pglist_data *pgdat); int wb_domain_init(struct wb_domain *dom, gfp_t gfp); #ifdef CONFIG_CGROUP_WRITEBACK -- cgit v1.2.3 From 018eca456c4b4dca56aaf1ec27f309c74d0fe246 Mon Sep 17 00:00:00 2001 From: Guoqing Jiang Date: Wed, 21 Jul 2021 10:53:15 +0800 Subject: block: move some macros to blkdev.h Move them (PAGE_SECTORS_SHIFT, PAGE_SECTORS and SECTOR_MASK) to the generic header file to remove redundancy. Signed-off-by: Guoqing Jiang Link: https://lore.kernel.org/r/20210721025315.1729118-1-guoqing.jiang@linux.dev Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ include/linux/device-mapper.h | 1 - 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 07eef02325b4..df404c1fb087 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -939,6 +939,10 @@ static inline struct request_queue *bdev_get_queue(struct block_device *bdev) #define SECTOR_SIZE (1 << SECTOR_SHIFT) #endif +#define PAGE_SECTORS_SHIFT (PAGE_SHIFT - SECTOR_SHIFT) +#define PAGE_SECTORS (1 << PAGE_SECTORS_SHIFT) +#define SECTOR_MASK (PAGE_SECTORS - 1) + /* * blk_rq_pos() : the current sector * blk_rq_bytes() : bytes left in the entire request diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 7457d49acf9a..94f2cd6a8e83 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -151,7 +151,6 @@ typedef size_t (*dm_dax_copy_iter_fn)(struct dm_target *ti, pgoff_t pgoff, void *addr, size_t bytes, struct iov_iter *i); typedef int (*dm_dax_zero_page_range_fn)(struct dm_target *ti, pgoff_t pgoff, size_t nr_pages); -#define PAGE_SECTORS (PAGE_SIZE / 512) void dm_error(const char *message); -- cgit v1.2.3 From 50b4aecfbbb09869db967e4a26212a47e10c0088 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 9 Aug 2021 08:40:28 +0200 Subject: block: remove GENHD_FL_UP Just check inode_unhashed on the whole device bdev inode instead, and provide a helper to check for that information. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210809064028.1198327-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b3bab578f03a..b47e297cd551 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -60,9 +60,6 @@ struct partition_meta_info { * device. * Affects responses to the ``CDROM_GET_CAPABILITY`` ioctl. * - * ``GENHD_FL_UP`` (0x0010): indicates that the block device is "up", - * with a similar meaning to network interfaces. - * * ``GENHD_FL_SUPPRESS_PARTITION_INFO`` (0x0020): don't include * partition information in ``/proc/partitions`` or in the output of * printk_all_partitions(). @@ -97,7 +94,6 @@ struct partition_meta_info { /* 2 is unused (used to be GENHD_FL_DRIVERFS) */ /* 4 is unused (used to be GENHD_FL_MEDIA_CHANGE_NOTIFY) */ #define GENHD_FL_CD 0x0008 -#define GENHD_FL_UP 0x0010 #define GENHD_FL_SUPPRESS_PARTITION_INFO 0x0020 #define GENHD_FL_EXT_DEVT 0x0040 #define GENHD_FL_NATIVE_CAPACITY 0x0080 @@ -178,6 +174,11 @@ struct gendisk { u64 diskseq; }; +static inline bool disk_live(struct gendisk *disk) +{ + return !inode_unhashed(disk->part0->bd_inode); +} + /* * The gendisk is refcounted by the part0 block_device, and the bd_device * therein is also used for device model presentation in sysfs. -- cgit v1.2.3 From 1113f0b69c6a98ff4e733c306a6658a31f8cbc49 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 4 Aug 2021 11:56:20 +0200 Subject: bvec: add a bvec_virt helper Add a helper to get the virtual address for a bvec. This avoids that all callers need to know about the page + offset representation. Signed-off-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen Link: https://lore.kernel.org/r/20210804095634.460779-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bvec.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index f9fa43b940ff..0e9bdd42dafb 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -229,4 +229,16 @@ static inline void memzero_bvec(struct bio_vec *bvec) memzero_page(bvec->bv_page, bvec->bv_offset, bvec->bv_len); } +/** + * bvec_virt - return the virtual address for a bvec + * @bvec: bvec to return the virtual address for + * + * Note: the caller must ensure that @bvec->bv_page is not a highmem page. + */ +static inline void *bvec_virt(struct bio_vec *bvec) +{ + WARN_ON_ONCE(PageHighMem(bvec->bv_page)); + return page_address(bvec->bv_page) + bvec->bv_offset; +} + #endif /* __LINUX_BVEC_H */ -- cgit v1.2.3 From 252c651a4c854b328445a536bd1892e999103fca Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 10 Aug 2021 17:26:23 +0200 Subject: blk-cgroup: stop using seq_get_buf seq_get_buf is a crutch that undoes all the memory safety of the seq_file interface. Use the normal seq_printf interfaces instead. Signed-off-by: Christoph Hellwig Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20210810152623.1796144-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 37048438872c..b4de2010fba5 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -152,8 +152,8 @@ typedef void (blkcg_pol_online_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_offline_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_free_pd_fn)(struct blkg_policy_data *pd); typedef void (blkcg_pol_reset_pd_stats_fn)(struct blkg_policy_data *pd); -typedef size_t (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, char *buf, - size_t size); +typedef bool (blkcg_pol_stat_pd_fn)(struct blkg_policy_data *pd, + struct seq_file *s); struct blkcg_policy { int plid; -- cgit v1.2.3 From a553a835ca57668b0d9907d8ec2507ec51292d9a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:36:59 +0900 Subject: block: change ioprio_valid() to an inline function Change the ioprio_valid() macro in include/usapi/linux/ioprio.h to an inline function declared on the kernel side in include/linux/ioprio.h. Also improve checks on the class value by checking the upper bound value. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-4-damien.lemoal@wdc.com Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index ef9ad4fb245f..2ee3373684b1 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -8,6 +8,16 @@ #include +/* + * Check that a priority value has a valid class. + */ +static inline bool ioprio_valid(unsigned short ioprio) +{ + unsigned short class = IOPRIO_PRIO_CLASS(ioprio); + + return class > IOPRIO_CLASS_NONE && class <= IOPRIO_CLASS_IDLE; +} + /* * if process has set io priority explicitly, use that. if not, convert * the cpu scheduler nice value to an io priority -- cgit v1.2.3 From e70344c05995a190a56bbd1a23dc2218bcc8c924 Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Wed, 11 Aug 2021 12:37:02 +0900 Subject: block: fix default IO priority handling The default IO priority is the best effort (BE) class with the normal priority level IOPRIO_NORM (4). However, get_task_ioprio() returns IOPRIO_CLASS_NONE/IOPRIO_NORM as the default priority and get_current_ioprio() returns IOPRIO_CLASS_NONE/0. Let's be consistent with the defined default and have both of these functions return the default priority IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_NORM) when the user did not define another default IO priority for the task. In include/uapi/linux/ioprio.h, introduce the IOPRIO_BE_NORM macro as an alias to IOPRIO_NORM to clarify that this default level applies to the BE priotity class. In include/linux/ioprio.h, define the macro IOPRIO_DEFAULT as IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) and use this new macro when setting a priority to the default. Signed-off-by: Damien Le Moal Link: https://lore.kernel.org/r/20210811033702.368488-7-damien.lemoal@wdc.com [axboe: drop unnecessary lightnvm change] Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 2ee3373684b1..3f53bc27a19b 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -8,6 +8,11 @@ #include +/* + * Default IO priority. + */ +#define IOPRIO_DEFAULT IOPRIO_PRIO_VALUE(IOPRIO_CLASS_BE, IOPRIO_BE_NORM) + /* * Check that a priority value has a valid class. */ @@ -51,7 +56,7 @@ static inline int get_current_ioprio(void) if (ioc) return ioc->ioprio; - return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); + return IOPRIO_DEFAULT; } /* -- cgit v1.2.3 From 4dcc4874deb41a11ece9c6e8858385235463c1ac Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2021 15:19:05 +0200 Subject: block: cleanup the lockdep handling in *alloc_disk Pass the lockdep name to the low-level __blk_alloc_disk helper and hardcode the name for it given that the number of minors or node_id are not very useful information. While this passes a pointless argument for non-lockdep builds that is not really an issue as disk allocation is a probe time only slow path. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210816131910.615153-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 +++------- include/linux/genhd.h | 23 ++++++----------------- 2 files changed, 9 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 22215db36122..13ba1861e688 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -432,18 +432,14 @@ enum { ((policy & ((1 << BLK_MQ_F_ALLOC_POLICY_BITS) - 1)) \ << BLK_MQ_F_ALLOC_POLICY_START_BIT) +struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, void *queuedata, + struct lock_class_key *lkclass); #define blk_mq_alloc_disk(set, queuedata) \ ({ \ static struct lock_class_key __key; \ - struct gendisk *__disk = __blk_mq_alloc_disk(set, queuedata); \ \ - if (!IS_ERR(__disk)) \ - lockdep_init_map(&__disk->lockdep_map, \ - "(bio completion)", &__key, 0); \ - __disk; \ + __blk_mq_alloc_disk(set, queuedata, &__key); \ }) -struct gendisk *__blk_mq_alloc_disk(struct blk_mq_tag_set *set, - void *queuedata); struct request_queue *blk_mq_init_queue(struct blk_mq_tag_set *); int blk_mq_init_allocated_queue(struct blk_mq_tag_set *set, struct request_queue *q); diff --git a/include/linux/genhd.h b/include/linux/genhd.h index b47e297cd551..3d2e5ee30677 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -259,27 +259,21 @@ static inline sector_t get_capacity(struct gendisk *disk) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void blk_drop_partitions(struct gendisk *disk); -extern struct gendisk *__alloc_disk_node(int minors, int node_id); +struct gendisk *__alloc_disk_node(int minors, int node_id, + struct lock_class_key *lkclass); extern void put_disk(struct gendisk *disk); #define alloc_disk_node(minors, node_id) \ ({ \ static struct lock_class_key __key; \ - const char *__name; \ - struct gendisk *__disk; \ \ - __name = "(gendisk_completion)"#minors"("#node_id")"; \ - \ - __disk = __alloc_disk_node(minors, node_id); \ - \ - if (__disk) \ - lockdep_init_map(&__disk->lockdep_map, __name, &__key, 0); \ - \ - __disk; \ + __alloc_disk_node(minors, node_id, &__key); \ }) #define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) +struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); + /** * blk_alloc_disk - allocate a gendisk structure * @node_id: numa node to allocate on @@ -291,15 +285,10 @@ extern void put_disk(struct gendisk *disk); */ #define blk_alloc_disk(node_id) \ ({ \ - struct gendisk *__disk = __blk_alloc_disk(node_id); \ static struct lock_class_key __key; \ \ - if (__disk) \ - lockdep_init_map(&__disk->lockdep_map, \ - "(bio completion)", &__key, 0); \ - __disk; \ + __blk_alloc_disk(node_id, &__key); \ }) -struct gendisk *__blk_alloc_disk(int node); void blk_cleanup_disk(struct gendisk *disk); int __register_blkdev(unsigned int major, const char *name, -- cgit v1.2.3 From 9c2b9dbafc067e173db30c4fd0636392d27944e8 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2021 15:19:06 +0200 Subject: block: remove alloc_disk and alloc_disk_node Most drivers should use and have been converted to use blk_alloc_disk and blk_mq_alloc_disk. Only the scsi ULPs and dasd still allocate a disk separately from the request_queue, so don't bother with convenience macros for something that should not see significant new users and remove these wrappers. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210816131910.615153-6-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 3d2e5ee30677..ceda9b255dba 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -262,16 +262,6 @@ void blk_drop_partitions(struct gendisk *disk); struct gendisk *__alloc_disk_node(int minors, int node_id, struct lock_class_key *lkclass); extern void put_disk(struct gendisk *disk); - -#define alloc_disk_node(minors, node_id) \ -({ \ - static struct lock_class_key __key; \ - \ - __alloc_disk_node(minors, node_id, &__key); \ -}) - -#define alloc_disk(minors) alloc_disk_node(minors, NUMA_NO_NODE) - struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); /** -- cgit v1.2.3 From a58bd7683fcb60ae24c8572f932b48bc65719b7c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2021 15:19:07 +0200 Subject: block: remove the minors argument to __alloc_disk_node This was a leftover from the legacy alloc_disk interface. Switch the scsi ULPs and dasd to set ->minors directly like all other drivers and remove the argument. Signed-off-by: Christoph Hellwig Reviewed-by: Stefan Haberland [dasd] Link: https://lore.kernel.org/r/20210816131910.615153-7-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index ceda9b255dba..d20f101be758 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -259,8 +259,7 @@ static inline sector_t get_capacity(struct gendisk *disk) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void blk_drop_partitions(struct gendisk *disk); -struct gendisk *__alloc_disk_node(int minors, int node_id, - struct lock_class_key *lkclass); +struct gendisk *__alloc_disk_node(int node_id, struct lock_class_key *lkclass); extern void put_disk(struct gendisk *disk); struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); -- cgit v1.2.3 From 4a1fa41d304c7129328d4d5c7f31715b95e23b29 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2021 15:19:08 +0200 Subject: block: pass a request_queue to __blk_alloc_disk Pass in a request_queue and assign disk->queue in __blk_alloc_disk to ensure struct gendisk always has a valid ->queue pointer. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210816131910.615153-8-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index d20f101be758..13e90e6231d8 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -259,7 +259,8 @@ static inline sector_t get_capacity(struct gendisk *disk) int bdev_disk_changed(struct gendisk *disk, bool invalidate); void blk_drop_partitions(struct gendisk *disk); -struct gendisk *__alloc_disk_node(int node_id, struct lock_class_key *lkclass); +struct gendisk *__alloc_disk_node(struct request_queue *q, int node_id, + struct lock_class_key *lkclass); extern void put_disk(struct gendisk *disk); struct gendisk *__blk_alloc_disk(int node, struct lock_class_key *lkclass); -- cgit v1.2.3 From 61a35cfc26334fe1c8e970ca8fafeae2daae257d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2021 15:19:09 +0200 Subject: block: hold a request_queue reference for the lifetime of struct gendisk Acquire the queue ref dropped in disk_release in __blk_alloc_disk so any allocate gendisk always has a queue reference. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210816131910.615153-9-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 13e90e6231d8..55acefdd8a20 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -149,7 +149,6 @@ struct gendisk { unsigned long state; #define GD_NEED_PART_SCAN 0 #define GD_READ_ONLY 1 -#define GD_QUEUE_REF 2 struct mutex open_mutex; /* open/close mutex */ unsigned open_partitions; /* number of open partitions */ -- cgit v1.2.3 From d152c682f03ceb65c0d9663d4ba6ee2d46aa784d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 16 Aug 2021 15:46:24 +0200 Subject: block: add an explicit ->disk backpointer to the request_queue Replace the magic lookup through the kobject tree with an explicit backpointer, given that the device model links are set up and torn down at times when I/O is still possible, leading to potential NULL or invalid pointer dereferences. Fixes: edb0872f44ec ("block: move the bdi from the request_queue to the gendisk") Reported-by: syzbot Signed-off-by: Christoph Hellwig Tested-by: Sven Schnelle Link: https://lore.kernel.org/r/20210816134624.GA24234@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index df404c1fb087..22b5b8502d2a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -421,6 +421,8 @@ struct request_queue { spinlock_t queue_lock; + struct gendisk *disk; + /* * queue kobject */ @@ -661,9 +663,6 @@ extern void blk_clear_pm_only(struct request_queue *q); dma_map_page_attrs(dev, (bv)->bv_page, (bv)->bv_offset, (bv)->bv_len, \ (dir), (attrs)) -#define queue_has_disk(q) ((q)->kobj.parent != NULL) -#define queue_to_disk(q) (dev_to_disk(kobj_to_dev((q)->kobj.parent))) - static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; -- cgit v1.2.3 From 83cbce9574462c6b4eed6797bdaf18fae6859ab3 Mon Sep 17 00:00:00 2001 From: Luis Chamberlain Date: Wed, 18 Aug 2021 16:45:40 +0200 Subject: block: add error handling for device_add_disk / add_disk Properly unwind on errors in device_add_disk. This is the initial work as drivers are not converted yet, which will follow in separate patches. Signed-off-by: Luis Chamberlain [hch: major rebase. All bugs are probably mine] Signed-off-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Link: https://lore.kernel.org/r/20210818144542.19305-10-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/genhd.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 55acefdd8a20..c68d83c87f83 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -214,11 +214,11 @@ static inline dev_t disk_devt(struct gendisk *disk) void disk_uevent(struct gendisk *disk, enum kobject_action action); /* block/genhd.c */ -extern void device_add_disk(struct device *parent, struct gendisk *disk, - const struct attribute_group **groups); -static inline void add_disk(struct gendisk *disk) +int device_add_disk(struct device *parent, struct gendisk *disk, + const struct attribute_group **groups); +static inline int add_disk(struct gendisk *disk) { - device_add_disk(NULL, disk, NULL); + return device_add_disk(NULL, disk, NULL); } extern void del_gendisk(struct gendisk *gp); -- cgit v1.2.3 From 0bdfbca8a623e262e0f343b143151000a300cbaf Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 20 Aug 2021 03:45:33 +0300 Subject: block: Add alternative_gpt_sector() operation Add alternative_gpt_sector() block device operation which specifies alternative location of a GPT entry. This allows us to support Android devices that have GPT entry at a non-standard location and can't be repartitioned easily. Reviewed-by: Christoph Hellwig Suggested-by: Christoph Hellwig Signed-off-by: Dmitry Osipenko Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210820004536.15791-2-digetx@gmail.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 22b5b8502d2a..c9cb12483e12 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1872,6 +1872,13 @@ struct block_device_operations { char *(*devnode)(struct gendisk *disk, umode_t *mode); struct module *owner; const struct pr_ops *pr_ops; + + /* + * Special callback for probing GPT entry at a given sector. + * Needed by Android devices, used by GPT scanner and MMC blk + * driver. + */ + int (*alternative_gpt_sector)(struct gendisk *disk, sector_t *sector); }; #ifdef CONFIG_COMPAT -- cgit v1.2.3 From dc913385dd74e625271482c30aefedd1e5af7b8c Mon Sep 17 00:00:00 2001 From: Dmitry Osipenko Date: Fri, 20 Aug 2021 03:45:35 +0300 Subject: mmc: block: Support alternative_gpt_sector() operation Support generic alternative_gpt_sector() block device operation. It calculates location of GPT entry for eMMC of NVIDIA Tegra Android devices. Add new MMC_CAP2_ALT_GPT_TEGRA flag that enables scanning of alternative GPT sector and add raw_boot_mult field to mmc_ext_csd which allows to get size of the boot partitions that is needed for the calculation. Signed-off-by: Dmitry Osipenko Reviewed-by: Ulf Hansson Link: https://lore.kernel.org/r/20210820004536.15791-4-digetx@gmail.com Signed-off-by: Jens Axboe --- include/linux/mmc/card.h | 1 + include/linux/mmc/host.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mmc/card.h b/include/linux/mmc/card.h index 74e6c0624d27..37f975875102 100644 --- a/include/linux/mmc/card.h +++ b/include/linux/mmc/card.h @@ -109,6 +109,7 @@ struct mmc_ext_csd { u8 raw_hc_erase_gap_size; /* 221 */ u8 raw_erase_timeout_mult; /* 223 */ u8 raw_hc_erase_grp_size; /* 224 */ + u8 raw_boot_mult; /* 226 */ u8 raw_sec_trim_mult; /* 229 */ u8 raw_sec_erase_mult; /* 230 */ u8 raw_sec_feature_support;/* 231 */ diff --git a/include/linux/mmc/host.h b/include/linux/mmc/host.h index 0abd47e9ef9b..78dadf86b38f 100644 --- a/include/linux/mmc/host.h +++ b/include/linux/mmc/host.h @@ -398,6 +398,7 @@ struct mmc_host { #else #define MMC_CAP2_CRYPTO 0 #endif +#define MMC_CAP2_ALT_GPT_TEGRA (1 << 28) /* Host with eMMC that has GPT entry at a non-standard location */ int fixed_drv_type; /* fixed driver type for non-removable media */ -- cgit v1.2.3 From 158ee7b65653d9f841823c249014c2d0dfdeeb8f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 24 Aug 2021 17:18:23 +0200 Subject: block: mark blkdev_fsync static MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit blkdev_fsync is only used inside of block_dev.c since the removal of the raw drŅ–ver. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20210824151823.1575100-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/fs.h | 4 ---- 1 file changed, 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 640574294216..9220cdf648b0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3246,10 +3246,6 @@ ssize_t vfs_iocb_iter_read(struct file *file, struct kiocb *iocb, ssize_t vfs_iocb_iter_write(struct file *file, struct kiocb *iocb, struct iov_iter *iter); -/* fs/block_dev.c */ -extern int blkdev_fsync(struct file *filp, loff_t start, loff_t end, - int datasync); - /* fs/splice.c */ extern ssize_t generic_file_splice_read(struct file *, loff_t *, struct pipe_inode_info *, size_t, unsigned int); -- cgit v1.2.3