From a55b70f1273a54b33482db8b2568da435fefd6c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 25 Oct 2022 08:59:16 -0700 Subject: block: remove bio_start_io_acct_time bio_start_io_acct_time is not actually used anywhere, so remove it. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221025155916.270303-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 50e358a19d98..57ed49f20d2e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1458,7 +1458,6 @@ unsigned long bdev_start_io_acct(struct block_device *bdev, void bdev_end_io_acct(struct block_device *bdev, enum req_op op, unsigned long start_time); -void bio_start_io_acct_time(struct bio *bio, unsigned long start_time); unsigned long bio_start_io_acct(struct bio *bio); void bio_end_io_acct_remapped(struct bio *bio, unsigned long start_time, struct block_device *orig_bdev); -- cgit v1.2.3 From b179c98f76978a0fae072c2b2dad8e143218afd8 Mon Sep 17 00:00:00 2001 From: Bart Van Assche Date: Tue, 25 Oct 2022 12:17:53 -0700 Subject: block: Remove request.write_hint Commit c75e707fe1aa ("block: remove the per-bio/request write hint") removed all code that uses the struct request write_hint member. Hence also remove 'write_hint' itself. Reviewed-by: Ming Lei Cc: Christoph Hellwig Signed-off-by: Bart Van Assche Link: https://lore.kernel.org/r/20221025191755.1711437-2-bvanassche@acm.org Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ba18e9bdb799..569053ed959d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -140,7 +140,6 @@ struct request { struct blk_crypto_keyslot *crypt_keyslot; #endif - unsigned short write_hint; unsigned short ioprio; enum mq_rq_state state; -- cgit v1.2.3 From adff215830fcf3ef74f2f0d4dd5a47a6927d450b Mon Sep 17 00:00:00 2001 From: Dawei Li Date: Sun, 30 Oct 2022 13:20:08 +0800 Subject: block: simplify blksize_bits() implementation Convert current looping-based implementation into bit operation, which can bring improvement for: 1) bitops is more efficient for its arch-level optimization. 2) Given that blksize_bits() is inline, _if_ @size is compile-time constant, it's possible that order_base_2() _may_ make output compile-time evaluated, depending on code context and compiler behavior. 
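For illustration, the equivalence of the two blksize_bits() forms can be checked with a small standalone program (not from the patch itself): order_base_2() is emulated here with __builtin_clz() since the kernel helper is not available in userspace, SECTOR_SHIFT is assumed to be 9, and only power-of-two sizes larger than 256 bytes are exercised, as the function's comment requires.

/*
 * Standalone sketch (not part of the patch): verify that the bit-op form
 * of blksize_bits() matches the old loop for power-of-two sizes > 256.
 * order_base_2() is emulated via __builtin_clz(); SECTOR_SHIFT assumed 9.
 */
#include <assert.h>
#include <stdio.h>

#define SECTOR_SHIFT 9

static unsigned int order_base_2(unsigned int n)
{
	return n <= 1 ? 0 : 32 - __builtin_clz(n - 1);
}

static unsigned int blksize_bits_loop(unsigned int size)
{
	unsigned int bits = 8;

	do {
		bits++;
		size >>= 1;
	} while (size > 256);
	return bits;
}

static unsigned int blksize_bits_bitop(unsigned int size)
{
	return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT;
}

int main(void)
{
	for (unsigned int size = 512; size <= 65536; size <<= 1) {
		printf("%6u -> %u\n", size, blksize_bits_bitop(size));
		assert(blksize_bits_loop(size) == blksize_bits_bitop(size));
	}
	return 0;
}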
Signed-off-by: Dawei Li Reviewed-by: Bart Van Assche Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/TYCP286MB23238842958D7C083D6B67CECA349@TYCP286MB2323.JPNP286.PROD.OUTLOOK.COM Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 7 +------ 1 file changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 57ed49f20d2e..32137d85c9ad 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1349,12 +1349,7 @@ static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, /* assumes size > 256 */ static inline unsigned int blksize_bits(unsigned int size) { - unsigned int bits = 8; - do { - bits++; - size >>= 1; - } while (size > 256); - return bits; + return order_base_2(size >> SECTOR_SHIFT) + SECTOR_SHIFT; } static inline unsigned int block_size(struct block_device *bdev) -- cgit v1.2.3 From 80bd4a7aab4c9ce59bf5e35fdf52aa23d8a3c9f5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2022 16:00:47 +0100 Subject: blk-mq: move the srcu_struct used for quiescing to the tagset All I/O submissions have fairly similar latencies, and a tagset-wide quiesce is a fairly common operation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Ming Lei Reviewed-by: Chao Leng Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221101150050.3510-12-hch@lst.de [axboe: fix whitespace] Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++++ include/linux/blkdev.h | 9 --------- 2 files changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 569053ed959d..f059edebb11d 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -7,6 +7,7 @@ #include #include #include +#include struct blk_mq_tags; struct blk_flush_queue; @@ -500,6 +501,8 @@ enum hctx_type { * @tag_list_lock: Serializes tag_list accesses. * @tag_list: List of the request queues that use this tag set. See also * request_queue.tag_set_list. + * @srcu: Use as lock when type of the request queue is blocking + * (BLK_MQ_F_BLOCKING). */ struct blk_mq_tag_set { struct blk_mq_queue_map map[HCTX_MAX_TYPES]; @@ -520,6 +523,7 @@ struct blk_mq_tag_set { struct mutex tag_list_lock; struct list_head tag_list; + struct srcu_struct *srcu; }; /** diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 32137d85c9ad..6a6fa167fc82 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -22,7 +22,6 @@ #include #include #include -#include #include #include @@ -543,18 +542,11 @@ struct request_queue { struct mutex debugfs_mutex; bool mq_sysfs_init_done; - - /** - * @srcu: Sleepable RCU. Use as lock when type of the request queue - * is blocking (BLK_MQ_F_BLOCKING). 
Must be the last member - */ - struct srcu_struct srcu[]; }; /* Keep blk_queue_flag_name[] in sync with the definitions below */ #define QUEUE_FLAG_STOPPED 0 /* queue is stopped */ #define QUEUE_FLAG_DYING 1 /* queue being torn down */ -#define QUEUE_FLAG_HAS_SRCU 2 /* SRCU is allocated */ #define QUEUE_FLAG_NOMERGES 3 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 4 /* complete on same CPU-group */ #define QUEUE_FLAG_FAIL_IO 5 /* fake timeout */ @@ -590,7 +582,6 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -#define blk_queue_has_srcu(q) test_bit(QUEUE_FLAG_HAS_SRCU, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 483239c75ba768e0e2c0e0c503e5fc13c3d5773a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 1 Nov 2022 16:00:48 +0100 Subject: blk-mq: pass a tagset to blk_mq_wait_quiesce_done Nothing in blk_mq_wait_quiesce_done needs the request_queue now, so just pass the tagset, and move the non-mq check into the only caller that needs it. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Chao Leng Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221101150050.3510-13-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index f059edebb11d..061ea6e7af01 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -880,7 +880,7 @@ void blk_mq_start_hw_queues(struct request_queue *q); void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); -void blk_mq_wait_quiesce_done(struct request_queue *q); +void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); -- cgit v1.2.3 From 414dd48e882c5a39e7bd01b096ee6497eb3314b0 Mon Sep 17 00:00:00 2001 From: Chao Leng Date: Tue, 1 Nov 2022 16:00:49 +0100 Subject: blk-mq: add tagset quiesce interface Drivers that have shared tagsets may need to quiesce potentially a lot of request queues that all share a single tagset (e.g. nvme). Add an interface to quiesce all the queues on a given tagset. This interface is useful because it can speed up the quiesce by doing it in parallel. Because some queues should not need to be quiesced (e.g. the nvme connect_q) when quiescing the tagset, introduce a QUEUE_FLAG_SKIP_TAGSET_QUIESCE flag to allow this new interface to skip quiescing a particular queue. 
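As an illustration (not part of the patch), a driver built on a shared tagset might use the new interface roughly as follows; the mydrv_* names and structure are invented, while blk_mq_quiesce_tagset(), blk_mq_unquiesce_tagset() and QUEUE_FLAG_SKIP_TAGSET_QUIESCE are the pieces added by this series:

/*
 * Hypothetical driver-side sketch. Only the tagset quiesce calls and the
 * QUEUE_FLAG_SKIP_TAGSET_QUIESCE flag come from this series.
 */
#include <linux/blk-mq.h>
#include <linux/blkdev.h>

struct mydrv_ctrl {
	struct blk_mq_tag_set tag_set;
	struct request_queue *admin_q;	/* must keep working during resets */
};

static void mydrv_init_admin_queue(struct mydrv_ctrl *ctrl)
{
	/* Opt this queue out of tagset-wide quiesce, like nvme's connect_q. */
	blk_queue_flag_set(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, ctrl->admin_q);
}

static void mydrv_reset_prepare(struct mydrv_ctrl *ctrl)
{
	/* One call quiesces every queue sharing the tagset, in parallel. */
	blk_mq_quiesce_tagset(&ctrl->tag_set);
	/* ... tear down / re-establish the hardware association ... */
}

static void mydrv_reset_done(struct mydrv_ctrl *ctrl)
{
	blk_mq_unquiesce_tagset(&ctrl->tag_set);
}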
Signed-off-by: Chao Leng [hch: simplify for the per-tag_set srcu_struct] Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Ming Lei Reviewed-by: Chao Leng Reviewed-by: Hannes Reinecke Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221101150050.3510-14-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 3 +++ 2 files changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 061ea6e7af01..109a0e30c470 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -881,6 +881,8 @@ void blk_mq_start_stopped_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); void blk_mq_start_stopped_hw_queues(struct request_queue *q, bool async); void blk_mq_quiesce_queue(struct request_queue *q); void blk_mq_wait_quiesce_done(struct blk_mq_tag_set *set); +void blk_mq_quiesce_tagset(struct blk_mq_tag_set *set); +void blk_mq_unquiesce_tagset(struct blk_mq_tag_set *set); void blk_mq_unquiesce_queue(struct request_queue *q); void blk_mq_delay_run_hw_queue(struct blk_mq_hw_ctx *hctx, unsigned long msecs); void blk_mq_run_hw_queue(struct blk_mq_hw_ctx *hctx, bool async); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6a6fa167fc82..9188aa3f6259 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -571,6 +571,7 @@ struct request_queue { #define QUEUE_FLAG_HCTX_ACTIVE 28 /* at least one blk-mq hctx is active */ #define QUEUE_FLAG_NOWAIT 29 /* device supports NOWAIT */ #define QUEUE_FLAG_SQ_SCHED 30 /* single queue style io dispatch */ +#define QUEUE_FLAG_SKIP_TAGSET_QUIESCE 31 /* quiesce_tagset skip the queue*/ #define QUEUE_FLAG_MQ_DEFAULT ((1UL << QUEUE_FLAG_IO_STAT) | \ (1UL << QUEUE_FLAG_SAME_COMP) | \ @@ -610,6 +611,8 @@ bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); #define blk_queue_pm_only(q) atomic_read(&(q)->pm_only) #define blk_queue_registered(q) test_bit(QUEUE_FLAG_REGISTERED, &(q)->queue_flags) #define blk_queue_sq_sched(q) test_bit(QUEUE_FLAG_SQ_SCHED, &(q)->queue_flags) +#define blk_queue_skip_tagset_quiesce(q) \ + test_bit(QUEUE_FLAG_SKIP_TAGSET_QUIESCE, &(q)->queue_flags) extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -- cgit v1.2.3 From 0f0892356fa174bdd8bd655c820ee3658c4c9f01 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 21 Oct 2022 11:41:08 -0600 Subject: mm: allow multiple error returns in try_grab_page() In order to add checks for P2PDMA memory into try_grab_page(), expand the error return from a bool to an int/error code. Update all the callsites to handle the change in usage. Also remove the WARN_ON_ONCE() call at the callsites, since there already is a WARN_ON_ONCE() inside the function if it fails. 
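For illustration only, a made-up call site sketch of the new pattern; the real callers live in mm/gup.c and their error handling is more involved:

/*
 * Illustrative call-site sketch (not from the patch): the errno from
 * try_grab_page() is simply propagated; the WARN now lives inside it.
 */
#include <linux/err.h>
#include <linux/mm.h>

static struct page *grab_followed_page(struct page *page, unsigned int gup_flags)
{
	int ret;

	/* Previously a bool, typically wrapped in WARN_ON_ONCE() by callers. */
	ret = try_grab_page(page, gup_flags);
	if (ret)
		return ERR_PTR(ret);
	return page;
}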
Signed-off-by: Logan Gunthorpe Reviewed-by: Dan Williams Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221021174116.7200-2-logang@deltatee.com Signed-off-by: Jens Axboe --- include/linux/mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 8bbcccbc5565..62a91dc1272b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1129,7 +1129,7 @@ static inline void get_page(struct page *page) folio_get(page_folio(page)); } -bool __must_check try_grab_page(struct page *page, unsigned int flags); +int __must_check try_grab_page(struct page *page, unsigned int flags); static inline __must_check bool try_get_page(struct page *page) { -- cgit v1.2.3 From 4003f107fa2eabb0aab90e37a1ed7b74c6f0d132 Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 21 Oct 2022 11:41:09 -0600 Subject: mm: introduce FOLL_PCI_P2PDMA to gate getting PCI P2PDMA pages GUP Callers that expect PCI P2PDMA pages can now set FOLL_PCI_P2PDMA to allow obtaining P2PDMA pages. If GUP is called without the flag and a P2PDMA page is found, it will return an error in try_grab_page() or try_grab_folio(). The check is safe to do before taking the reference to the page in both cases seeing the page should be protected by either the appropriate ptl or mmap_lock; or the gup fast guarantees preventing TLB flushes. try_grab_folio() has one call site that WARNs on failure and cannot actually deal with the failure of this function (it seems it will get into an infinite loop). Expand the comment there to document a couple more conditions on why it will not fail. FOLL_PCI_P2PDMA cannot be set if FOLL_LONGTERM is set. This is to copy fsdax until pgmap refcounts are fixed (see the link below for more information). Link: https://lkml.kernel.org/r/Yy4Ot5MoOhsgYLTQ@ziepe.ca Signed-off-by: Logan Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221021174116.7200-3-logang@deltatee.com Signed-off-by: Jens Axboe --- include/linux/mm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 62a91dc1272b..6b081a8dcf88 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2958,6 +2958,7 @@ struct page *follow_page(struct vm_area_struct *vma, unsigned long address, #define FOLL_SPLIT_PMD 0x20000 /* split huge pmd before returning */ #define FOLL_PIN 0x40000 /* pages must be released via unpin_user_page */ #define FOLL_FAST_ONLY 0x80000 /* gup_fast: prevent fall-back to slow gup */ +#define FOLL_PCI_P2PDMA 0x100000 /* allow returning PCI P2PDMA pages */ /* * FOLL_PIN and FOLL_LONGTERM may be used in various combinations with each -- cgit v1.2.3 From d82076403cef7fcd1e7617c9db48bf21ebdc1f9c Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 21 Oct 2022 11:41:10 -0600 Subject: iov_iter: introduce iov_iter_get_pages_[alloc_]flags() Add iov_iter_get_pages_flags() and iov_iter_get_pages_alloc_flags() which take a flags argument that is passed to get_user_pages_fast(). This is so that FOLL_PCI_P2PDMA can be passed when appropriate. 
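A hypothetical caller might use the new flags-taking variant like this; the wrapper is invented, while the iov_iter_get_pages_alloc() signature and FOLL_PCI_P2PDMA are the ones introduced in this series (the flag itself comes from the previous patch):

/*
 * Hypothetical snippet: pin the pages behind an iov_iter while opting in
 * to PCI P2PDMA pages. Returns bytes pinned and fills *pages/*page_offset.
 */
#include <linux/mm.h>
#include <linux/uio.h>

static ssize_t pin_user_buffer(struct iov_iter *iter, struct page ***pages,
			       size_t max_bytes, size_t *page_offset)
{
	/* This caller knows how to map P2PDMA pages, so opt in explicitly. */
	return iov_iter_get_pages_alloc(iter, pages, max_bytes, page_offset,
					FOLL_PCI_P2PDMA);
}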
Signed-off-by: Logan Gunthorpe Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221021174116.7200-4-logang@deltatee.com Signed-off-by: Jens Axboe --- include/linux/uio.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 2e3134b14ffd..9ede533ce64c 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -247,8 +247,14 @@ void iov_iter_pipe(struct iov_iter *i, unsigned int direction, struct pipe_inode void iov_iter_discard(struct iov_iter *i, unsigned int direction, size_t count); void iov_iter_xarray(struct iov_iter *i, unsigned int direction, struct xarray *xarray, loff_t start, size_t count); +ssize_t iov_iter_get_pages(struct iov_iter *i, struct page **pages, + size_t maxsize, unsigned maxpages, size_t *start, + unsigned gup_flags); ssize_t iov_iter_get_pages2(struct iov_iter *i, struct page **pages, size_t maxsize, unsigned maxpages, size_t *start); +ssize_t iov_iter_get_pages_alloc(struct iov_iter *i, + struct page ***pages, size_t maxsize, size_t *start, + unsigned gup_flags); ssize_t iov_iter_get_pages_alloc2(struct iov_iter *i, struct page ***pages, size_t maxsize, size_t *start); int iov_iter_npages(const struct iov_iter *i, int maxpages); -- cgit v1.2.3 From 49580e690755d0e51ed7aa2c33225dd884fa738a Mon Sep 17 00:00:00 2001 From: Logan Gunthorpe Date: Fri, 21 Oct 2022 11:41:11 -0600 Subject: block: add check when merging zone device pages Consecutive zone device pages should not be merged into the same sgl or bvec segment with other types of pages or if they belong to different pgmaps. Otherwise getting the pgmap of a given segment is not possible without scanning the entire segment. This helper returns true either if both pages are not zone device pages or both pages are zone device pages with the same pgmap. Add a helper to determine if zone device pages are mergeable and use this helper in page_is_mergeable(). Signed-off-by: Logan Gunthorpe Reviewed-by: Christoph Hellwig Reviewed-by: John Hubbard Reviewed-by: Chaitanya Kulkarni Link: https://lore.kernel.org/r/20221021174116.7200-5-logang@deltatee.com Signed-off-by: Jens Axboe --- include/linux/mmzone.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include') diff --git a/include/linux/mmzone.h b/include/linux/mmzone.h index 5f74891556f3..9c49ec5d0e25 100644 --- a/include/linux/mmzone.h +++ b/include/linux/mmzone.h @@ -986,6 +986,25 @@ static inline bool is_zone_device_page(const struct page *page) { return page_zonenum(page) == ZONE_DEVICE; } + +/* + * Consecutive zone device pages should not be merged into the same sgl + * or bvec segment with other types of pages or if they belong to different + * pgmaps. Otherwise getting the pgmap of a given segment is not possible + * without scanning the entire segment. This helper returns true either if + * both pages are not zone device pages or both pages are zone device pages + * with the same pgmap. 
+ */ +static inline bool zone_device_pages_have_same_pgmap(const struct page *a, + const struct page *b) +{ + if (is_zone_device_page(a) != is_zone_device_page(b)) + return false; + if (!is_zone_device_page(a)) + return true; + return a->pgmap == b->pgmap; +} + extern void memmap_init_zone_device(struct zone *, unsigned long, unsigned long, struct dev_pagemap *); #else @@ -993,6 +1012,11 @@ static inline bool is_zone_device_page(const struct page *page) { return false; } +static inline bool zone_device_pages_have_same_pgmap(const struct page *a, + const struct page *b) +{ + return true; +} #endif static inline bool folio_is_zone_device(const struct folio *folio) -- cgit v1.2.3 From 4f8126bb2308066b877859e4b5923ffb54143630 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Sat, 5 Nov 2022 19:10:55 -0400 Subject: sbitmap: Use single per-bitmap counting to wake up queued tags sbitmap suffers from code complexity, as demonstrated by recent fixes, and eventual lost wake ups on nested I/O completion. The later happens, from what I understand, due to the non-atomic nature of the updates to wait_cnt, which needs to be subtracted and eventually reset when equal to zero. This two step process can eventually miss an update when a nested completion happens to interrupt the CPU in between the wait_cnt updates. This is very hard to fix, as shown by the recent changes to this code. The code complexity arises mostly from the corner cases to avoid missed wakes in this scenario. In addition, the handling of wake_batch recalculation plus the synchronization with sbq_queue_wake_up is non-trivial. This patchset implements the idea originally proposed by Jan [1], which removes the need for the two-step updates of wait_cnt. This is done by tracking the number of completions and wakeups in always increasing, per-bitmap counters. Instead of having to reset the wait_cnt when it reaches zero, we simply keep counting, and attempt to wake up N threads in a single wait queue whenever there is enough space for a batch. Waking up less than batch_wake shouldn't be a problem, because we haven't changed the conditions for wake up, and the existing batch calculation guarantees at least enough remaining completions to wake up a batch for each queue at any time. Performance-wise, one should expect very similar performance to the original algorithm for the case where there is no queueing. In both the old algorithm and this implementation, the first thing is to check ws_active, which bails out if there is no queueing to be managed. In the new code, we took care to avoid accounting completions and wakeups when there is no queueing, to not pay the cost of atomic operations unnecessarily, since it doesn't skew the numbers. For more interesting cases, where there is queueing, we need to take into account the cross-communication of the atomic operations. I've been benchmarking by running parallel fio jobs against a single hctx nullb in different hardware queue depth scenarios, and verifying both IOPS and queueing. Each experiment was repeated 5 times on a 20-CPU box, with 20 parallel jobs. fio was issuing fixed-size randwrites with qd=64 against nullb, varying only the hardware queue length per test. 
queue size     2                4                8                16                32                64
6.1-rc2        1681.1K (1.6K)   2633.0K (12.7K)  6940.8K (16.3K)  8172.3K (617.5K)  8391.7K (367.1K)  8606.1K (351.2K)
patched        1721.8K (15.1K)  3016.7K (3.8K)   7543.0K (89.4K)  8132.5K (303.4K)  8324.2K (230.6K)  8401.8K (284.7K)

The following is a similar experiment, run against a nullb with a single bitmap shared by 20 hctx spread across 2 NUMA nodes. This has 40 parallel fio jobs operating on the same device.

queue size     2                4                8                16                32                64
6.1-rc2        1081.0K (2.3K)   957.2K (1.5K)    1699.1K (5.7K)   6178.2K (124.6K)  12227.9K (37.7K)  13286.6K (92.9K)
patched        1081.8K (2.8K)   1316.5K (5.4K)   2364.4K (1.8K)   6151.4K (20.0K)   11893.6K (17.5K)  12385.6K (18.4K)

It has also survived blktests and a 12h-stress run against nullb. I also ran the code against nvme and a scsi SSD, and I didn't observe performance regression in those. If there are other tests you think I should run, please let me know and I will follow up with results. [1] https://lore.kernel.org/all/aef9de29-e9f5-259a-f8be-12d1b734e72@google.com/ Cc: Hugh Dickins Cc: Keith Busch Cc: Liu Song Suggested-by: Jan Kara Signed-off-by: Gabriel Krisman Bertazi Link: https://lore.kernel.org/r/20221105231055.25953-1-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 16 +++++++++++----- 1 file changed, 11 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 4d2d5205ab58..d662cf136021 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -86,11 +86,6 @@ struct sbitmap { * struct sbq_wait_state - Wait queue in a &struct sbitmap_queue. */ struct sbq_wait_state { - /** - * @wait_cnt: Number of frees remaining before we wake up. - */ - atomic_t wait_cnt; - /** * @wait: Wait queue. */ @@ -138,6 +133,17 @@ struct sbitmap_queue { * sbitmap_queue_get_shallow() */ unsigned int min_shallow_depth; + + /** + * @completion_cnt: Number of bits cleared passed to the + * wakeup function. + */ + atomic_t completion_cnt; + + /** + * @wakeup_cnt: Number of thread wake ups issued. + */ + atomic_t wakeup_cnt; }; /** -- cgit v1.2.3 From 42271ca389edb0446b9e492858b4c38083b0b9f8 Mon Sep 17 00:00:00 2001 From: Giulio Benetti Date: Wed, 19 Oct 2022 18:04:07 +0200 Subject: lib/raid6: drop RAID6_USE_EMPTY_ZERO_PAGE RAID6_USE_EMPTY_ZERO_PAGE is unused and hardcoded to 0, so let's drop it. Signed-off-by: Giulio Benetti Reviewed-by: Christoph Hellwig Signed-off-by: Song Liu --- include/linux/raid/pq.h | 8 -------- 1 file changed, 8 deletions(-) (limited to 'include') diff --git a/include/linux/raid/pq.h b/include/linux/raid/pq.h index d6e5a1feb947..f29aaaf2eb21 100644 --- a/include/linux/raid/pq.h +++ b/include/linux/raid/pq.h @@ -10,17 +10,9 @@ #ifdef __KERNEL__ -/* Set to 1 to use kernel-wide empty_zero_page */ -#define RAID6_USE_EMPTY_ZERO_PAGE 0 #include -/* We need a pre-zeroed page... if we don't want to use the kernel-provided - one define it here */ -#if RAID6_USE_EMPTY_ZERO_PAGE -# define raid6_empty_zero_page empty_zero_page -#else extern const char raid6_empty_zero_page[PAGE_SIZE]; -#endif #else /* ! __KERNEL__ */ /* Used for testing in user space */ -- cgit v1.2.3 From 855b7717f44b13e0990aa5ad36bbf9aa35051516 Mon Sep 17 00:00:00 2001 From: Kanchan Joshi Date: Mon, 31 Oct 2022 21:53:50 +0530 Subject: nvme: fine-granular CAP_SYS_ADMIN for nvme io commands Currently both io and admin commands are kept under a coarse-granular CAP_SYS_ADMIN check, disregarding file mode completely. 
$ ls -l /dev/ng*
crw-rw-rw- 1 root root 242, 0 Sep 9 19:20 /dev/ng0n1
crw------- 1 root root 242, 1 Sep 9 19:20 /dev/ng0n2

In the example above, ng0n1 appears as if it may allow unprivileged read/write operation but it does not and behaves the same as ng0n2. This patch implements a shift from CAP_SYS_ADMIN to more fine-granular control for io-commands. If CAP_SYS_ADMIN is present, nothing else is checked as before. Otherwise, the following rules are in place:
- any admin-cmd is not allowed
- vendor-specific and fabric commands are not allowed
- io-commands that can write are allowed if matching FMODE_WRITE permission is present
- io-commands that read are allowed

Add a helper nvme_cmd_allowed that implements the above policy. Change all the callers of CAP_SYS_ADMIN to go through nvme_cmd_allowed for any decision making. Since the file open mode is taken into account for any approval/denial, change various places to keep the file-mode information handy. Signed-off-by: Kanchan Joshi Reviewed-by: Jens Axboe Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 050d7d0cd81b..1d102b662e88 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -797,6 +797,7 @@ enum nvme_opcode { nvme_cmd_zone_mgmt_send = 0x79, nvme_cmd_zone_mgmt_recv = 0x7a, nvme_cmd_zone_append = 0x7d, + nvme_cmd_vendor_start = 0x80, }; #define nvme_opcode_name(opcode) { opcode, #opcode } -- cgit v1.2.3 From 1b96f862ecccb3e6f950eba584bebf22955cecc5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 30 Oct 2022 16:50:15 +0100 Subject: nvme: implement the DEAC bit for the Write Zeroes command While the specification allows devices to either deallocate data or to actually write zeroes on any Write Zeroes command, many SSDs only do the sensible thing and deallocate data when the DEAC bit is specified. Set it when it is supported and the caller doesn't explicitly opt out of deallocation. Signed-off-by: Christoph Hellwig Reviewed-by: Keith Busch Reviewed-by: Chaitanya Kulkarni Reviewed-by: Martin K. Petersen --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 1d102b662e88..d6be2a686100 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -964,6 +964,7 @@ enum { NVME_RW_PRINFO_PRCHK_GUARD = 1 << 12, NVME_RW_PRINFO_PRACT = 1 << 13, NVME_RW_DTYPE_STREAMS = 1 << 4, + NVME_WZ_DEAC = 1 << 9, }; struct nvme_dsm_cmd { -- cgit v1.2.3 From 6e4068a11413b96687a03c39814539e202de294b Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Wed, 2 Nov 2022 15:18:19 +0000 Subject: mempool: introduce mempool_is_saturated Introduce a helper mempool_is_saturated(), which tells if the mempool is under-filled or not. We need it to figure out whether it should be freed right into the mempool or could be cached with top level caches. 
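As a usage illustration (not taken from the patch), a free path might look roughly like this; the per-cpu cache helpers are invented stand-ins for something like the bio allocation cache that motivated the helper:

/*
 * Illustrative only: prefer a cheaper cache once the mempool reserve is
 * already full. struct my_cache and my_cache_put() are made up.
 */
#include <linux/mempool.h>

struct my_cache;
bool my_cache_put(struct my_cache *cache, void *elem);

static void my_free_element(mempool_t *pool, struct my_cache *cache, void *elem)
{
	/* Reserve already holds min_nr elements: keep this one in the cache. */
	if (mempool_is_saturated(pool) && my_cache_put(cache, elem))
		return;

	/* Otherwise hand it back so the reserve refills towards min_nr. */
	mempool_free(elem, pool);
}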
Signed-off-by: Pavel Begunkov Link: https://lore.kernel.org/r/636aed30be8c35d78f45e244998bc6209283cccc.1667384020.git.asml.silence@gmail.com Signed-off-by: Jens Axboe --- include/linux/mempool.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/mempool.h b/include/linux/mempool.h index 0c964ac107c2..4aae6c06c5f2 100644 --- a/include/linux/mempool.h +++ b/include/linux/mempool.h @@ -30,6 +30,11 @@ static inline bool mempool_initialized(mempool_t *pool) return pool->elements != NULL; } +static inline bool mempool_is_saturated(mempool_t *pool) +{ + return READ_ONCE(pool->curr_nr) >= pool->min_nr; +} + void mempool_exit(mempool_t *pool); int mempool_init_node(mempool_t *pool, int min_nr, mempool_alloc_t *alloc_fn, mempool_free_t *free_fn, void *pool_data, -- cgit v1.2.3 From ee7dc86b6d3e3b86c2c487f713eda657850de238 Mon Sep 17 00:00:00 2001 From: Gabriel Krisman Bertazi Date: Tue, 15 Nov 2022 17:45:52 -0500 Subject: wait: Return number of exclusive waiters awaken Sbitmap code will need to know how many waiters were actually woken for its batched wakeups implementation. Return the number of woken exclusive waiters from __wake_up() to facilitate that. Suggested-by: Jan Kara Signed-off-by: Gabriel Krisman Bertazi Reviewed-by: Jan Kara Link: https://lore.kernel.org/r/20221115224553.23594-3-krisman@suse.de Signed-off-by: Jens Axboe --- include/linux/wait.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/wait.h b/include/linux/wait.h index 7f5a51aae0a7..a0307b516b09 100644 --- a/include/linux/wait.h +++ b/include/linux/wait.h @@ -209,7 +209,7 @@ __remove_wait_queue(struct wait_queue_head *wq_head, struct wait_queue_entry *wq list_del(&wq_entry->entry); } -void __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); +int __wake_up(struct wait_queue_head *wq_head, unsigned int mode, int nr, void *key); void __wake_up_locked_key(struct wait_queue_head *wq_head, unsigned int mode, void *key); void __wake_up_locked_key_bookmark(struct wait_queue_head *wq_head, unsigned int mode, void *key, wait_queue_entry_t *bookmark); -- cgit v1.2.3 From 7abc077788363ac7194aefd355306f8e974feff7 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 15 Nov 2022 22:10:51 +0800 Subject: block: remove delayed holder registration Now that dm has been fixed to keep track of holder registrations before add_disk, the somewhat buggy block layer code can be safely removed. 
Signed-off-by: Christoph Hellwig Signed-off-by: Yu Kuai Link: https://lore.kernel.org/r/20221115141054.1051801-8-yukuai1@huaweicloud.com Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9188aa3f6259..516e45246868 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -833,7 +833,6 @@ void set_capacity(struct gendisk *disk, sector_t size); #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk); void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk); -int bd_register_pending_holders(struct gendisk *disk); #else static inline int bd_link_disk_holder(struct block_device *bdev, struct gendisk *disk) @@ -844,10 +843,6 @@ static inline void bd_unlink_disk_holder(struct block_device *bdev, struct gendisk *disk) { } -static inline int bd_register_pending_holders(struct gendisk *disk) -{ - return 0; -} #endif /* CONFIG_BLOCK_HOLDER_DEPRECATED */ dev_t part_devt(struct gendisk *disk, u8 partno); -- cgit v1.2.3 From dae590a6c96c799434e0ff8156ef29b88c257e60 Mon Sep 17 00:00:00 2001 From: Waiman Long Date: Fri, 4 Nov 2022 20:59:02 -0400 Subject: blk-cgroup: Flush stats at blkgs destruction path As noted by Michal, the blkg_iostat_set's in the lockless list hold reference to blkg's to protect against their removal. Those blkg's hold reference to blkcg. When a cgroup is being destroyed, cgroup_rstat_flush() is only called at css_release_work_fn() which is called when the blkcg reference count reaches 0. This circular dependency will prevent blkcg from being freed until some other events cause cgroup_rstat_flush() to be called to flush out the pending blkcg stats. To prevent this delayed blkcg removal, add a new cgroup_rstat_css_cpu_flush() function to flush stats for a given css and cpu and call it at the blkgs destruction path, blkcg_destroy_blkgs(), whenever there are still some pending stats to be flushed. This will ensure that blkcg reference count can reach 0 ASAP. Signed-off-by: Waiman Long Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20221105005902.407297-4-longman@redhat.com Signed-off-by: Jens Axboe --- include/linux/cgroup.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 528bd44b59e2..6c4e66b3fa84 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -766,6 +766,7 @@ void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); void cgroup_rstat_flush_release(void); +void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu); /* * Basic resource stats. -- cgit v1.2.3 From fce3caea0f241f5d34855c82c399d5e0e2d91f07 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Nov 2022 05:29:42 +0100 Subject: blk-crypto: don't use struct request_queue for public interfaces Switch all public blk-crypto interfaces to use struct block_device arguments to specify the device they operate on instead of the request_queue, which is a block layer implementation detail. 
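As a rough sketch of what a consumer (an fscrypt-like filesystem) looks like after the change, the caller now passes the block_device directly instead of digging out the request_queue; the my_fs_* wrappers below are invented, only the blk_crypto_*() calls reflect the new interface:

/*
 * Sketch of a blk-crypto consumer after the interface change.
 */
#include <linux/blk-crypto.h>
#include <linux/blkdev.h>

static int my_fs_enable_inline_crypt(struct block_device *bdev,
				     const struct blk_crypto_key *key)
{
	/* Previously: blk_crypto_start_using_key(key, bdev_get_queue(bdev)) */
	return blk_crypto_start_using_key(bdev, key);
}

static int my_fs_forget_inline_crypt_key(struct block_device *bdev,
					 const struct blk_crypto_key *key)
{
	/* Previously took the request_queue as the first argument. */
	return blk_crypto_evict_key(bdev, key);
}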
Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Link: https://lore.kernel.org/r/20221114042944.1009870-2-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-crypto.h | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 69b24fe92cbf..561ca92e204d 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -71,9 +71,6 @@ struct bio_crypt_ctx { #include #include -struct request; -struct request_queue; - #ifdef CONFIG_BLK_INLINE_ENCRYPTION static inline bool bio_has_crypt_ctx(struct bio *bio) @@ -94,13 +91,13 @@ int blk_crypto_init_key(struct blk_crypto_key *blk_key, const u8 *raw_key, unsigned int dun_bytes, unsigned int data_unit_size); -int blk_crypto_start_using_key(const struct blk_crypto_key *key, - struct request_queue *q); +int blk_crypto_start_using_key(struct block_device *bdev, + const struct blk_crypto_key *key); -int blk_crypto_evict_key(struct request_queue *q, +int blk_crypto_evict_key(struct block_device *bdev, const struct blk_crypto_key *key); -bool blk_crypto_config_supported(struct request_queue *q, +bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); #else /* CONFIG_BLK_INLINE_ENCRYPTION */ -- cgit v1.2.3 From 6715c98b6cf003f26b1b2f655393134e9d999a05 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Nov 2022 05:29:43 +0100 Subject: blk-crypto: add a blk_crypto_config_supported_natively helper Add a blk_crypto_config_supported_natively helper that wraps __blk_crypto_cfg_supported to retrieve the crypto_profile from the request queue. With this, fscrypt can stop including blk-crypto-profile.h and rely on the public consumer interface in blk-crypto.h. Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Link: https://lore.kernel.org/r/20221114042944.1009870-3-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-crypto.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/blk-crypto.h b/include/linux/blk-crypto.h index 561ca92e204d..a33d32f5c268 100644 --- a/include/linux/blk-crypto.h +++ b/include/linux/blk-crypto.h @@ -97,6 +97,8 @@ int blk_crypto_start_using_key(struct block_device *bdev, const struct blk_crypto_key *key); int blk_crypto_evict_key(struct block_device *bdev, const struct blk_crypto_key *key); +bool blk_crypto_config_supported_natively(struct block_device *bdev, + const struct blk_crypto_config *cfg); bool blk_crypto_config_supported(struct block_device *bdev, const struct blk_crypto_config *cfg); -- cgit v1.2.3 From 3569788c08235c6f3e9e6ca724b2df44787ff487 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Nov 2022 05:29:44 +0100 Subject: blk-crypto: move internal only declarations to blk-crypto-internal.h blk_crypto_get_keyslot, blk_crypto_put_keyslot, __blk_crypto_evict_key and __blk_crypto_cfg_supported are only used internally by the blk-crypto code, so move them out of blk-crypto-profile.h, which is included by drivers that supply blk-crypto functionality. 
Signed-off-by: Christoph Hellwig Reviewed-by: Eric Biggers Link: https://lore.kernel.org/r/20221114042944.1009870-4-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk-crypto-profile.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/blk-crypto-profile.h b/include/linux/blk-crypto-profile.h index bbab65bd5428..e6802b69cdd6 100644 --- a/include/linux/blk-crypto-profile.h +++ b/include/linux/blk-crypto-profile.h @@ -138,18 +138,6 @@ int devm_blk_crypto_profile_init(struct device *dev, unsigned int blk_crypto_keyslot_index(struct blk_crypto_keyslot *slot); -blk_status_t blk_crypto_get_keyslot(struct blk_crypto_profile *profile, - const struct blk_crypto_key *key, - struct blk_crypto_keyslot **slot_ptr); - -void blk_crypto_put_keyslot(struct blk_crypto_keyslot *slot); - -bool __blk_crypto_cfg_supported(struct blk_crypto_profile *profile, - const struct blk_crypto_config *cfg); - -int __blk_crypto_evict_key(struct blk_crypto_profile *profile, - const struct blk_crypto_key *key); - void blk_crypto_reprogram_all_keys(struct blk_crypto_profile *profile); void blk_crypto_profile_destroy(struct blk_crypto_profile *profile); -- cgit v1.2.3 From 2cd10a496a86787367716b684dadfecbb594095b Mon Sep 17 00:00:00 2001 From: Joel Colledge Date: Tue, 22 Nov 2022 14:43:00 +0100 Subject: lru_cache: remove unused lc_private, lc_set, lc_index_of MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Signed-off-by: Joel Colledge Signed-off-by: Christoph Böhmwalder Link: https://lore.kernel.org/r/20221122134301.69258-4-christoph.boehmwalder@linbit.com Signed-off-by: Jens Axboe --- include/linux/lru_cache.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include') diff --git a/include/linux/lru_cache.h b/include/linux/lru_cache.h index 07add7882a5d..c9afcdd9324c 100644 --- a/include/linux/lru_cache.h +++ b/include/linux/lru_cache.h @@ -199,7 +199,6 @@ struct lru_cache { unsigned long flags; - void *lc_private; const char *name; /* nr_elements there */ @@ -241,7 +240,6 @@ extern struct lru_cache *lc_create(const char *name, struct kmem_cache *cache, unsigned e_count, size_t e_size, size_t e_off); extern void lc_reset(struct lru_cache *lc); extern void lc_destroy(struct lru_cache *lc); -extern void lc_set(struct lru_cache *lc, unsigned int enr, int index); extern void lc_del(struct lru_cache *lc, struct lc_element *element); extern struct lc_element *lc_get_cumulative(struct lru_cache *lc, unsigned int enr); @@ -297,6 +295,5 @@ extern bool lc_is_used(struct lru_cache *lc, unsigned int enr); container_of(ptr, type, member) extern struct lc_element *lc_element_by_index(struct lru_cache *lc, unsigned i); -extern unsigned int lc_index_of(struct lru_cache *lc, struct lc_element *e); #endif -- cgit v1.2.3 From c62256dda37133a48d56cecc15e4a4d527d4cc46 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 30 Nov 2022 08:25:46 -0700 Subject: Revert "blk-cgroup: Flush stats at blkgs destruction path" This reverts commit dae590a6c96c799434e0ff8156ef29b88c257e60. We've had a few reports on this causing a crash at boot time, because of a reference issue. While this problem seemingly did exist before the patch and needs solving separately, this patch makes it a lot easier to trigger. 
Link: https://lore.kernel.org/linux-block/CA+QYu4oxiRKC6hJ7F27whXy-PRBx=Tvb+-7TQTONN8qTtV3aDA@mail.gmail.com/ Link: https://lore.kernel.org/linux-block/69af7ccb-6901-c84c-0e95-5682ccfb750c@acm.org/ Signed-off-by: Jens Axboe --- include/linux/cgroup.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 6c4e66b3fa84..528bd44b59e2 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -766,7 +766,6 @@ void cgroup_rstat_flush(struct cgroup *cgrp); void cgroup_rstat_flush_irqsafe(struct cgroup *cgrp); void cgroup_rstat_flush_hold(struct cgroup *cgrp); void cgroup_rstat_flush_release(void); -void cgroup_rstat_css_cpu_flush(struct cgroup_subsys_state *css, int cpu); /* * Basic resource stats. -- cgit v1.2.3 From 2bd85221a625b316114bafaab527770b607095d3 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Mon, 14 Nov 2022 05:26:36 +0100 Subject: block: untangle request_queue refcounting from sysfs The kobject embedded into the request_queue is used for the queue directory in sysfs, but that is a child of the gendisks directory and is intimately tied to it. Move this kobject to the gendisk and use a refcount_t in the request_queue for the actual request_queue refcounting that is completely unrelated to the device model. Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221114042637.1009333-5-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 516e45246868..469299ea0660 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -155,6 +155,7 @@ struct gendisk { unsigned open_partitions; /* number of open partitions */ struct backing_dev_info *bdi; + struct kobject queue_kobj; /* the queue/ directory */ struct kobject *slave_dir; #ifdef CONFIG_BLOCK_HOLDER_DEPRECATED struct list_head slave_bdevs; @@ -430,10 +431,7 @@ struct request_queue { struct gendisk *disk; - /* - * queue kobject - */ - struct kobject kobj; + refcount_t refs; /* * mq queue kobject -- cgit v1.2.3 From 63c9eac4b6d75859703f5820414986edefb01210 Mon Sep 17 00:00:00 2001 From: Kemeng Shi Date: Tue, 18 Oct 2022 20:19:30 +0800 Subject: blk-iocost: Trace vtime_base_rate instead of vtime_rate Since commit ac33e91e2daca ("blk-iocost: implement vtime loss compensation") renamed the original vtime_rate to vtime_base_rate, the current vtime_rate is the original vtime_rate with compensation. The rate currently shown in the tracepoints is a mix of vtime_rate and vtime_base_rate: 1) In function ioc_adjust_base_vrate, the first trace_iocost_ioc_vrate_adj shows vtime_rate, the second trace_iocost_ioc_vrate_adj shows vtime_base_rate. 2) Function iocg_activate shows vtime_rate by calling TRACE_IOCG_PATH(iocg_activate... 3) Function ioc_check_iocgs shows vtime_rate by calling TRACE_IOCG_PATH(iocg_idle... Trace vtime_base_rate instead of vtime_rate because: 1) Before commit ac33e91e2daca ("blk-iocost: implement vtime loss compensation"), the traced rate was without compensation, so keep showing the rate without compensation. 2) vtime_base_rate is more stable, while vtime_rate heavily depends on the excess budget in the current period, which may change abruptly in the next period. 
Signed-off-by: Kemeng Shi Acked-by: Tejun Heo Link: https://lore.kernel.org/r/20221018121932.10792-4-shikemeng@huawei.com Signed-off-by: Jens Axboe --- include/trace/events/iocost.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/trace/events/iocost.h b/include/trace/events/iocost.h index 6d1626e7a4ce..af8bfed528fc 100644 --- a/include/trace/events/iocost.h +++ b/include/trace/events/iocost.h @@ -38,7 +38,7 @@ DECLARE_EVENT_CLASS(iocost_iocg_state, __assign_str(cgroup, path); __entry->now = now->now; __entry->vnow = now->vnow; - __entry->vrate = now->vrate; + __entry->vrate = iocg->ioc->vtime_base_rate; __entry->last_period = last_period; __entry->cur_period = cur_period; __entry->vtime = vtime; @@ -160,7 +160,7 @@ TRACE_EVENT(iocost_ioc_vrate_adj, TP_fast_assign( __assign_str(devname, ioc_name(ioc)); - __entry->old_vrate = atomic64_read(&ioc->vtime_rate); + __entry->old_vrate = ioc->vtime_base_rate; __entry->new_vrate = new_vrate; __entry->busy_level = ioc->busy_level; __entry->read_missed_ppm = missed_ppm[READ]; -- cgit v1.2.3 From f40eb99897af665f11858dd7b56edcb62c3f3c67 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Fri, 2 Dec 2022 19:27:58 +0100 Subject: pktcdvd: remove driver. Way back in 2016 in commit 5a8b187c61e9 ("pktcdvd: mark as unmaintained and deprecated") this driver was marked as "will be removed soon". 5 years seems long enough to have it stick around after that, so finally remove the thing now. Reported-by: Christoph Hellwig Cc: Jens Axboe Cc: Thomas Maier Cc: Peter Osterlund Cc: linux-block@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20221202182758.1339039-1-gregkh@linuxfoundation.org Signed-off-by: Jens Axboe --- include/linux/pktcdvd.h | 197 ------------------------------------------- include/uapi/linux/pktcdvd.h | 112 ------------------------ 2 files changed, 309 deletions(-) delete mode 100644 include/linux/pktcdvd.h delete mode 100644 include/uapi/linux/pktcdvd.h (limited to 'include') diff --git a/include/linux/pktcdvd.h b/include/linux/pktcdvd.h deleted file mode 100644 index f9c5ac80d59b..000000000000 --- a/include/linux/pktcdvd.h +++ /dev/null @@ -1,197 +0,0 @@ -/* - * Copyright (C) 2000 Jens Axboe - * Copyright (C) 2001-2004 Peter Osterlund - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and - * DVD-RW devices. - * - */ -#ifndef __PKTCDVD_H -#define __PKTCDVD_H - -#include -#include -#include -#include -#include -#include -#include - -/* default bio write queue congestion marks */ -#define PKT_WRITE_CONGESTION_ON 10000 -#define PKT_WRITE_CONGESTION_OFF 9000 - - -struct packet_settings -{ - __u32 size; /* packet size in (512 byte) sectors */ - __u8 fp; /* fixed packets */ - __u8 link_loss; /* the rest is specified - * as per Mt Fuji */ - __u8 write_type; - __u8 track_mode; - __u8 block_mode; -}; - -/* - * Very crude stats for now - */ -struct packet_stats -{ - unsigned long pkt_started; - unsigned long pkt_ended; - unsigned long secs_w; - unsigned long secs_rg; - unsigned long secs_r; -}; - -struct packet_cdrw -{ - struct list_head pkt_free_list; - struct list_head pkt_active_list; - spinlock_t active_list_lock; /* Serialize access to pkt_active_list */ - struct task_struct *thread; - atomic_t pending_bios; -}; - -/* - * Switch to high speed reading after reading this many kilobytes - * with no interspersed writes. 
- */ -#define HI_SPEED_SWITCH 512 - -struct packet_iosched -{ - atomic_t attention; /* Set to non-zero when queue processing is needed */ - int writing; /* Non-zero when writing, zero when reading */ - spinlock_t lock; /* Protecting read/write queue manipulations */ - struct bio_list read_queue; - struct bio_list write_queue; - sector_t last_write; /* The sector where the last write ended */ - int successive_reads; -}; - -/* - * 32 buffers of 2048 bytes - */ -#if (PAGE_SIZE % CD_FRAMESIZE) != 0 -#error "PAGE_SIZE must be a multiple of CD_FRAMESIZE" -#endif -#define PACKET_MAX_SIZE 128 -#define FRAMES_PER_PAGE (PAGE_SIZE / CD_FRAMESIZE) -#define PACKET_MAX_SECTORS (PACKET_MAX_SIZE * CD_FRAMESIZE >> 9) - -enum packet_data_state { - PACKET_IDLE_STATE, /* Not used at the moment */ - PACKET_WAITING_STATE, /* Waiting for more bios to arrive, so */ - /* we don't have to do as much */ - /* data gathering */ - PACKET_READ_WAIT_STATE, /* Waiting for reads to fill in holes */ - PACKET_WRITE_WAIT_STATE, /* Waiting for the write to complete */ - PACKET_RECOVERY_STATE, /* Recover after read/write errors */ - PACKET_FINISHED_STATE, /* After write has finished */ - - PACKET_NUM_STATES /* Number of possible states */ -}; - -/* - * Information needed for writing a single packet - */ -struct pktcdvd_device; - -struct packet_data -{ - struct list_head list; - - spinlock_t lock; /* Lock protecting state transitions and */ - /* orig_bios list */ - - struct bio_list orig_bios; /* Original bios passed to pkt_make_request */ - /* that will be handled by this packet */ - int write_size; /* Total size of all bios in the orig_bios */ - /* list, measured in number of frames */ - - struct bio *w_bio; /* The bio we will send to the real CD */ - /* device once we have all data for the */ - /* packet we are going to write */ - sector_t sector; /* First sector in this packet */ - int frames; /* Number of frames in this packet */ - - enum packet_data_state state; /* Current state */ - atomic_t run_sm; /* Incremented whenever the state */ - /* machine needs to be run */ - long sleep_time; /* Set this to non-zero to make the state */ - /* machine run after this many jiffies. */ - - atomic_t io_wait; /* Number of pending IO operations */ - atomic_t io_errors; /* Number of read/write errors during IO */ - - struct bio *r_bios[PACKET_MAX_SIZE]; /* bios to use during data gathering */ - struct page *pages[PACKET_MAX_SIZE / FRAMES_PER_PAGE]; - - int cache_valid; /* If non-zero, the data for the zone defined */ - /* by the sector variable is completely cached */ - /* in the pages[] vector. 
*/ - - int id; /* ID number for debugging */ - struct pktcdvd_device *pd; -}; - -struct pkt_rb_node { - struct rb_node rb_node; - struct bio *bio; -}; - -struct packet_stacked_data -{ - struct bio *bio; /* Original read request bio */ - struct pktcdvd_device *pd; -}; -#define PSD_POOL_SIZE 64 - -struct pktcdvd_device -{ - struct block_device *bdev; /* dev attached */ - dev_t pkt_dev; /* our dev */ - char name[20]; - struct packet_settings settings; - struct packet_stats stats; - int refcnt; /* Open count */ - int write_speed; /* current write speed, kB/s */ - int read_speed; /* current read speed, kB/s */ - unsigned long offset; /* start offset */ - __u8 mode_offset; /* 0 / 8 */ - __u8 type; - unsigned long flags; - __u16 mmc3_profile; - __u32 nwa; /* next writable address */ - __u32 lra; /* last recorded address */ - struct packet_cdrw cdrw; - wait_queue_head_t wqueue; - - spinlock_t lock; /* Serialize access to bio_queue */ - struct rb_root bio_queue; /* Work queue of bios we need to handle */ - int bio_queue_size; /* Number of nodes in bio_queue */ - bool congested; /* Someone is waiting for bio_queue_size - * to drop. */ - sector_t current_sector; /* Keep track of where the elevator is */ - atomic_t scan_queue; /* Set to non-zero when pkt_handle_queue */ - /* needs to be run. */ - mempool_t rb_pool; /* mempool for pkt_rb_node allocations */ - - struct packet_iosched iosched; - struct gendisk *disk; - - int write_congestion_off; - int write_congestion_on; - - struct device *dev; /* sysfs pktcdvd[0-7] dev */ - - struct dentry *dfs_d_root; /* debugfs: devname directory */ - struct dentry *dfs_f_info; /* debugfs: info file */ -}; - -#endif /* __PKTCDVD_H */ diff --git a/include/uapi/linux/pktcdvd.h b/include/uapi/linux/pktcdvd.h deleted file mode 100644 index 9cbb55d21c94..000000000000 --- a/include/uapi/linux/pktcdvd.h +++ /dev/null @@ -1,112 +0,0 @@ -/* SPDX-License-Identifier: GPL-2.0 WITH Linux-syscall-note */ -/* - * Copyright (C) 2000 Jens Axboe - * Copyright (C) 2001-2004 Peter Osterlund - * - * May be copied or modified under the terms of the GNU General Public - * License. See linux/COPYING for more information. - * - * Packet writing layer for ATAPI and SCSI CD-R, CD-RW, DVD-R, and - * DVD-RW devices. - * - */ -#ifndef _UAPI__PKTCDVD_H -#define _UAPI__PKTCDVD_H - -#include - -/* - * 1 for normal debug messages, 2 is very verbose. 0 to turn it off. - */ -#define PACKET_DEBUG 1 - -#define MAX_WRITERS 8 - -#define PKT_RB_POOL_SIZE 512 - -/* - * How long we should hold a non-full packet before starting data gathering. - */ -#define PACKET_WAIT_TIME (HZ * 5 / 1000) - -/* - * use drive write caching -- we need deferred error handling to be - * able to successfully recover with this option (drive will return good - * status as soon as the cdb is validated). 
- */ -#if defined(CONFIG_CDROM_PKTCDVD_WCACHE) -#define USE_WCACHING 1 -#else -#define USE_WCACHING 0 -#endif - -/* - * No user-servicable parts beyond this point -> - */ - -/* - * device types - */ -#define PACKET_CDR 1 -#define PACKET_CDRW 2 -#define PACKET_DVDR 3 -#define PACKET_DVDRW 4 - -/* - * flags - */ -#define PACKET_WRITABLE 1 /* pd is writable */ -#define PACKET_NWA_VALID 2 /* next writable address valid */ -#define PACKET_LRA_VALID 3 /* last recorded address valid */ -#define PACKET_MERGE_SEGS 4 /* perform segment merging to keep */ - /* underlying cdrom device happy */ - -/* - * Disc status -- from READ_DISC_INFO - */ -#define PACKET_DISC_EMPTY 0 -#define PACKET_DISC_INCOMPLETE 1 -#define PACKET_DISC_COMPLETE 2 -#define PACKET_DISC_OTHER 3 - -/* - * write type, and corresponding data block type - */ -#define PACKET_MODE1 1 -#define PACKET_MODE2 2 -#define PACKET_BLOCK_MODE1 8 -#define PACKET_BLOCK_MODE2 10 - -/* - * Last session/border status - */ -#define PACKET_SESSION_EMPTY 0 -#define PACKET_SESSION_INCOMPLETE 1 -#define PACKET_SESSION_RESERVED 2 -#define PACKET_SESSION_COMPLETE 3 - -#define PACKET_MCN "4a656e734178626f65323030300000" - -#undef PACKET_USE_LS - -#define PKT_CTRL_CMD_SETUP 0 -#define PKT_CTRL_CMD_TEARDOWN 1 -#define PKT_CTRL_CMD_STATUS 2 - -struct pkt_ctrl_command { - __u32 command; /* in: Setup, teardown, status */ - __u32 dev_index; /* in/out: Device index */ - __u32 dev; /* in/out: Device nr for cdrw device */ - __u32 pkt_dev; /* in/out: Device nr for packet device */ - __u32 num_devices; /* out: Largest device index + 1 */ - __u32 padding; /* Not used */ -}; - -/* - * packet ioctls - */ -#define PACKET_IOCTL_MAGIC ('X') -#define PACKET_CTRL_CMD _IOWR(PACKET_IOCTL_MAGIC, 1, struct pkt_ctrl_command) - - -#endif /* _UAPI__PKTCDVD_H */ -- cgit v1.2.3 From 85d6ce58e493ac8b7122e2fbe3f41b94d6ebdc11 Mon Sep 17 00:00:00 2001 From: Greg Kroah-Hartman Date: Sat, 3 Dec 2022 15:07:47 +0100 Subject: block: remove devnode callback from struct block_device_operations With the removal of the pktcdvd driver, there are no in-kernel users of the devnode callback in struct block_device_operations, so it can be safely removed. If it is needed for new block drivers in the future, it can be brought back. Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Greg Kroah-Hartman Link: https://lore.kernel.org/r/20221203140747.1942969-1-gregkh@linuxfoundation.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 469299ea0660..2db2ad72af0f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1395,7 +1395,6 @@ struct block_device_operations { void (*swap_slot_free_notify) (struct block_device *, unsigned long); int (*report_zones)(struct gendisk *, sector_t sector, unsigned int nr_zones, report_zones_cb cb, void *data); - char *(*devnode)(struct gendisk *disk, umode_t *mode); /* returns the length of the identifier or a negative errno: */ int (*get_unique_id)(struct gendisk *disk, u8 id[16], enum blk_unique_id id_type); -- cgit v1.2.3 From db1c7d77976775483a8ef240b4c705f113e13ea1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Dec 2022 15:44:07 +0100 Subject: block: bio_copy_data_iter With the pktcdvdv removal, bio_copy_data_iter is unused now. Fold the logic into bio_copy_data and remove the separate lower level function. 
Signed-off-by: Christoph Hellwig Link: https://lore.kernel.org/r/20221206144407.722049-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 2c5806997bbf..b231a665682a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -475,8 +475,6 @@ void __bio_release_pages(struct bio *bio, bool mark_dirty); extern void bio_set_pages_dirty(struct bio *bio); extern void bio_check_pages_dirty(struct bio *bio); -extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, - struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); extern void bio_free_pages(struct bio *bio); void guard_bio_eod(struct bio *bio); -- cgit v1.2.3 From c34b7ac65087554627f4840f4ecd6f2107a68fd1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 6 Dec 2022 15:40:57 +0100 Subject: block: remove bio_set_op_attrs This macro is obsolete, so replace the last few uses with open coded bi_opf assignments. Signed-off-by: Christoph Hellwig Acked-by: Coly Li > Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Link: https://lore.kernel.org/r/20221206144057.720846-1-hch@lst.de Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index e0b098089ef2..99be590f952f 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -472,13 +472,6 @@ static inline enum req_op bio_op(const struct bio *bio) return bio->bi_opf & REQ_OP_MASK; } -/* obsolete, don't use in new code */ -static inline void bio_set_op_attrs(struct bio *bio, enum req_op op, - blk_opf_t op_flags) -{ - bio->bi_opf = op | op_flags; -} - static inline bool op_is_write(blk_opf_t op) { return !!(op & (__force blk_opf_t)1); -- cgit v1.2.3 From c1f480b2d092960ecf8bb0bd1f27982c33ada42a Mon Sep 17 00:00:00 2001 From: Luca Boccassi Date: Tue, 6 Dec 2022 09:29:13 +0000 Subject: sed-opal: allow using IOC_OPAL_SAVE for locking too MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Usually when closing a crypto device (eg: dm-crypt with LUKS) the volume key is not required, as it requires root privileges anyway, and root can deny access to a disk in many ways regardless. Requiring the volume key to lock the device is a peculiarity of the OPAL specification. Given we might already have saved the key if the user requested it via the 'IOC_OPAL_SAVE' ioctl, we can use that key to lock the device if no key was provided here and the locking range matches, and the user sets the appropriate flag with 'IOC_OPAL_SAVE'. This allows integrating OPAL with tools and libraries that are used to the common behaviour and do not ask for the volume key when closing a device. Callers can always pass a non-zero key and it will be used regardless, as before. 
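A minimal userspace sketch (not from the patch) of how a tool might use the new flag: the key is registered once with IOC_OPAL_SAVE and OPAL_SAVE_FOR_LOCK set, and a later lock request with an empty key falls back to the saved one, provided the locking range matches. The device handling, locking range and error handling are illustrative, and the passphrase must fit the opal_key key field.

/*
 * Userspace sketch: save the key for later locking, then lock the range
 * without supplying the key again.
 */
#include <string.h>
#include <sys/ioctl.h>
#include <linux/sed-opal.h>

static int opal_save_key_for_lock(int fd, const void *key, __u8 key_len)
{
	struct opal_lock_unlock lk = {
		.session.who = OPAL_ADMIN1,
		.session.opal_key.lr = 0,		/* global locking range */
		.session.opal_key.key_len = key_len,
		.l_state = OPAL_RW,
		.flags = OPAL_SAVE_FOR_LOCK,		/* new in this patch */
	};

	memcpy(lk.session.opal_key.key, key, key_len);
	return ioctl(fd, IOC_OPAL_SAVE, &lk);
}

static int opal_lock_without_key(int fd)
{
	struct opal_lock_unlock lk = {
		.session.who = OPAL_ADMIN1,
		.session.opal_key.lr = 0,
		/* key_len == 0: fall back to the key saved above */
		.l_state = OPAL_LK,
	};

	return ioctl(fd, IOC_OPAL_LOCK_UNLOCK, &lk);
}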
Suggested-by: Štěpán Horáček Signed-off-by: Luca Boccassi Reviewed-by: Christoph Hellwig Reviewed-by: Christian Brauner Link: https://lore.kernel.org/r/20221206092913.4625-1-luca.boccassi@gmail.com Signed-off-by: Jens Axboe --- include/uapi/linux/sed-opal.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 2573772e2fb3..1fed3c9294fc 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -44,6 +44,11 @@ enum opal_lock_state { OPAL_LK = 0x04, /* 0100 */ }; +enum opal_lock_flags { + /* IOC_OPAL_SAVE will also store the provided key for locking */ + OPAL_SAVE_FOR_LOCK = 0x01, +}; + struct opal_key { __u8 lr; __u8 key_len; @@ -76,7 +81,8 @@ struct opal_user_lr_setup { struct opal_lock_unlock { struct opal_session_info session; __u32 l_state; - __u8 __align[4]; + __u16 flags; + __u8 __align[2]; }; struct opal_new_pw { -- cgit v1.2.3 From 56fb8d90031f71fa8af48fdff8498b9263b9c759 Mon Sep 17 00:00:00 2001 From: Christophe JAILLET Date: Mon, 5 Dec 2022 20:16:48 +0100 Subject: block: sed-opal: Don't include <linux/kernel.h> There is no need to include <linux/kernel.h> here. Prefer the less invasive <linux/compiler_types.h> and <linux/types.h> which are needed in this .h file itself. Signed-off-by: Christophe JAILLET Reviewed-by: Christoph Hellwig Link: https://lore.kernel.org/r/c1d479b39e30fe70c4579a1af035d4db49421f56.1670069909.git.christophe.jaillet@wanadoo.fr Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 6f837bb6c715..31ac562a17d7 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -11,7 +11,8 @@ #define LINUX_OPAL_H #include <uapi/linux/sed-opal.h> -#include <linux/kernel.h> +#include <linux/compiler_types.h> +#include <linux/types.h> struct opal_dev; -- cgit v1.2.3