summaryrefslogtreecommitdiff
path: root/include/linux
diff options
context:
space:
mode:
Diffstat (limited to 'include/linux')
-rw-r--r--include/linux/backing-dev-defs.h4
-rw-r--r--include/linux/bio-integrity.h7
-rw-r--r--include/linux/bio.h2
-rw-r--r--include/linux/blk-integrity.h19
-rw-r--r--include/linux/blk-mq-dma.h28
-rw-r--r--include/linux/blk-mq.h30
-rw-r--r--include/linux/blk_types.h14
-rw-r--r--include/linux/blkdev.h62
-rw-r--r--include/linux/blktrace_api.h3
-rw-r--r--include/linux/device-mapper.h10
-rw-r--r--include/linux/kfifo.h34
-rw-r--r--include/linux/sbitmap.h6
12 files changed, 146 insertions, 73 deletions
diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h
index 610ef62b6a32..0217c1073735 100644
--- a/include/linux/backing-dev-defs.h
+++ b/include/linux/backing-dev-defs.h
@@ -170,7 +170,9 @@ struct backing_dev_info {
u64 id;
struct rb_node rb_node; /* keyed by ->id */
struct list_head bdi_list;
- unsigned long ra_pages; /* max readahead in PAGE_SIZE units */
+ /* max readahead in PAGE_SIZE units */
+ unsigned long __data_racy ra_pages;
+
unsigned long io_pages; /* max allowed IO size */
struct kref refcnt; /* Reference counter for the structure */
diff --git a/include/linux/bio-integrity.h b/include/linux/bio-integrity.h
index 851254f36eb3..21e4652dcfd2 100644
--- a/include/linux/bio-integrity.h
+++ b/include/linux/bio-integrity.h
@@ -13,7 +13,8 @@ enum bip_flags {
BIP_CHECK_GUARD = 1 << 5, /* guard check */
BIP_CHECK_REFTAG = 1 << 6, /* reftag check */
BIP_CHECK_APPTAG = 1 << 7, /* apptag check */
- BIP_P2P_DMA = 1 << 8, /* using P2P address */
+
+ BIP_MEMPOOL = 1 << 15, /* buffer backed by mempool */
};
struct bio_integrity_payload {
@@ -140,4 +141,8 @@ static inline int bio_integrity_add_page(struct bio *bio, struct page *page,
return 0;
}
#endif /* CONFIG_BLK_DEV_INTEGRITY */
+
+void bio_integrity_alloc_buf(struct bio *bio, bool zero_buffer);
+void bio_integrity_free_buf(struct bio_integrity_payload *bip);
+
#endif /* _LINUX_BIO_INTEGRITY_H */
diff --git a/include/linux/bio.h b/include/linux/bio.h
index 16c1c85613b7..ad2d57908c1c 100644
--- a/include/linux/bio.h
+++ b/include/linux/bio.h
@@ -324,6 +324,8 @@ extern struct bio *bio_split(struct bio *bio, int sectors,
gfp_t gfp, struct bio_set *bs);
int bio_split_io_at(struct bio *bio, const struct queue_limits *lim,
unsigned *segs, unsigned max_bytes, unsigned len_align);
+u8 bio_seg_gap(struct request_queue *q, struct bio *prev, struct bio *next,
+ u8 gaps_bit);
/**
* bio_next_split - get next @sectors from a bio, splitting if necessary
diff --git a/include/linux/blk-integrity.h b/include/linux/blk-integrity.h
index b659373788f6..a6b84206eb94 100644
--- a/include/linux/blk-integrity.h
+++ b/include/linux/blk-integrity.h
@@ -8,6 +8,11 @@
struct request;
+/*
+ * Maximum contiguous integrity buffer allocation.
+ */
+#define BLK_INTEGRITY_MAX_SIZE SZ_2M
+
enum blk_integrity_flags {
BLK_INTEGRITY_NOVERIFY = 1 << 0,
BLK_INTEGRITY_NOGENERATE = 1 << 1,
@@ -28,14 +33,6 @@ static inline bool queue_limits_stack_integrity_bdev(struct queue_limits *t,
#ifdef CONFIG_BLK_DEV_INTEGRITY
int blk_rq_map_integrity_sg(struct request *, struct scatterlist *);
-static inline bool blk_rq_integrity_dma_unmap(struct request *req,
- struct device *dma_dev, struct dma_iova_state *state,
- size_t mapped_len)
-{
- return blk_dma_unmap(req, dma_dev, state, mapped_len,
- bio_integrity(req->bio)->bip_flags & BIP_P2P_DMA);
-}
-
int blk_rq_count_integrity_sg(struct request_queue *, struct bio *);
int blk_rq_integrity_map_user(struct request *rq, void __user *ubuf,
ssize_t bytes);
@@ -124,12 +121,6 @@ static inline int blk_rq_map_integrity_sg(struct request *q,
{
return 0;
}
-static inline bool blk_rq_integrity_dma_unmap(struct request *req,
- struct device *dma_dev, struct dma_iova_state *state,
- size_t mapped_len)
-{
- return false;
-}
static inline int blk_rq_integrity_map_user(struct request *rq,
void __user *ubuf,
ssize_t bytes)
diff --git a/include/linux/blk-mq-dma.h b/include/linux/blk-mq-dma.h
index 51829958d872..cb88fc791fbd 100644
--- a/include/linux/blk-mq-dma.h
+++ b/include/linux/blk-mq-dma.h
@@ -16,13 +16,13 @@ struct blk_dma_iter {
/* Output address range for this iteration */
dma_addr_t addr;
u32 len;
+ struct pci_p2pdma_map_state p2pdma;
/* Status code. Only valid when blk_rq_dma_map_iter_* returned false */
blk_status_t status;
/* Internal to blk_rq_dma_map_iter_* */
struct blk_map_iter iter;
- struct pci_p2pdma_map_state p2pdma;
};
bool blk_rq_dma_map_iter_start(struct request *req, struct device *dma_dev,
@@ -43,36 +43,34 @@ static inline bool blk_rq_dma_map_coalesce(struct dma_iova_state *state)
}
/**
- * blk_dma_unmap - try to DMA unmap a request
+ * blk_rq_dma_unmap - try to DMA unmap a request
* @req: request to unmap
* @dma_dev: device to unmap from
* @state: DMA IOVA state
* @mapped_len: number of bytes to unmap
- * @is_p2p: true if mapped with PCI_P2PDMA_MAP_BUS_ADDR
+ * @map: peer-to-peer mapping type
*
* Returns %false if the callers need to manually unmap every DMA segment
* mapped using @iter or %true if no work is left to be done.
*/
-static inline bool blk_dma_unmap(struct request *req, struct device *dma_dev,
- struct dma_iova_state *state, size_t mapped_len, bool is_p2p)
+static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
+ struct dma_iova_state *state, size_t mapped_len,
+ enum pci_p2pdma_map_type map)
{
- if (is_p2p)
+ if (map == PCI_P2PDMA_MAP_BUS_ADDR)
return true;
if (dma_use_iova(state)) {
+ unsigned int attrs = 0;
+
+ if (map == PCI_P2PDMA_MAP_THRU_HOST_BRIDGE)
+ attrs |= DMA_ATTR_MMIO;
+
dma_iova_destroy(dma_dev, state, mapped_len, rq_dma_dir(req),
- 0);
+ attrs);
return true;
}
return !dma_need_unmap(dma_dev);
}
-
-static inline bool blk_rq_dma_unmap(struct request *req, struct device *dma_dev,
- struct dma_iova_state *state, size_t mapped_len)
-{
- return blk_dma_unmap(req, dma_dev, state, mapped_len,
- req->cmd_flags & REQ_P2PDMA);
-}
-
#endif /* BLK_MQ_DMA_H */
diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h
index b25d12545f46..eb7254b3dddd 100644
--- a/include/linux/blk-mq.h
+++ b/include/linux/blk-mq.h
@@ -152,6 +152,14 @@ struct request {
unsigned short nr_phys_segments;
unsigned short nr_integrity_segments;
+ /*
+ * The lowest set bit for address gaps between physical segments. This
+ * provides information necessary for dma optimization opportunities,
+ * like for testing if the segments can be coalesced against the
+ * device's iommu granule.
+ */
+ unsigned char phys_gap_bit;
+
#ifdef CONFIG_BLK_INLINE_ENCRYPTION
struct bio_crypt_ctx *crypt_ctx;
struct blk_crypto_keyslot *crypt_keyslot;
@@ -208,6 +216,14 @@ struct request {
void *end_io_data;
};
+/*
+ * Returns a mask with all bits starting at req->phys_gap_bit set to 1.
+ */
+static inline unsigned long req_phys_gap_mask(const struct request *req)
+{
+ return ~(((1 << req->phys_gap_bit) >> 1) - 1);
+}
+
static inline enum req_op req_op(const struct request *req)
{
return req->cmd_flags & REQ_OP_MASK;
@@ -999,8 +1015,20 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq)
return rq + 1;
}
+static inline struct blk_mq_hw_ctx *queue_hctx(struct request_queue *q, int id)
+{
+ struct blk_mq_hw_ctx *hctx;
+
+ rcu_read_lock();
+ hctx = rcu_dereference(q->queue_hw_ctx)[id];
+ rcu_read_unlock();
+
+ return hctx;
+}
+
#define queue_for_each_hw_ctx(q, hctx, i) \
- xa_for_each(&(q)->hctx_table, (i), (hctx))
+ for ((i) = 0; (i) < (q)->nr_hw_queues && \
+ ({ hctx = queue_hctx((q), i); 1; }); (i)++)
#define hctx_for_each_ctx(hctx, ctx, i) \
for ((i) = 0; (i) < (hctx)->nr_ctx && \
diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h
index 44c30183ecc3..cbbcb9051ec3 100644
--- a/include/linux/blk_types.h
+++ b/include/linux/blk_types.h
@@ -218,6 +218,18 @@ struct bio {
enum rw_hint bi_write_hint;
u8 bi_write_stream;
blk_status_t bi_status;
+
+ /*
+ * The bvec gap bit indicates the lowest set bit in any address offset
+ * between all bi_io_vecs. This field is initialized only after the bio
+ * is split to the hardware limits (see bio_split_io_at()). The value
+ * may be used to consider DMA optimization when performing that
+ * mapping. The value is compared to a power of two mask where the
+ * result depends on any bit set within the mask, so saving the lowest
+ * bit is sufficient to know if any segment gap collides with the mask.
+ */
+ u8 bi_bvec_gap_bit;
+
atomic_t __bi_remaining;
struct bvec_iter bi_iter;
@@ -381,7 +393,6 @@ enum req_flag_bits {
__REQ_DRV, /* for driver use */
__REQ_FS_PRIVATE, /* for file system (submitter) use */
__REQ_ATOMIC, /* for atomic write operations */
- __REQ_P2PDMA, /* contains P2P DMA pages */
/*
* Command specific flags, keep last:
*/
@@ -414,7 +425,6 @@ enum req_flag_bits {
#define REQ_DRV (__force blk_opf_t)(1ULL << __REQ_DRV)
#define REQ_FS_PRIVATE (__force blk_opf_t)(1ULL << __REQ_FS_PRIVATE)
#define REQ_ATOMIC (__force blk_opf_t)(1ULL << __REQ_ATOMIC)
-#define REQ_P2PDMA (__force blk_opf_t)(1ULL << __REQ_P2PDMA)
#define REQ_NOUNMAP (__force blk_opf_t)(1ULL << __REQ_NOUNMAP)
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index 70b671a9a7f7..72e34acd439c 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -38,6 +38,7 @@ struct blk_flush_queue;
struct kiocb;
struct pr_ops;
struct rq_qos;
+struct blk_report_zones_args;
struct blk_queue_stats;
struct blk_stat_callback;
struct blk_crypto_profile;
@@ -172,6 +173,7 @@ struct gendisk {
#define GD_ADDED 4
#define GD_SUPPRESS_PART_SCAN 5
#define GD_OWNS_QUEUE 6
+#define GD_ZONE_APPEND_USED 7
struct mutex open_mutex; /* open/close mutex */
unsigned open_partitions; /* number of open partitions */
@@ -195,7 +197,7 @@ struct gendisk {
unsigned int nr_zones;
unsigned int zone_capacity;
unsigned int last_zone_capacity;
- unsigned long __rcu *conv_zones_bitmap;
+ u8 __rcu *zones_cond;
unsigned int zone_wplugs_hash_bits;
atomic_t nr_zone_wplugs;
spinlock_t zone_wplugs_lock;
@@ -378,7 +380,7 @@ struct queue_limits {
unsigned int max_sectors;
unsigned int max_user_sectors;
unsigned int max_segment_size;
- unsigned int min_segment_size;
+ unsigned int max_fast_segment_size;
unsigned int physical_block_size;
unsigned int logical_block_size;
unsigned int alignment_offset;
@@ -432,9 +434,17 @@ struct queue_limits {
typedef int (*report_zones_cb)(struct blk_zone *zone, unsigned int idx,
void *data);
+int disk_report_zone(struct gendisk *disk, struct blk_zone *zone,
+ unsigned int idx, struct blk_report_zones_args *args);
+
+int blkdev_get_zone_info(struct block_device *bdev, sector_t sector,
+ struct blk_zone *zone);
+
#define BLK_ALL_ZONES ((unsigned int)-1)
int blkdev_report_zones(struct block_device *bdev, sector_t sector,
unsigned int nr_zones, report_zones_cb cb, void *data);
+int blkdev_report_zones_cached(struct block_device *bdev, sector_t sector,
+ unsigned int nr_zones, report_zones_cb cb, void *data);
int blkdev_zone_mgmt(struct block_device *bdev, enum req_op op,
sector_t sectors, sector_t nr_sectors);
int blk_revalidate_disk_zones(struct gendisk *disk);
@@ -485,7 +495,7 @@ struct request_queue {
*/
unsigned long queue_flags;
- unsigned int rq_timeout;
+ unsigned int __data_racy rq_timeout;
unsigned int queue_depth;
@@ -493,7 +503,7 @@ struct request_queue {
/* hw dispatch queues */
unsigned int nr_hw_queues;
- struct xarray hctx_table;
+ struct blk_mq_hw_ctx * __rcu *queue_hw_ctx;
struct percpu_ref q_usage_counter;
struct lock_class_key io_lock_cls_key;
@@ -921,12 +931,20 @@ static inline unsigned int bdev_zone_capacity(struct block_device *bdev,
{
return disk_zone_capacity(bdev->bd_disk, pos);
}
+
+bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector);
+
#else /* CONFIG_BLK_DEV_ZONED */
static inline unsigned int disk_nr_zones(struct gendisk *disk)
{
return 0;
}
+static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
+{
+ return false;
+}
+
static inline bool bio_needs_zone_write_plugging(struct bio *bio)
{
return false;
@@ -1504,6 +1522,12 @@ static inline sector_t bdev_zone_sectors(struct block_device *bdev)
return q->limits.chunk_sectors;
}
+static inline sector_t bdev_zone_start(struct block_device *bdev,
+ sector_t sector)
+{
+ return sector & ~(bdev_zone_sectors(bdev) - 1);
+}
+
static inline sector_t bdev_offset_from_zone_start(struct block_device *bdev,
sector_t sector)
{
@@ -1529,33 +1553,6 @@ static inline bool bdev_is_zone_aligned(struct block_device *bdev,
return bdev_is_zone_start(bdev, sector);
}
-/**
- * bdev_zone_is_seq - check if a sector belongs to a sequential write zone
- * @bdev: block device to check
- * @sector: sector number
- *
- * Check if @sector on @bdev is contained in a sequential write required zone.
- */
-static inline bool bdev_zone_is_seq(struct block_device *bdev, sector_t sector)
-{
- bool is_seq = false;
-
-#if IS_ENABLED(CONFIG_BLK_DEV_ZONED)
- if (bdev_is_zoned(bdev)) {
- struct gendisk *disk = bdev->bd_disk;
- unsigned long *bitmap;
-
- rcu_read_lock();
- bitmap = rcu_dereference(disk->conv_zones_bitmap);
- is_seq = !bitmap ||
- !test_bit(disk_zone_no(disk, sector), bitmap);
- rcu_read_unlock();
- }
-#endif
-
- return is_seq;
-}
-
int blk_zone_issue_zeroout(struct block_device *bdev, sector_t sector,
sector_t nr_sects, gfp_t gfp_mask);
@@ -1662,7 +1659,8 @@ struct block_device_operations {
/* this callback is with swap_lock and sometimes page table lock held */
void (*swap_slot_free_notify) (struct block_device *, unsigned long);
int (*report_zones)(struct gendisk *, sector_t sector,
- unsigned int nr_zones, report_zones_cb cb, void *data);
+ unsigned int nr_zones,
+ struct blk_report_zones_args *args);
char *(*devnode)(struct gendisk *disk, umode_t *mode);
/* returns the length of the identifier or a negative errno: */
int (*get_unique_id)(struct gendisk *disk, u8 id[16],
diff --git a/include/linux/blktrace_api.h b/include/linux/blktrace_api.h
index 122c62e561fc..05c8754456aa 100644
--- a/include/linux/blktrace_api.h
+++ b/include/linux/blktrace_api.h
@@ -14,11 +14,12 @@
#include <linux/sysfs.h>
struct blk_trace {
+ int version;
int trace_state;
struct rchan *rchan;
unsigned long __percpu *sequence;
unsigned char __percpu *msg_data;
- u16 act_mask;
+ u64 act_mask;
u64 start_lba;
u64 end_lba;
u32 pid;
diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h
index 84fdc3a6a19a..38f625af6ab4 100644
--- a/include/linux/device-mapper.h
+++ b/include/linux/device-mapper.h
@@ -538,12 +538,18 @@ void dm_submit_bio_remap(struct bio *clone, struct bio *tgt_clone);
#ifdef CONFIG_BLK_DEV_ZONED
struct dm_report_zones_args {
struct dm_target *tgt;
+ struct gendisk *disk;
sector_t next_sector;
- void *orig_data;
- report_zones_cb orig_cb;
unsigned int zone_idx;
+ /* for block layer ->report_zones */
+ struct blk_report_zones_args *rep_args;
+
+ /* for internal users */
+ report_zones_cb cb;
+ void *data;
+
/* must be filled by ->report_zones before calling dm_report_zones_cb */
sector_t start;
};
diff --git a/include/linux/kfifo.h b/include/linux/kfifo.h
index fd743d4c4b4b..8b81ac74829c 100644
--- a/include/linux/kfifo.h
+++ b/include/linux/kfifo.h
@@ -370,6 +370,30 @@ __kfifo_int_must_check_helper( \
)
/**
+ * kfifo_alloc_node - dynamically allocates a new fifo buffer on a NUMA node
+ * @fifo: pointer to the fifo
+ * @size: the number of elements in the fifo, this must be a power of 2
+ * @gfp_mask: get_free_pages mask, passed to kmalloc()
+ * @node: NUMA node to allocate memory on
+ *
+ * This macro dynamically allocates a new fifo buffer with NUMA node awareness.
+ *
+ * The number of elements will be rounded-up to a power of 2.
+ * The fifo will be released with kfifo_free().
+ * Return 0 if no error, otherwise an error code.
+ */
+#define kfifo_alloc_node(fifo, size, gfp_mask, node) \
+__kfifo_int_must_check_helper( \
+({ \
+ typeof((fifo) + 1) __tmp = (fifo); \
+ struct __kfifo *__kfifo = &__tmp->kfifo; \
+ __is_kfifo_ptr(__tmp) ? \
+ __kfifo_alloc_node(__kfifo, size, sizeof(*__tmp->type), gfp_mask, node) : \
+ -EINVAL; \
+}) \
+)
+
+/**
* kfifo_free - frees the fifo
* @fifo: the fifo to be freed
*/
@@ -899,8 +923,14 @@ __kfifo_uint_must_check_helper( \
)
-extern int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
- size_t esize, gfp_t gfp_mask);
+extern int __kfifo_alloc_node(struct __kfifo *fifo, unsigned int size,
+ size_t esize, gfp_t gfp_mask, int node);
+
+static inline int __kfifo_alloc(struct __kfifo *fifo, unsigned int size,
+ size_t esize, gfp_t gfp_mask)
+{
+ return __kfifo_alloc_node(fifo, size, esize, gfp_mask, NUMA_NO_NODE);
+}
extern void __kfifo_free(struct __kfifo *fifo);
diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h
index ffb9907c7070..cc7ad189caa5 100644
--- a/include/linux/sbitmap.h
+++ b/include/linux/sbitmap.h
@@ -75,7 +75,7 @@ struct sbitmap {
*/
struct sbitmap_word *map;
- /*
+ /**
* @alloc_hint: Cache of last successfully allocated or freed bit.
*
* This is per-cpu, which allows multiple users to stick to different
@@ -128,7 +128,7 @@ struct sbitmap_queue {
*/
struct sbq_wait_state *ws;
- /*
+ /**
* @ws_active: count of currently active ws waitqueues
*/
atomic_t ws_active;
@@ -547,6 +547,8 @@ static inline void sbq_index_atomic_inc(atomic_t *index)
* sbitmap_queue.
* @sbq: Bitmap queue to wait on.
* @wait_index: A counter per "user" of @sbq.
+ *
+ * Return: Next wait queue to be used
*/
static inline struct sbq_wait_state *sbq_wait_ptr(struct sbitmap_queue *sbq,
atomic_t *wait_index)