From 2f578aaf51624aa6fcff041fc7dc5c2d4dfa447f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Sun, 9 Jun 2019 05:15:51 +0900 Subject: block: move tag field position in struct request __data_len and __sector are internal fields which should not be accessed directly in driver-level like the comment above it. But, tag field can be accessed by driver level directly so that we need to make the comment right by moving it to some other place. Cc: Jens Axboe Cc: linux-block@vger.kernel.org Signed-off-by: Minwoo Im Reviewed-by: Chaitanya Kulkarni Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 592669bcc536..90e6914bea0c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -137,11 +137,11 @@ struct request { unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; + int tag; int internal_tag; /* the following two fields are internal, NEVER access directly */ unsigned int __data_len; /* total data len */ - int tag; sector_t __sector; /* sector cursor */ struct bio *bio; -- cgit v1.2.3 From 3a211b71529fdd0a89095b18fb19155db0c8fb5d Mon Sep 17 00:00:00 2001 From: Pavel Begunkov Date: Thu, 23 May 2019 18:43:11 +0300 Subject: blk-core: Remove blk_end_request*() declarations Commit a1ce35fa49852db60fc6e268 ("block: remove dead elevator code") deleted blk_end_request() and friends, but some declaration are still left. Purge them. Signed-off-by: Pavel Begunkov Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 90e6914bea0c..ad49a775c54f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1026,21 +1026,9 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); * * blk_update_request() completes given number of bytes and updates * the request without completing it. - * - * blk_end_request() and friends. __blk_end_request() must be called - * with the request queue spinlock acquired. - * - * Several drivers define their own end_request and call - * blk_end_request() for parts of the original function. - * This prevents code duplication in drivers. */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_end_request_all(struct request *rq, blk_status_t error); -extern bool __blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); -extern void __blk_end_request_all(struct request *rq, blk_status_t error); -extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -- cgit v1.2.3 From f924cddebc900f7cb10d5538d69523e558fa681c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:29:00 +0200 Subject: block: remove blk_init_request_from_bio MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit lightnvm should have never used this function, as it is sending passthrough requests, so switch it to blk_rq_append_bio like all the other passthrough request users. Inline blk_init_request_from_bio into the only remaining caller. Reviewed-by: Hannes Reinecke Reviewed-by: Minwoo Im Reviewed-by: Javier González Reviewed-by: Matias Bjørling Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index ad49a775c54f..2d4dfe82767a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -828,7 +828,6 @@ extern void blk_unregister_queue(struct gendisk *disk); extern blk_qc_t generic_make_request(struct bio *bio); extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); -extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -- cgit v1.2.3 From 14ccb66b3f585b2bc21e7256c96090abed5a512c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:29:01 +0200 Subject: block: remove the bi_phys_segments field in struct bio We only need the number of segments in the blk-mq submission path. Remove the field from struct bio, and return it from a variant of blk_queue_split instead of that it can passed as an argument to those functions that need the value. This also means we stop recounting segments except for cloning and partial segments. To keep the number of arguments in this how path down remove pointless struct request_queue arguments from any of the functions that had it and grew a nr_segs argument. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 - include/linux/blk-mq.h | 2 +- include/linux/blk_types.h | 6 ------ include/linux/blkdev.h | 1 - include/linux/elevator.h | 2 +- 5 files changed, 2 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 0f23b5682640..ee11c4324751 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -408,7 +408,6 @@ static inline void bio_wouldblock_error(struct bio *bio) } struct request_queue; -extern int bio_phys_segments(struct request_queue *, struct bio *); extern int submit_bio_wait(struct bio *bio); extern void bio_advance(struct bio *, unsigned); diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 15d1aa53d96c..3fa1fa59f9b2 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -306,7 +306,7 @@ void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs bool blk_mq_complete_request(struct request *rq); void blk_mq_complete_request_sync(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, - struct bio *bio); + struct bio *bio, unsigned int nr_segs); bool blk_mq_queue_stopped(struct request_queue *q); void blk_mq_stop_hw_queue(struct blk_mq_hw_ctx *hctx); void blk_mq_start_hw_queue(struct blk_mq_hw_ctx *hctx); diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 95202f80676c..6a53799c3fe2 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -154,11 +154,6 @@ struct bio { blk_status_t bi_status; u8 bi_partno; - /* Number of segments in this BIO after - * physical address coalescing is performed. - */ - unsigned int bi_phys_segments; - struct bvec_iter bi_iter; atomic_t __bi_remaining; @@ -210,7 +205,6 @@ struct bio { */ enum { BIO_NO_PAGE_REF, /* don't put release vec pages */ - BIO_SEG_VALID, /* bi_phys_segments valid */ BIO_CLONED, /* doesn't own data */ BIO_BOUNCED, /* bio is a bounce bio */ BIO_USER_MAPPED, /* contains user pages */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2d4dfe82767a..d5d3bb45dfb6 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -841,7 +841,6 @@ extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); extern void blk_queue_split(struct request_queue *, struct bio **); -extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); extern int scsi_cmd_blk_ioctl(struct block_device *, fmode_t, unsigned int, void __user *); diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 6e8bc53740f0..169bb2e02516 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -34,7 +34,7 @@ struct elevator_mq_ops { void (*depth_updated)(struct blk_mq_hw_ctx *); bool (*allow_merge)(struct request_queue *, struct request *, struct bio *); - bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *); + bool (*bio_merge)(struct blk_mq_hw_ctx *, struct bio *, unsigned int); int (*request_merge)(struct request_queue *q, struct request **, struct bio *); void (*request_merged)(struct request_queue *, struct request *, enum elv_merge); void (*requests_merged)(struct request_queue *, struct request *, struct request *); -- cgit v1.2.3 From 239eeb085753d4356f731a773f363eb5bed4fe81 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:19 +0200 Subject: blk-cgroup: factor out a helper to read rwstat counter Trying to break up the crazy statements to something readable. Also switch to an unsigned counter as it can't ever turn negative. Reviewed-by: Chaitanya Kulkarni Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 76c61318fda5..06236f56a840 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -198,6 +198,13 @@ int blkcg_activate_policy(struct request_queue *q, void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol); +static inline u64 blkg_rwstat_read_counter(struct blkg_rwstat *rwstat, + unsigned int idx) +{ + return atomic64_read(&rwstat->aux_cnt[idx]) + + percpu_counter_sum_positive(&rwstat->cpu_cnt[idx]); +} + const char *blkg_dev_name(struct blkcg_gq *blkg); void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 (*prfill)(struct seq_file *, -- cgit v1.2.3 From 5d0b6e48cbef3219c0ed75e0e746c4ed259303c2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:20 +0200 Subject: blk-cgroup: pass blkg_rwstat structures by reference Returning a structure generates rather bad code, so switch to passing by reference. Also don't require the structure to be zeroed and add to the 0-initialized counters, but actually set the counters to the calculated value. Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 06236f56a840..3ee858111274 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -224,8 +224,8 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off); -struct blkg_rwstat blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off); +void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, + int off, struct blkg_rwstat *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -700,15 +700,14 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * * Read the current snapshot of @rwstat and return it in the aux counts. */ -static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) +static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, + struct blkg_rwstat *result) { - struct blkg_rwstat result; int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&result.aux_cnt[i], + atomic64_set(&result->aux_cnt[i], percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); - return result; } /** @@ -721,8 +720,9 @@ static inline struct blkg_rwstat blkg_rwstat_read(struct blkg_rwstat *rwstat) */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { - struct blkg_rwstat tmp = blkg_rwstat_read(rwstat); + struct blkg_rwstat tmp = { }; + blkg_rwstat_read(rwstat, &tmp); return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); } -- cgit v1.2.3 From 7af6fd9112ba310a889c60d0606b4b74049cfe14 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:21 +0200 Subject: blk-cgroup: introduce a new struct blkg_rwstat_sample When sampling the blkcg counts we don't need atomics or per-cpu variables. Introduce a new structure just containing plain u64 counters. Acked-by: Tejun Heo Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 22 ++++++++++++---------- 1 file changed, 12 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 3ee858111274..e4a81767e111 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -63,8 +63,7 @@ struct blkcg { /* * blkg_[rw]stat->aux_cnt is excluded for local stats but included for - * recursive. Used to carry stats of dead children, and, for blkg_rwstat, - * to carry result values from read and sum operations. + * recursive. Used to carry stats of dead children. */ struct blkg_stat { struct percpu_counter cpu_cnt; @@ -76,6 +75,10 @@ struct blkg_rwstat { atomic64_t aux_cnt[BLKG_RWSTAT_NR]; }; +struct blkg_rwstat_sample { + u64 cnt[BLKG_RWSTAT_NR]; +}; + /* * A blkcg_gq (blkg) is association between a block cgroup (blkcg) and a * request_queue (q). This is used by blkcg policies which need to track @@ -213,7 +216,7 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, bool show_total); u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, - const struct blkg_rwstat *rwstat); + const struct blkg_rwstat_sample *rwstat); u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); @@ -225,7 +228,7 @@ int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, - int off, struct blkg_rwstat *sum); + int off, struct blkg_rwstat_sample *sum); struct blkg_conf_ctx { struct gendisk *disk; @@ -701,13 +704,13 @@ static inline void blkg_rwstat_add(struct blkg_rwstat *rwstat, * Read the current snapshot of @rwstat and return it in the aux counts. */ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, - struct blkg_rwstat *result) + struct blkg_rwstat_sample *result) { int i; for (i = 0; i < BLKG_RWSTAT_NR; i++) - atomic64_set(&result->aux_cnt[i], - percpu_counter_sum_positive(&rwstat->cpu_cnt[i])); + result->cnt[i] = + percpu_counter_sum_positive(&rwstat->cpu_cnt[i]); } /** @@ -720,11 +723,10 @@ static inline void blkg_rwstat_read(struct blkg_rwstat *rwstat, */ static inline uint64_t blkg_rwstat_total(struct blkg_rwstat *rwstat) { - struct blkg_rwstat tmp = { }; + struct blkg_rwstat_sample tmp = { }; blkg_rwstat_read(rwstat, &tmp); - return atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_READ]) + - atomic64_read(&tmp.aux_cnt[BLKG_RWSTAT_WRITE]); + return tmp.cnt[BLKG_RWSTAT_READ] + tmp.cnt[BLKG_RWSTAT_WRITE]; } /** -- cgit v1.2.3 From c0ce79dca5b0e8373a546ebea2af7b3df94c584e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 6 Jun 2019 12:26:22 +0200 Subject: blk-cgroup: move struct blkg_stat to bfq This structure and assorted infrastructure is only used by the bfq I/O scheduler. Move it there instead of bloating the common code. Acked-by: Tejun Heo Acked-by: Paolo Valente Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 71 ---------------------------------------------- 1 file changed, 71 deletions(-) (limited to 'include') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index e4a81767e111..33f23a858438 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -65,11 +65,6 @@ struct blkcg { * blkg_[rw]stat->aux_cnt is excluded for local stats but included for * recursive. Used to carry stats of dead children. */ -struct blkg_stat { - struct percpu_counter cpu_cnt; - atomic64_t aux_cnt; -}; - struct blkg_rwstat { struct percpu_counter cpu_cnt[BLKG_RWSTAT_NR]; atomic64_t aux_cnt[BLKG_RWSTAT_NR]; @@ -217,7 +212,6 @@ void blkcg_print_blkgs(struct seq_file *sf, struct blkcg *blkcg, u64 __blkg_prfill_u64(struct seq_file *sf, struct blkg_policy_data *pd, u64 v); u64 __blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, const struct blkg_rwstat_sample *rwstat); -u64 blkg_prfill_stat(struct seq_file *sf, struct blkg_policy_data *pd, int off); u64 blkg_prfill_rwstat(struct seq_file *sf, struct blkg_policy_data *pd, int off); int blkg_print_stat_bytes(struct seq_file *sf, void *v); @@ -225,8 +219,6 @@ int blkg_print_stat_ios(struct seq_file *sf, void *v); int blkg_print_stat_bytes_recursive(struct seq_file *sf, void *v); int blkg_print_stat_ios_recursive(struct seq_file *sf, void *v); -u64 blkg_stat_recursive_sum(struct blkcg_gq *blkg, - struct blkcg_policy *pol, int off); void blkg_rwstat_recursive_sum(struct blkcg_gq *blkg, struct blkcg_policy *pol, int off, struct blkg_rwstat_sample *sum); @@ -579,69 +571,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) -{ - int ret; - - ret = percpu_counter_init(&stat->cpu_cnt, 0, gfp); - if (ret) - return ret; - - atomic64_set(&stat->aux_cnt, 0); - return 0; -} - -static inline void blkg_stat_exit(struct blkg_stat *stat) -{ - percpu_counter_destroy(&stat->cpu_cnt); -} - -/** - * blkg_stat_add - add a value to a blkg_stat - * @stat: target blkg_stat - * @val: value to add - * - * Add @val to @stat. The caller must ensure that IRQ on the same CPU - * don't re-enter this function for the same counter. - */ -static inline void blkg_stat_add(struct blkg_stat *stat, uint64_t val) -{ - percpu_counter_add_batch(&stat->cpu_cnt, val, BLKG_STAT_CPU_BATCH); -} - -/** - * blkg_stat_read - read the current value of a blkg_stat - * @stat: blkg_stat to read - */ -static inline uint64_t blkg_stat_read(struct blkg_stat *stat) -{ - return percpu_counter_sum_positive(&stat->cpu_cnt); -} - -/** - * blkg_stat_reset - reset a blkg_stat - * @stat: blkg_stat to reset - */ -static inline void blkg_stat_reset(struct blkg_stat *stat) -{ - percpu_counter_set(&stat->cpu_cnt, 0); - atomic64_set(&stat->aux_cnt, 0); -} - -/** - * blkg_stat_add_aux - add a blkg_stat into another's aux count - * @to: the destination blkg_stat - * @from: the source - * - * Add @from's count including the aux one to @to's aux count. - */ -static inline void blkg_stat_add_aux(struct blkg_stat *to, - struct blkg_stat *from) -{ - atomic64_add(blkg_stat_read(from) + atomic64_read(&from->aux_cnt), - &to->aux_cnt); -} - static inline int blkg_rwstat_init(struct blkg_rwstat *rwstat, gfp_t gfp) { int i, ret; -- cgit v1.2.3 From e47bc4eda953928644109101d07c9c95dc29a458 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 20 Jun 2019 10:59:16 -0700 Subject: block: add centralize REQ_OP_XXX to string helper In order to centralize the REQ_OP_XXX to string conversion which can be used in the block layer and different places in the kernel like f2fs, this patch adds a new helper function along with an array similar to the one present in the blk-mq-debugfs.c. We keep this helper functionality centralize under blk-core.c instead of blk-mq-debugfs.c since blk-core.c is configured using CONFIG_BLOCK and it will not be dependent on blk-mq-debugfs.c which is configured using CONFIG_BLK_DEBUG_FS. Next patch adjusts the code in the blk-mq-debugfs.c with newly introduced helper. Reviewed-by: Bart Van Assche Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d5d3bb45dfb6..0c482371c8b3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -865,6 +865,9 @@ extern void blk_execute_rq(struct request_queue *, struct gendisk *, extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); +/* Helper to convert REQ_OP_XXX to its string format XXX */ +extern const char *blk_op_str(unsigned int op); + int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -- cgit v1.2.3 From 0ce353794b6c4dc88592b942e94b33cd1bf2ef54 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Thu, 20 Jun 2019 10:59:19 -0700 Subject: f2fs: use block layer helper for show_bio_op macro Adjust the f2fs tracing code to use newly introduced block layer function blk_op_str() which converts the REQ_OP_XXX into the string XXX. Reviewed-by: Chao Yu Signed-off-by: Chaitanya Kulkarni Signed-off-by: Jens Axboe --- include/trace/events/f2fs.h | 11 +---------- 1 file changed, 1 insertion(+), 10 deletions(-) (limited to 'include') diff --git a/include/trace/events/f2fs.h b/include/trace/events/f2fs.h index 53b96f12300c..e3dc031af7f5 100644 --- a/include/trace/events/f2fs.h +++ b/include/trace/events/f2fs.h @@ -76,16 +76,7 @@ TRACE_DEFINE_ENUM(CP_TRIMMED); #define show_bio_type(op,op_flags) show_bio_op(op), \ show_bio_op_flags(op_flags) -#define show_bio_op(op) \ - __print_symbolic(op, \ - { REQ_OP_READ, "READ" }, \ - { REQ_OP_WRITE, "WRITE" }, \ - { REQ_OP_FLUSH, "FLUSH" }, \ - { REQ_OP_DISCARD, "DISCARD" }, \ - { REQ_OP_SECURE_ERASE, "SECURE_ERASE" }, \ - { REQ_OP_ZONE_RESET, "ZONE_RESET" }, \ - { REQ_OP_WRITE_SAME, "WRITE_SAME" }, \ - { REQ_OP_WRITE_ZEROES, "WRITE_ZEROES" }) +#define show_bio_op(op) blk_op_str(op) #define show_bio_op_flags(flags) \ __print_flags(F2FS_BIO_FLAG_MASK(flags), "|", \ -- cgit v1.2.3 From 150d71f725fd2f5a0015b7fa8df0816a207d4e4b Mon Sep 17 00:00:00 2001 From: James Smart Date: Tue, 14 May 2019 14:58:03 -0700 Subject: nvmet-fc: add transport discovery change event callback support This patch adds support for the nvmet discovery_change transport op. In turn, the transport adds it's own LLDD api callback discovery_event op to request the LLDD to generate an RSCN for the discovery change. Signed-off-by: James Smart Reviewed-by: Hannes Reinecke Reviewed-by: Arun Easi Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-fc-driver.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index c48e96436f56..98d904961b33 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -791,6 +791,11 @@ struct nvmet_fc_target_port { * nvmefc_tgt_fcp_req. * Entrypoint is Optional. * + * @discovery_event: Called by the transport to generate an RSCN + * change notifications to NVME initiators. The RSCN notifications + * should cause the initiator to rescan the discovery controller + * on the targetport. + * * @max_hw_queues: indicates the maximum number of hw queues the LLDD * supports for cpu affinitization. * Value is Mandatory. Must be at least 1. @@ -832,6 +837,7 @@ struct nvmet_fc_target_template { struct nvmefc_tgt_fcp_req *fcpreq); void (*defer_rcv)(struct nvmet_fc_target_port *tgtport, struct nvmefc_tgt_fcp_req *fcpreq); + void (*discovery_event)(struct nvmet_fc_target_port *tgtport); u32 max_hw_queues; u16 max_sgl_segments; -- cgit v1.2.3 From 7a1f46e3f75cff5042dfa1bb80c9929a0e412abc Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Thu, 6 Jun 2019 14:30:14 +0900 Subject: nvme: introduce nvme_is_fabrics to check fabrics cmd This patch introduces a nvme_is_fabrics() inline function to check whether or not the given command structure is for fabrics. Signed-off-by: Minwoo Im Reviewed-by: Sagi Grimberg Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 8028adacaff3..7080923e78d1 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1165,6 +1165,11 @@ struct nvme_command { }; }; +static inline bool nvme_is_fabrics(struct nvme_command *cmd) +{ + return cmd->common.opcode == nvme_fabrics_command; +} + struct nvme_error_slot { __le64 error_count; __le16 sqid; @@ -1186,7 +1191,7 @@ static inline bool nvme_is_write(struct nvme_command *cmd) * * Why can't we simply have a Fabrics In and Fabrics out command? */ - if (unlikely(cmd->common.opcode == nvme_fabrics_command)) + if (unlikely(nvme_is_fabrics(cmd))) return cmd->fabrics.fctype & 1; return cmd->common.opcode & 1; } -- cgit v1.2.3 From 26f2990d85838caa650744a0ded9e38988a2bd7f Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:30 +0900 Subject: nvme-trace: move opcode symbol print to nvme.h The following patches are going to provide the target-side trace which might need these kind of macros. It would be great if it can be shared between host and target side both. Signed-off-by: Minwoo Im Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 45 +++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 45 insertions(+) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 7080923e78d1..86b3d04baf20 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -562,6 +562,22 @@ enum nvme_opcode { nvme_cmd_resv_release = 0x15, }; +#define nvme_opcode_name(opcode) { opcode, #opcode } +#define show_nvm_opcode_name(val) \ + __print_symbolic(val, \ + nvme_opcode_name(nvme_cmd_flush), \ + nvme_opcode_name(nvme_cmd_write), \ + nvme_opcode_name(nvme_cmd_read), \ + nvme_opcode_name(nvme_cmd_write_uncor), \ + nvme_opcode_name(nvme_cmd_compare), \ + nvme_opcode_name(nvme_cmd_write_zeroes), \ + nvme_opcode_name(nvme_cmd_dsm), \ + nvme_opcode_name(nvme_cmd_resv_register), \ + nvme_opcode_name(nvme_cmd_resv_report), \ + nvme_opcode_name(nvme_cmd_resv_acquire), \ + nvme_opcode_name(nvme_cmd_resv_release)) + + /* * Descriptor subtype - lower 4 bits of nvme_(keyed_)sgl_desc identifier * @@ -794,6 +810,35 @@ enum nvme_admin_opcode { nvme_admin_sanitize_nvm = 0x84, }; +#define nvme_admin_opcode_name(opcode) { opcode, #opcode } +#define show_admin_opcode_name(val) \ + __print_symbolic(val, \ + nvme_admin_opcode_name(nvme_admin_delete_sq), \ + nvme_admin_opcode_name(nvme_admin_create_sq), \ + nvme_admin_opcode_name(nvme_admin_get_log_page), \ + nvme_admin_opcode_name(nvme_admin_delete_cq), \ + nvme_admin_opcode_name(nvme_admin_create_cq), \ + nvme_admin_opcode_name(nvme_admin_identify), \ + nvme_admin_opcode_name(nvme_admin_abort_cmd), \ + nvme_admin_opcode_name(nvme_admin_set_features), \ + nvme_admin_opcode_name(nvme_admin_get_features), \ + nvme_admin_opcode_name(nvme_admin_async_event), \ + nvme_admin_opcode_name(nvme_admin_ns_mgmt), \ + nvme_admin_opcode_name(nvme_admin_activate_fw), \ + nvme_admin_opcode_name(nvme_admin_download_fw), \ + nvme_admin_opcode_name(nvme_admin_ns_attach), \ + nvme_admin_opcode_name(nvme_admin_keep_alive), \ + nvme_admin_opcode_name(nvme_admin_directive_send), \ + nvme_admin_opcode_name(nvme_admin_directive_recv), \ + nvme_admin_opcode_name(nvme_admin_dbbuf), \ + nvme_admin_opcode_name(nvme_admin_format_nvm), \ + nvme_admin_opcode_name(nvme_admin_security_send), \ + nvme_admin_opcode_name(nvme_admin_security_recv), \ + nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) + +#define show_opcode_name(qid, opcode) \ + (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) + enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), -- cgit v1.2.3 From ad795e47cdef078bfd9e48745040d12104005aab Mon Sep 17 00:00:00 2001 From: Minwoo Im Date: Wed, 12 Jun 2019 21:45:31 +0900 Subject: nvme-trace: support for fabrics commands in host-side This patch introduces fabrics commands tracing feature from host-side. This patch does not include any changes for the previous host-side tracing, but just add fabrics commands parsing in cmd=() format. Signed-off-by: Minwoo Im [hch: fixed some whitespace damage] Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 20 +++++++++++++++++--- 1 file changed, 17 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 86b3d04baf20..d98b2d8baf4e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -836,9 +836,6 @@ enum nvme_admin_opcode { nvme_admin_opcode_name(nvme_admin_security_recv), \ nvme_admin_opcode_name(nvme_admin_sanitize_nvm)) -#define show_opcode_name(qid, opcode) \ - (qid ? show_nvm_opcode_name(opcode) : show_admin_opcode_name(opcode)) - enum { NVME_QUEUE_PHYS_CONTIG = (1 << 0), NVME_CQ_IRQ_ENABLED = (1 << 1), @@ -1053,6 +1050,23 @@ enum nvmf_capsule_command { nvme_fabrics_type_property_get = 0x04, }; +#define nvme_fabrics_type_name(type) { type, #type } +#define show_fabrics_type_name(type) \ + __print_symbolic(type, \ + nvme_fabrics_type_name(nvme_fabrics_type_property_set), \ + nvme_fabrics_type_name(nvme_fabrics_type_connect), \ + nvme_fabrics_type_name(nvme_fabrics_type_property_get)) + +/* + * If not fabrics command, fctype will be ignored. + */ +#define show_opcode_name(qid, opcode, fctype) \ + ((opcode) == nvme_fabrics_command ? \ + show_fabrics_type_name(fctype) : \ + ((qid) ? \ + show_nvm_opcode_name(opcode) : \ + show_admin_opcode_name(opcode))) + struct nvmf_common_command { __u8 opcode; __u8 resv1; -- cgit v1.2.3 From a5b47a40bed8b19e956872fb55097d676a68f59e Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Thu, 27 Jun 2019 11:59:41 +0900 Subject: block: Remove unused code bio_flush_dcache_pages() is unused. Remove it. Reviewed-by: Christoph Hellwig Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 ----------- 1 file changed, 11 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index ee11c4324751..5a8ae56e09ff 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -443,17 +443,6 @@ void generic_end_io_acct(struct request_queue *q, int op, struct hd_struct *part, unsigned long start_time); -#ifndef ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -# error "You should define ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE for your platform" -#endif -#if ARCH_IMPLEMENTS_FLUSH_DCACHE_PAGE -extern void bio_flush_dcache_pages(struct bio *bi); -#else -static inline void bio_flush_dcache_pages(struct bio *bi) -{ -} -#endif - extern void bio_copy_data_iter(struct bio *dst, struct bvec_iter *dst_iter, struct bio *src, struct bvec_iter *src_iter); extern void bio_copy_data(struct bio *dst, struct bio *src); -- cgit v1.2.3 From 5e4c7cf60ec3cad59703c203de1dfb31ea608e6e Mon Sep 17 00:00:00 2001 From: Revanth Rajashekar Date: Thu, 27 Jun 2019 16:30:02 -0600 Subject: block: sed-opal: PSID reverttper capability PSID is a 32 character password printed on the drive label, to prove its physical access. This PSID reverttper function is very useful to regain the control over the drive when it is locked and the user can no longer access it because of some failures. However, *all the data on the drive is completely erased*. This method is advisable only when the user is exhausted of all other recovery methods. PSID capabilities are described in: https://trustedcomputinggroup.org/wp-content/uploads/TCG_Storage-Opal_Feature_Set_PSID_v1.00_r1.00.pdf Signed-off-by: Revanth Rajashekar Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index 3e76b6d7d97f..f03bbffd3281 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -39,6 +39,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_ENABLE_DISABLE_MBR: case IOC_OPAL_ERASE_LR: case IOC_OPAL_SECURE_ERASE_LR: + case IOC_OPAL_PSID_REVERT_TPR: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 33e53b80cd1f..7a03e5b4df6e 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -107,5 +107,6 @@ struct opal_mbr_data { #define IOC_OPAL_ENABLE_DISABLE_MBR _IOW('p', 229, struct opal_mbr_data) #define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info) #define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) +#define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From b2d0d99135ad145667765cbd27f148c1a4cd50d1 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 15:49:20 +0200 Subject: block: move the BIO_NO_PAGE_REF check into bio_release_pages Move the BIO_NO_PAGE_REF check into bio_release_pages instead of duplicating it in both callers. Also make the function available outside of bio.c so that we can reuse it in other direct I/O implementations. Reviewed-by: Minwoo Im Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 5a8ae56e09ff..6d82b4856282 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -426,6 +426,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); +void bio_release_pages(struct bio *bio); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct iov_iter *, gfp_t); -- cgit v1.2.3 From d241a95f3514a5eb544dfd8d9d141ffd1c89b707 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 15:49:21 +0200 Subject: block: optionally mark pages dirty in bio_release_pages A lot of callers of bio_release_pages also want to mark the released pages as dirty. Add a mark_dirty parameter to avoid a second relatively expensive bio_for_each_segment_all loop. Reviewed-by: Minwoo Im Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6d82b4856282..2d8c73f0ecaf 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -426,7 +426,7 @@ bool __bio_try_merge_page(struct bio *bio, struct page *page, void __bio_add_page(struct bio *bio, struct page *page, unsigned int len, unsigned int off); int bio_iov_iter_get_pages(struct bio *bio, struct iov_iter *iter); -void bio_release_pages(struct bio *bio); +void bio_release_pages(struct bio *bio, bool mark_dirty); struct rq_map_data; extern struct bio *bio_map_user_iov(struct request_queue *, struct iov_iter *, gfp_t); -- cgit v1.2.3 From b620743077e291ae7d0debd21f50413a8c266229 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 26 Jun 2019 15:49:28 +0200 Subject: block: never take page references for ITER_BVEC If we pass pages through an iov_iter we always already have a reference in the caller. Thus remove the ITER_BVEC_FLAG_NO_REF and don't take reference to pages by default for bvec backed iov_iters. Reviewed-by: Minwoo Im Reviewed-by: Johannes Thumshirn Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/uio.h | 10 +--------- 1 file changed, 1 insertion(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/uio.h b/include/linux/uio.h index 2c90a0842ee8..cea1761c5672 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -19,9 +19,6 @@ struct kvec { }; enum iter_type { - /* set if ITER_BVEC doesn't hold a bv_page ref */ - ITER_BVEC_FLAG_NO_REF = 2, - /* iter types */ ITER_IOVEC = 4, ITER_KVEC = 8, @@ -56,7 +53,7 @@ struct iov_iter { static inline enum iter_type iov_iter_type(const struct iov_iter *i) { - return i->type & ~(READ | WRITE | ITER_BVEC_FLAG_NO_REF); + return i->type & ~(READ | WRITE); } static inline bool iter_is_iovec(const struct iov_iter *i) @@ -89,11 +86,6 @@ static inline unsigned char iov_iter_rw(const struct iov_iter *i) return i->type & (READ | WRITE); } -static inline bool iov_iter_bvec_no_ref(const struct iov_iter *i) -{ - return (i->type & ITER_BVEC_FLAG_NO_REF) != 0; -} - /* * Total number of bytes covered by an iovec. * -- cgit v1.2.3 From c9888443413e4e06013e482fc484dbb9c559c145 Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Tue, 21 May 2019 22:46:44 +0200 Subject: block: sed-opal: add ioctl for done-mark of shadow mbr Enable users to mark the shadow mbr as done without completely deactivating the shadow mbr feature. This may be useful on reboots, when the power to the disk is not disconnected in between and the shadow mbr stores the required boot files. Of course, this saves also the (few) commands required to enable the feature if it is already enabled and one only wants to mark the shadow mbr as done. Co-authored-by: David Kozub Signed-off-by: Jonas Rabenstein Signed-off-by: David Kozub Reviewed-by: Christoph Hellwig Reviewed by: Scott Bauer Reviewed-by: Jon Derrick Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 12 ++++++++++++ 2 files changed, 13 insertions(+) (limited to 'include') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index f03bbffd3281..f834e8a1495f 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -40,6 +40,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_ERASE_LR: case IOC_OPAL_SECURE_ERASE_LR: case IOC_OPAL_PSID_REVERT_TPR: + case IOC_OPAL_MBR_DONE: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 7a03e5b4df6e..5681f55d334b 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -20,6 +20,11 @@ enum opal_mbr { OPAL_MBR_DISABLE = 0x01, }; +enum opal_mbr_done_flag { + OPAL_MBR_NOT_DONE = 0x0, + OPAL_MBR_DONE = 0x01 +}; + enum opal_user { OPAL_ADMIN1 = 0x0, OPAL_USER1 = 0x01, @@ -95,6 +100,12 @@ struct opal_mbr_data { __u8 __align[7]; }; +struct opal_mbr_done { + struct opal_key key; + __u8 done_flag; + __u8 __align[7]; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -108,5 +119,6 @@ struct opal_mbr_data { #define IOC_OPAL_ERASE_LR _IOW('p', 230, struct opal_session_info) #define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) #define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) +#define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From a9b25b4cf2b76d320afc999f881ccb805fecdd84 Mon Sep 17 00:00:00 2001 From: Jonas Rabenstein Date: Tue, 21 May 2019 22:46:45 +0200 Subject: block: sed-opal: ioctl for writing to shadow mbr Allow modification of the shadow mbr. If the shadow mbr is not marked as done, this data will be presented read only as the device content. Only after marking the shadow mbr as done and unlocking a locking range the actual content is accessible. Co-authored-by: David Kozub Signed-off-by: Jonas Rabenstein Signed-off-by: David Kozub Reviewed-by: Scott Bauer Reviewed-by: Jon Derrick Signed-off-by: Jens Axboe --- include/linux/sed-opal.h | 1 + include/uapi/linux/sed-opal.h | 8 ++++++++ 2 files changed, 9 insertions(+) (limited to 'include') diff --git a/include/linux/sed-opal.h b/include/linux/sed-opal.h index f834e8a1495f..53c28d750a45 100644 --- a/include/linux/sed-opal.h +++ b/include/linux/sed-opal.h @@ -41,6 +41,7 @@ static inline bool is_sed_ioctl(unsigned int cmd) case IOC_OPAL_SECURE_ERASE_LR: case IOC_OPAL_PSID_REVERT_TPR: case IOC_OPAL_MBR_DONE: + case IOC_OPAL_WRITE_SHADOW_MBR: return true; } return false; diff --git a/include/uapi/linux/sed-opal.h b/include/uapi/linux/sed-opal.h index 5681f55d334b..c6d035fa1b6c 100644 --- a/include/uapi/linux/sed-opal.h +++ b/include/uapi/linux/sed-opal.h @@ -106,6 +106,13 @@ struct opal_mbr_done { __u8 __align[7]; }; +struct opal_shadow_mbr { + struct opal_key key; + const __u64 data; + __u64 offset; + __u64 size; +}; + #define IOC_OPAL_SAVE _IOW('p', 220, struct opal_lock_unlock) #define IOC_OPAL_LOCK_UNLOCK _IOW('p', 221, struct opal_lock_unlock) #define IOC_OPAL_TAKE_OWNERSHIP _IOW('p', 222, struct opal_key) @@ -120,5 +127,6 @@ struct opal_mbr_done { #define IOC_OPAL_SECURE_ERASE_LR _IOW('p', 231, struct opal_session_info) #define IOC_OPAL_PSID_REVERT_TPR _IOW('p', 232, struct opal_key) #define IOC_OPAL_MBR_DONE _IOW('p', 233, struct opal_mbr_done) +#define IOC_OPAL_WRITE_SHADOW_MBR _IOW('p', 234, struct opal_shadow_mbr) #endif /* _UAPI_SED_OPAL_H */ -- cgit v1.2.3 From 79d08f89bb1b5c2c1ff90d9bb95497ab9e8aa7e0 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 1 Jul 2019 15:14:46 +0800 Subject: block: fix .bi_size overflow 'bio->bi_iter.bi_size' is 'unsigned int', which at most hold 4G - 1 bytes. Before 07173c3ec276 ("block: enable multipage bvecs"), one bio can include very limited pages, and usually at most 256, so the fs bio size won't be bigger than 1M bytes most of times. Since we support multi-page bvec, in theory one fs bio really can be added > 1M pages, especially in case of hugepage, or big writeback with too many dirty pages. Then there is chance in which .bi_size is overflowed. Fixes this issue by using bio_full() to check if the added segment may overflow .bi_size. Cc: Liu Yiding Cc: kernel test robot Cc: "Darrick J. Wong" Cc: linux-xfs@vger.kernel.org Cc: linux-fsdevel@vger.kernel.org Cc: stable@vger.kernel.org Fixes: 07173c3ec276 ("block: enable multipage bvecs") Reviewed-by: Christoph Hellwig Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/bio.h | 18 ++++++++++++++++-- 1 file changed, 16 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index dc630b05e6e5..3cdb84cdc488 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -102,9 +102,23 @@ static inline void *bio_data(struct bio *bio) return NULL; } -static inline bool bio_full(struct bio *bio) +/** + * bio_full - check if the bio is full + * @bio: bio to check + * @len: length of one segment to be added + * + * Return true if @bio is full and one segment with @len bytes can't be + * added to the bio, otherwise return false + */ +static inline bool bio_full(struct bio *bio, unsigned len) { - return bio->bi_vcnt >= bio->bi_max_vecs; + if (bio->bi_vcnt >= bio->bi_max_vecs) + return true; + + if (bio->bi_iter.bi_size > UINT_MAX - len) + return true; + + return false; } static inline bool bio_next_segment(const struct bio *bio, -- cgit v1.2.3