From 600335205b8d162891b5ef2e32343f5b8020efd8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Oct 2018 09:53:52 -0600 Subject: ide: convert to blk-mq ide-disk and ide-cd tested as working just fine, ide-tape and ide-floppy haven't. But the latter don't require changes, so they should work without issue. Add helper function to insert a request from a work queue, since we cannot invoke the blk-mq request insertion from IRQ context. Cc: David Miller Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/ide.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index c74b0321922a..079f8bc0b0f4 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -10,7 +10,7 @@ #include #include #include -#include +#include #include #include #include @@ -529,6 +529,10 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ + int (*prep_rq)(struct ide_drive_s *, struct request *); + + struct blk_mq_tag_set tag_set; + struct request *rq; /* current request */ void *driver_data; /* extra driver data */ u16 *id; /* identification info */ @@ -612,6 +616,10 @@ struct ide_drive_s { bool sense_rq_armed; struct request *sense_rq; struct request_sense sense_data; + + /* async sense insertion */ + struct work_struct rq_work; + struct list_head rq_list; }; typedef struct ide_drive_s ide_drive_t; @@ -1089,6 +1097,7 @@ extern int ide_pci_clk; int ide_end_rq(ide_drive_t *, struct request *, blk_status_t, unsigned int); void ide_kill_rq(ide_drive_t *, struct request *); +void ide_insert_request_head(ide_drive_t *, struct request *); void __ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); void ide_set_handler(ide_drive_t *, ide_handler_t *, unsigned int); @@ -1208,7 +1217,7 @@ extern void ide_stall_queue(ide_drive_t *drive, unsigned long timeout); extern void ide_timer_expiry(struct timer_list *t); extern irqreturn_t ide_intr(int irq, void *dev_id); -extern void do_ide_request(struct request_queue *); +extern blk_status_t ide_queue_rq(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); extern void ide_requeue_and_plug(ide_drive_t *drive, struct request *rq); void ide_init_disk(struct gendisk *, ide_drive_t *); -- cgit v1.2.3 From 9ba20527f4d1430b5f3e5f566be5af3e156a3284 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:15:10 -0600 Subject: blk-mq: provide mq_ops->busy() hook We'll hook into this from blk_lld_busy(), allowing blk-mq to also return whether or not a given queue currently has requests in progress. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 2286dc12c6bc..5c8418ebbfd6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -114,6 +114,7 @@ typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, typedef void (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); +typedef bool (busy_fn)(struct request_queue *); struct blk_mq_ops { @@ -165,6 +166,11 @@ struct blk_mq_ops { /* Called from inside blk_get_request() */ void (*initialize_rq_fn)(struct request *rq); + /* + * If set, returns whether or not this queue currently is busy + */ + busy_fn *busy; + map_queues_fn *map_queues; #ifdef CONFIG_BLK_DEBUG_FS -- cgit v1.2.3 From c6f2882691e8fd128083abdcc3c5aa5b410c2367 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:22:19 -0600 Subject: block: remove q->lld_busy_fn() Nobody is using the legacy path for blk_lld_busy() anymore, remove it. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 4293dc1cd160..e867733b761d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -320,7 +320,6 @@ typedef void (unprep_rq_fn) (struct request_queue *, struct request *); struct bio_vec; typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (lld_busy_fn) (struct request_queue *q); typedef int (bsg_job_fn) (struct bsg_job *); typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); typedef void (exit_rq_fn)(struct request_queue *, struct request *); @@ -466,7 +465,6 @@ struct request_queue { softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; - lld_busy_fn *lld_busy_fn; /* Called just after a request is allocated */ init_rq_fn *init_rq_fn; /* Called just before a request is freed */ @@ -1255,7 +1253,6 @@ extern void blk_queue_update_dma_pad(struct request_queue *, unsigned int); extern int blk_queue_dma_drain(struct request_queue *q, dma_drain_needed_fn *dma_drain_needed, void *buf, unsigned int size); -extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); -- cgit v1.2.3 From aae3b069d5ce865ca5ef2902c2a22cef7ab4f3a2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Oct 2018 11:26:25 -0600 Subject: bsg: pass in desired timeout handler This will ease in the conversion to blk-mq, where we can't set a timeout handler after queue init. Cc: Johannes Thumshirn Cc: linux-scsi@vger.kernel.org Reviewed-by: Hannes Reinecke Tested-by: Benjamin Block Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 6aeaf6472665..b13ae143e7ef 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -72,7 +72,7 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, int dd_job_size); + bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From 5e28b8d8a1b03ce86f33d38a64a4983d2b5c7679 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 26 Oct 2018 11:27:02 -0600 Subject: bsg: provide bsg_remove_queue() helper All drivers do unregister + cleanup, provide a helper for that. Cc: linux-scsi@vger.kernel.org Reviewed-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Reviewed-by: Christoph Hellwig Tested-by: Benjamin Block Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/bsg-lib.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index b13ae143e7ef..9c9b134b1fa5 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -73,6 +73,7 @@ void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size); +void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From 771a93c489bf486b957c7399f89ee06d43ba2d93 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 22 Oct 2018 05:12:32 -0600 Subject: block: remove blk_complete_request() It's now unused. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e867733b761d..6baea6563364 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1203,7 +1203,6 @@ extern bool __blk_end_request(struct request *rq, blk_status_t error, extern void __blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); -extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_unprep_request(struct request *); -- cgit v1.2.3 From 7ca01926463a15f5d2681458643b2453930b873a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 03:39:36 -0600 Subject: block: remove legacy rq tagging It's now unused, kill it. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 35 ----------------------------------- 1 file changed, 35 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6baea6563364..8afe3331777e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -85,8 +85,6 @@ typedef __u32 __bitwise req_flags_t; #define RQF_SORTED ((__force req_flags_t)(1 << 0)) /* drive already may have started this one */ #define RQF_STARTED ((__force req_flags_t)(1 << 1)) -/* uses tagged queueing */ -#define RQF_QUEUED ((__force req_flags_t)(1 << 2)) /* may not be passed by ioscheduler */ #define RQF_SOFTBARRIER ((__force req_flags_t)(1 << 3)) /* request for flush sequence */ @@ -336,15 +334,6 @@ enum blk_queue_state { Queue_up, }; -struct blk_queue_tag { - struct request **tag_index; /* map of busy tags */ - unsigned long *tag_map; /* bit map of free/busy tags */ - int max_depth; /* what we will send to device */ - int real_max_depth; /* what the array can hold */ - atomic_t refcnt; /* map can be shared */ - int alloc_policy; /* tag allocation policy */ - int next_tag; /* next tag */ -}; #define BLK_TAG_ALLOC_FIFO 0 /* allocate starting from 0 */ #define BLK_TAG_ALLOC_RR 1 /* allocate starting from last allocated tag */ @@ -568,8 +557,6 @@ struct request_queue { unsigned int dma_pad_mask; unsigned int dma_alignment; - struct blk_queue_tag *queue_tags; - unsigned int nr_sorted; unsigned int in_flight[2]; @@ -680,7 +667,6 @@ struct request_queue { u64 write_hints[BLK_MAX_WRITE_HINTS]; }; -#define QUEUE_FLAG_QUEUED 0 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ #define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ @@ -724,7 +710,6 @@ void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); -#define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) @@ -1359,26 +1344,6 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) !list_empty(&plug->cb_list)); } -/* - * tag stuff - */ -extern int blk_queue_start_tag(struct request_queue *, struct request *); -extern struct request *blk_queue_find_tag(struct request_queue *, int); -extern void blk_queue_end_tag(struct request_queue *, struct request *); -extern int blk_queue_init_tags(struct request_queue *, int, struct blk_queue_tag *, int); -extern void blk_queue_free_tags(struct request_queue *); -extern int blk_queue_resize_tags(struct request_queue *, int); -extern struct blk_queue_tag *blk_init_tags(int, int); -extern void blk_free_tags(struct blk_queue_tag *); - -static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, - int tag) -{ - if (unlikely(bqt == NULL || tag >= bqt->real_max_depth)) - return NULL; - return bqt->tag_index[tag]; -} - extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_write_same(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, struct page *page); -- cgit v1.2.3 From a1ce35fa49852db60fc6e268038530be533c5b15 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:23:51 -0600 Subject: block: remove dead elevator code This removes a bunch of core and elevator related code. On the core front, we remove anything related to queue running, draining, initialization, plugging, and congestions. We also kill anything related to request allocation, merging, retrieval, and completion. Remove any checking for single queue IO schedulers, as they no longer exist. This means we can also delete a bunch of code related to request issue, adding, completion, etc - and all the SQ related ops and helpers. Also kill the load_default_modules(), as all that did was provide for a way to load the default single queue elevator. Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 93 ++---------------------------------------------- include/linux/elevator.h | 90 +--------------------------------------------- include/linux/init.h | 1 - 3 files changed, 3 insertions(+), 181 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8afe3331777e..a9f6db8abcda 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,9 +58,6 @@ struct blk_stat_callback; typedef void (rq_end_io_fn)(struct request *, blk_status_t); -#define BLK_RL_SYNCFULL (1U << 0) -#define BLK_RL_ASYNCFULL (1U << 1) - struct request_list { struct request_queue *q; /* the queue this rl belongs to */ #ifdef CONFIG_BLK_CGROUP @@ -309,11 +306,8 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; -typedef void (request_fn_proc) (struct request_queue *q); typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); -typedef int (prep_rq_fn) (struct request_queue *, struct request *); -typedef void (unprep_rq_fn) (struct request_queue *, struct request *); struct bio_vec; typedef void (softirq_done_fn)(struct request *); @@ -432,8 +426,6 @@ struct request_queue { struct list_head queue_head; struct request *last_merge; struct elevator_queue *elevator; - int nr_rqs[2]; /* # allocated [a]sync rqs */ - int nr_rqs_elvpriv; /* # allocated rqs w/ elvpriv */ struct blk_queue_stats *stats; struct rq_qos *rq_qos; @@ -446,11 +438,8 @@ struct request_queue { */ struct request_list root_rl; - request_fn_proc *request_fn; make_request_fn *make_request_fn; poll_q_fn *poll_fn; - prep_rq_fn *prep_rq_fn; - unprep_rq_fn *unprep_rq_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; @@ -458,8 +447,6 @@ struct request_queue { init_rq_fn *init_rq_fn; /* Called just before a request is freed */ exit_rq_fn *exit_rq_fn; - /* Called from inside blk_get_request() */ - void (*initialize_rq_fn)(struct request *rq); const struct blk_mq_ops *mq_ops; @@ -475,17 +462,6 @@ struct request_queue { struct blk_mq_hw_ctx **queue_hw_ctx; unsigned int nr_hw_queues; - /* - * Dispatch queue sorting - */ - sector_t end_sector; - struct request *boundary_rq; - - /* - * Delayed queue handling - */ - struct delayed_work delay_work; - struct backing_dev_info *backing_dev_info; /* @@ -548,9 +524,6 @@ struct request_queue { * queue settings */ unsigned long nr_requests; /* Max # of requests */ - unsigned int nr_congestion_on; - unsigned int nr_congestion_off; - unsigned int nr_batching; unsigned int dma_drain_size; void *dma_drain_buffer; @@ -560,13 +533,6 @@ struct request_queue { unsigned int nr_sorted; unsigned int in_flight[2]; - /* - * Number of active block driver functions for which blk_drain_queue() - * must wait. Must be incremented around functions that unlock the - * queue_lock internally, e.g. scsi_request_fn(). - */ - unsigned int request_fn_active; - unsigned int rq_timeout; int poll_nsec; @@ -740,11 +706,6 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); extern void blk_set_pm_only(struct request_queue *q); extern void blk_clear_pm_only(struct request_queue *q); -static inline int queue_in_flight(struct request_queue *q) -{ - return q->in_flight[0] + q->in_flight[1]; -} - static inline bool blk_account_rq(struct request *rq) { return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); @@ -765,7 +726,7 @@ static inline bool blk_account_rq(struct request *rq) */ static inline bool queue_is_rq_based(struct request_queue *q) { - return q->request_fn || q->mq_ops; + return q->mq_ops; } static inline unsigned int blk_queue_cluster(struct request_queue *q) @@ -828,27 +789,6 @@ static inline bool rq_is_sync(struct request *rq) return op_is_sync(rq->cmd_flags); } -static inline bool blk_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - return rl->flags & flag; -} - -static inline void blk_set_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags |= flag; -} - -static inline void blk_clear_rl_full(struct request_list *rl, bool sync) -{ - unsigned int flag = sync ? BLK_RL_SYNCFULL : BLK_RL_ASYNCFULL; - - rl->flags &= ~flag; -} - static inline bool rq_mergeable(struct request *rq) { if (blk_rq_is_passthrough(rq)) @@ -969,7 +909,6 @@ extern void blk_put_request(struct request *); extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); -extern void blk_requeue_request(struct request_queue *, struct request *); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, struct bio_set *bs, gfp_t gfp_mask, @@ -979,7 +918,6 @@ extern void blk_rq_unprep_clone(struct request *rq); extern blk_status_t blk_insert_cloned_request(struct request_queue *q, struct request *rq); extern int blk_rq_append_bio(struct request *rq, struct bio **bio); -extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_queue_split(struct request_queue *, struct bio **); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_verify_blk_ioctl(struct block_device *, unsigned int); @@ -992,15 +930,7 @@ extern int sg_scsi_ioctl(struct request_queue *, struct gendisk *, fmode_t, extern int blk_queue_enter(struct request_queue *q, blk_mq_req_flags_t flags); extern void blk_queue_exit(struct request_queue *q); -extern void blk_start_queue(struct request_queue *q); -extern void blk_start_queue_async(struct request_queue *q); -extern void blk_stop_queue(struct request_queue *q); extern void blk_sync_queue(struct request_queue *q); -extern void __blk_stop_queue(struct request_queue *q); -extern void __blk_run_queue(struct request_queue *q); -extern void __blk_run_queue_uncond(struct request_queue *q); -extern void blk_run_queue(struct request_queue *); -extern void blk_run_queue_async(struct request_queue *q); extern int blk_rq_map_user(struct request_queue *, struct request *, struct rq_map_data *, void __user *, unsigned long, gfp_t); @@ -1155,13 +1085,6 @@ static inline unsigned int blk_rq_count_bios(struct request *rq) return nr_bios; } -/* - * Request issue related functions. - */ -extern struct request *blk_peek_request(struct request_queue *q); -extern void blk_start_request(struct request *rq); -extern struct request *blk_fetch_request(struct request_queue *q); - void blk_steal_bios(struct bio_list *list, struct request *rq); /* @@ -1179,9 +1102,6 @@ void blk_steal_bios(struct bio_list *list, struct request *rq); */ extern bool blk_update_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); -extern void blk_finish_request(struct request *rq, blk_status_t error); -extern bool blk_end_request(struct request *rq, blk_status_t error, - unsigned int nr_bytes); extern void blk_end_request_all(struct request *rq, blk_status_t error); extern bool __blk_end_request(struct request *rq, blk_status_t error, unsigned int nr_bytes); @@ -1190,15 +1110,10 @@ extern bool __blk_end_request_cur(struct request *rq, blk_status_t error); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); -extern void blk_unprep_request(struct request *); /* * Access functions for manipulating queue properties */ -extern struct request_queue *blk_init_queue_node(request_fn_proc *rfn, - spinlock_t *lock, int node_id); -extern struct request_queue *blk_init_queue(request_fn_proc *, spinlock_t *); -extern int blk_init_allocated_queue(struct request_queue *); extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); @@ -1239,8 +1154,6 @@ extern int blk_queue_dma_drain(struct request_queue *q, void *buf, unsigned int size); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); -extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); -extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); @@ -1298,7 +1211,6 @@ extern void blk_set_queue_dying(struct request_queue *); * schedule() where blk_schedule_flush_plug() is called. */ struct blk_plug { - struct list_head list; /* requests */ struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ }; @@ -1339,8 +1251,7 @@ static inline bool blk_needs_flush_plug(struct task_struct *tsk) struct blk_plug *plug = tsk->plug; return plug && - (!list_empty(&plug->list) || - !list_empty(&plug->mq_list) || + (!list_empty(&plug->mq_list) || !list_empty(&plug->cb_list)); } diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 015bb59c0331..158004f1754d 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -23,74 +23,6 @@ enum elv_merge { ELEVATOR_DISCARD_MERGE = 3, }; -typedef enum elv_merge (elevator_merge_fn) (struct request_queue *, struct request **, - struct bio *); - -typedef void (elevator_merge_req_fn) (struct request_queue *, struct request *, struct request *); - -typedef void (elevator_merged_fn) (struct request_queue *, struct request *, enum elv_merge); - -typedef int (elevator_allow_bio_merge_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_allow_rq_merge_fn) (struct request_queue *, - struct request *, struct request *); - -typedef void (elevator_bio_merged_fn) (struct request_queue *, - struct request *, struct bio *); - -typedef int (elevator_dispatch_fn) (struct request_queue *, int); - -typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); -typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); -typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_may_queue_fn) (struct request_queue *, unsigned int); - -typedef void (elevator_init_icq_fn) (struct io_cq *); -typedef void (elevator_exit_icq_fn) (struct io_cq *); -typedef int (elevator_set_req_fn) (struct request_queue *, struct request *, - struct bio *, gfp_t); -typedef void (elevator_put_req_fn) (struct request *); -typedef void (elevator_activate_req_fn) (struct request_queue *, struct request *); -typedef void (elevator_deactivate_req_fn) (struct request_queue *, struct request *); - -typedef int (elevator_init_fn) (struct request_queue *, - struct elevator_type *e); -typedef void (elevator_exit_fn) (struct elevator_queue *); -typedef void (elevator_registered_fn) (struct request_queue *); - -struct elevator_ops -{ - elevator_merge_fn *elevator_merge_fn; - elevator_merged_fn *elevator_merged_fn; - elevator_merge_req_fn *elevator_merge_req_fn; - elevator_allow_bio_merge_fn *elevator_allow_bio_merge_fn; - elevator_allow_rq_merge_fn *elevator_allow_rq_merge_fn; - elevator_bio_merged_fn *elevator_bio_merged_fn; - - elevator_dispatch_fn *elevator_dispatch_fn; - elevator_add_req_fn *elevator_add_req_fn; - elevator_activate_req_fn *elevator_activate_req_fn; - elevator_deactivate_req_fn *elevator_deactivate_req_fn; - - elevator_completed_req_fn *elevator_completed_req_fn; - - elevator_request_list_fn *elevator_former_req_fn; - elevator_request_list_fn *elevator_latter_req_fn; - - elevator_init_icq_fn *elevator_init_icq_fn; /* see iocontext.h */ - elevator_exit_icq_fn *elevator_exit_icq_fn; /* ditto */ - - elevator_set_req_fn *elevator_set_req_fn; - elevator_put_req_fn *elevator_put_req_fn; - - elevator_may_queue_fn *elevator_may_queue_fn; - - elevator_init_fn *elevator_init_fn; - elevator_exit_fn *elevator_exit_fn; - elevator_registered_fn *elevator_registered_fn; -}; - struct blk_mq_alloc_data; struct blk_mq_hw_ctx; @@ -138,16 +70,15 @@ struct elevator_type /* fields provided by elevator implementation */ union { - struct elevator_ops sq; struct elevator_mq_ops mq; } ops; + size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ struct elv_fs_entry *elevator_attrs; char elevator_name[ELV_NAME_MAX]; const char *elevator_alias; struct module *elevator_owner; - bool uses_mq; #ifdef CONFIG_BLK_DEBUG_FS const struct blk_mq_debugfs_attr *queue_debugfs_attrs; const struct blk_mq_debugfs_attr *hctx_debugfs_attrs; @@ -175,40 +106,25 @@ struct elevator_queue struct kobject kobj; struct mutex sysfs_lock; unsigned int registered:1; - unsigned int uses_mq:1; DECLARE_HASHTABLE(hash, ELV_HASH_BITS); }; /* * block elevator interface */ -extern void elv_dispatch_sort(struct request_queue *, struct request *); -extern void elv_dispatch_add_tail(struct request_queue *, struct request *); -extern void elv_add_request(struct request_queue *, struct request *, int); -extern void __elv_add_request(struct request_queue *, struct request *, int); extern enum elv_merge elv_merge(struct request_queue *, struct request **, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, enum elv_merge); -extern void elv_bio_merged(struct request_queue *q, struct request *, - struct bio *); extern bool elv_attempt_insert_merge(struct request_queue *, struct request *); -extern void elv_requeue_request(struct request_queue *, struct request *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); -extern int elv_may_queue(struct request_queue *, unsigned int); -extern void elv_completed_request(struct request_queue *, struct request *); -extern int elv_set_request(struct request_queue *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask); -extern void elv_put_request(struct request_queue *, struct request *); -extern void elv_drain_elevator(struct request_queue *); /* * io scheduler registration */ -extern void __init load_default_elevator_module(void); extern int elv_register(struct elevator_type *); extern void elv_unregister(struct elevator_type *); @@ -260,9 +176,5 @@ enum { #define rq_entry_fifo(ptr) list_entry((ptr), struct request, queuelist) #define rq_fifo_clear(rq) list_del_init(&(rq)->queuelist) -#else /* CONFIG_BLOCK */ - -static inline void load_default_elevator_module(void) { } - #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/init.h b/include/linux/init.h index 9c2aba1dbabf..5255069f5a9f 100644 --- a/include/linux/init.h +++ b/include/linux/init.h @@ -146,7 +146,6 @@ extern unsigned int reset_devices; /* used by init/main.c */ void setup_arch(char **); void prepare_namespace(void); -void __init load_default_modules(void); int __init init_rootfs(void); #if defined(CONFIG_STRICT_KERNEL_RWX) || defined(CONFIG_STRICT_MODULE_RWX) -- cgit v1.2.3 From f9cd4bfe96955e7a1d3ec54b393dee87b815ba3b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 1 Nov 2018 16:41:41 -0600 Subject: block: get rid of MQ scheduler ops union This is a remnant of when we had ops for both SQ and MQ schedulers. Now it's just MQ, so get rid of the union. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/elevator.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 158004f1754d..2e9e2763bf47 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -69,9 +69,7 @@ struct elevator_type struct kmem_cache *icq_cache; /* fields provided by elevator implementation */ - union { - struct elevator_mq_ops mq; - } ops; + struct elevator_mq_ops ops; size_t icq_size; /* see iocontext.h */ size_t icq_align; /* ditto */ -- cgit v1.2.3 From 92bc5a24844ada9b010f03c49a493e3edeadaa54 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 13:52:28 -0600 Subject: block: remove __blk_put_request() Now there's no difference between blk_put_request() and __blk_put_request() anymore, get rid of the underscore version and convert the few callers. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a9f6db8abcda..c502a7f40e84 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -906,7 +906,6 @@ extern blk_qc_t direct_make_request(struct bio *bio); extern void blk_rq_init(struct request_queue *q, struct request *rq); extern void blk_init_request_from_bio(struct request *req, struct bio *bio); extern void blk_put_request(struct request *); -extern void __blk_put_request(struct request_queue *, struct request *); extern struct request *blk_get_request(struct request_queue *, unsigned int op, blk_mq_req_flags_t flags); extern int blk_lld_busy(struct request_queue *q); -- cgit v1.2.3 From 4316b79e4321d4140164e42f228778e5bc66c84f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 10:25:07 -0600 Subject: block: kill legacy parts of timeout handling The only user of legacy timing now is BSG, which is invoked from the mq timeout handler. Kill the legacy code, and rename the q->rq_timed_out_fn to q->bsg_job_timeout_fn. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c502a7f40e84..0364fc53f5c8 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -441,7 +441,6 @@ struct request_queue { make_request_fn *make_request_fn; poll_q_fn *poll_fn; softirq_done_fn *softirq_done_fn; - rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; /* Called just after a request is allocated */ init_rq_fn *init_rq_fn; @@ -541,7 +540,6 @@ struct request_queue { struct timer_list timeout; struct work_struct timeout_work; - struct list_head timeout_list; struct list_head icq_list; #ifdef CONFIG_BLK_CGROUP @@ -601,6 +599,7 @@ struct request_queue { #if defined(CONFIG_BLK_DEV_BSG) bsg_job_fn *bsg_job_fn; + rq_timed_out_fn *bsg_job_timeout_fn; struct bsg_class_device bsg_dev; #endif @@ -1156,7 +1155,6 @@ extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); -extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -- cgit v1.2.3 From 1028e4b335665290dc563d5272f3c6b84e7fd66e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 09:47:17 -0600 Subject: bsg: move bsg-lib parts outside of request queue Get rid of the special bsg job fn and timeout handler, move them into a private bsg_set instead. Mostly from Christoph, with fixes for error handling and cleanups. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ----- include/linux/bsg-lib.h | 5 ++++- 2 files changed, 4 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0364fc53f5c8..877a3d235c45 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -312,7 +312,6 @@ typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (bsg_job_fn) (struct bsg_job *); typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); typedef void (exit_rq_fn)(struct request_queue *, struct request *); @@ -321,8 +320,6 @@ enum blk_eh_timer_return { BLK_EH_RESET_TIMER, /* reset timer and try again */ }; -typedef enum blk_eh_timer_return (rq_timed_out_fn)(struct request *); - enum blk_queue_state { Queue_down, Queue_up, @@ -598,8 +595,6 @@ struct request_queue { atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) - bsg_job_fn *bsg_job_fn; - rq_timed_out_fn *bsg_job_timeout_fn; struct bsg_class_device bsg_dev; #endif diff --git a/include/linux/bsg-lib.h b/include/linux/bsg-lib.h index 9c9b134b1fa5..b356e0006731 100644 --- a/include/linux/bsg-lib.h +++ b/include/linux/bsg-lib.h @@ -31,6 +31,9 @@ struct device; struct scatterlist; struct request_queue; +typedef int (bsg_job_fn) (struct bsg_job *); +typedef enum blk_eh_timer_return (bsg_timeout_fn)(struct request *); + struct bsg_buffer { unsigned int payload_len; int sg_cnt; @@ -72,7 +75,7 @@ struct bsg_job { void bsg_job_done(struct bsg_job *job, int result, unsigned int reply_payload_rcv_len); struct request_queue *bsg_setup_queue(struct device *dev, const char *name, - bsg_job_fn *job_fn, rq_timed_out_fn *timeout, int dd_job_size); + bsg_job_fn *job_fn, bsg_timeout_fn *timeout, int dd_job_size); void bsg_remove_queue(struct request_queue *q); void bsg_job_put(struct bsg_job *job); int __must_check bsg_job_get(struct bsg_job *job); -- cgit v1.2.3 From db6d995235606191fa9db0c717e9d843200b71ea Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 2 Nov 2018 08:46:15 -0600 Subject: block: remove request_list code It's now dead code, nobody uses it. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 97 ---------------------------------------------- include/linux/blkdev.h | 34 ---------------- 2 files changed, 131 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 6d766a19f2bb..1b299e025e83 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -122,9 +122,6 @@ struct blkcg_gq { /* all non-root blkcg_gq's are guaranteed to have access to parent */ struct blkcg_gq *parent; - /* request allocation list for this blkcg-q pair */ - struct request_list rl; - /* reference count */ atomic_t refcnt; @@ -515,94 +512,6 @@ static inline void blkg_put(struct blkcg_gq *blkg) if (((d_blkg) = __blkg_lookup(css_to_blkcg(pos_css), \ (p_blkg)->q, false))) -/** - * blk_get_rl - get request_list to use - * @q: request_queue of interest - * @bio: bio which will be attached to the allocated request (may be %NULL) - * - * The caller wants to allocate a request from @q to use for @bio. Find - * the request_list to use and obtain a reference on it. Should be called - * under queue_lock. This function is guaranteed to return non-%NULL - * request_list. - */ -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) -{ - struct blkcg *blkcg; - struct blkcg_gq *blkg; - - rcu_read_lock(); - - blkcg = bio_blkcg(bio); - - /* bypass blkg lookup and use @q->root_rl directly for root */ - if (blkcg == &blkcg_root) - goto root_rl; - - /* - * Try to use blkg->rl. blkg lookup may fail under memory pressure - * or if either the blkcg or queue is going away. Fall back to - * root_rl in such cases. - */ - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) - goto root_rl; - - blkg_get(blkg); - rcu_read_unlock(); - return &blkg->rl; -root_rl: - rcu_read_unlock(); - return &q->root_rl; -} - -/** - * blk_put_rl - put request_list - * @rl: request_list to put - * - * Put the reference acquired by blk_get_rl(). Should be called under - * queue_lock. - */ -static inline void blk_put_rl(struct request_list *rl) -{ - if (rl->blkg->blkcg != &blkcg_root) - blkg_put(rl->blkg); -} - -/** - * blk_rq_set_rl - associate a request with a request_list - * @rq: request of interest - * @rl: target request_list - * - * Associate @rq with @rl so that accounting and freeing can know the - * request_list @rq came from. - */ -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) -{ - rq->rl = rl; -} - -/** - * blk_rq_rl - return the request_list a request came from - * @rq: request of interest - * - * Return the request_list @rq is allocated from. - */ -static inline struct request_list *blk_rq_rl(struct request *rq) -{ - return rq->rl; -} - -struct request_list *__blk_queue_next_rl(struct request_list *rl, - struct request_queue *q); -/** - * blk_queue_for_each_rl - iterate through all request_lists of a request_queue - * - * Should be used under queue_lock. - */ -#define blk_queue_for_each_rl(rl, q) \ - for ((rl) = &(q)->root_rl; (rl); (rl) = __blk_queue_next_rl((rl), (q))) - static inline int blkg_stat_init(struct blkg_stat *stat, gfp_t gfp) { int ret; @@ -939,12 +848,6 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } -static inline struct request_list *blk_get_rl(struct request_queue *q, - struct bio *bio) { return &q->root_rl; } -static inline void blk_put_rl(struct request_list *rl) { } -static inline void blk_rq_set_rl(struct request *rq, struct request_list *rl) { } -static inline struct request_list *blk_rq_rl(struct request *rq) { return &rq->q->root_rl; } - static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 877a3d235c45..e0c661a95c39 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -58,22 +58,6 @@ struct blk_stat_callback; typedef void (rq_end_io_fn)(struct request *, blk_status_t); -struct request_list { - struct request_queue *q; /* the queue this rl belongs to */ -#ifdef CONFIG_BLK_CGROUP - struct blkcg_gq *blkg; /* blkg this request pool belongs to */ -#endif - /* - * count[], starved[], and wait[] are indexed by - * BLK_RW_SYNC/BLK_RW_ASYNC - */ - int count[2]; - int starved[2]; - mempool_t *rq_pool; - wait_queue_head_t wait[2]; - unsigned int flags; -}; - /* * request flags */ typedef __u32 __bitwise req_flags_t; @@ -259,10 +243,6 @@ struct request { /* for bidi */ struct request *next_rq; - -#ifdef CONFIG_BLK_CGROUP - struct request_list *rl; /* rl this rq is alloced from */ -#endif }; static inline bool blk_op_is_scsi(unsigned int op) @@ -312,8 +292,6 @@ typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); -typedef int (init_rq_fn)(struct request_queue *, struct request *, gfp_t); -typedef void (exit_rq_fn)(struct request_queue *, struct request *); enum blk_eh_timer_return { BLK_EH_DONE, /* drivers has completed the command */ @@ -427,22 +405,10 @@ struct request_queue { struct blk_queue_stats *stats; struct rq_qos *rq_qos; - /* - * If blkcg is not used, @q->root_rl serves all requests. If blkcg - * is used, root blkg allocates from @q->root_rl and all other - * blkgs from their own blkg->rl. Which one to use should be - * determined using bio_request_list(). - */ - struct request_list root_rl; - make_request_fn *make_request_fn; poll_q_fn *poll_fn; softirq_done_fn *softirq_done_fn; dma_drain_needed_fn *dma_drain_needed; - /* Called just after a request is allocated */ - init_rq_fn *init_rq_fn; - /* Called just before a request is freed */ - exit_rq_fn *exit_rq_fn; const struct blk_mq_ops *mq_ops; -- cgit v1.2.3 From 7d692330e7cd581ccfee982334bf06b236cb999a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 10:48:12 -0600 Subject: block: get rid of blk_queued_rq() No point in hiding what this does, just open code it in the one spot where we are still using it. Reviewed-by: Hannes Reinecke Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e0c661a95c39..c675e2b5af62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -673,8 +673,6 @@ static inline bool blk_account_rq(struct request *rq) #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) -/* rq->queuelist of dequeued request must be list_empty() */ -#define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -- cgit v1.2.3 From c7bb9ad1744ea14e61e5fff99ee5282709b0c9d9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 31 Oct 2018 09:43:30 -0600 Subject: block: get rid of q->softirq_done_fn() With the legacy path gone, all we do is funnel it through the mq_ops->complete() operation. Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- include/linux/blkdev.h | 3 --- 2 files changed, 2 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 5c8418ebbfd6..9dd574e5436a 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -115,6 +115,7 @@ typedef void (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); typedef bool (busy_fn)(struct request_queue *); +typedef void (complete_fn)(struct request *); struct blk_mq_ops { @@ -142,7 +143,7 @@ struct blk_mq_ops { */ poll_fn *poll; - softirq_done_fn *complete; + complete_fn *complete; /* * Called when the block layer side of a hardware queue has been diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c675e2b5af62..d4104844d6bb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -290,7 +290,6 @@ typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; -typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); enum blk_eh_timer_return { @@ -407,7 +406,6 @@ struct request_queue { make_request_fn *make_request_fn; poll_q_fn *poll_fn; - softirq_done_fn *softirq_done_fn; dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; @@ -1113,7 +1111,6 @@ extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_virt_boundary(struct request_queue *, unsigned long); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); -extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush_queueable(struct request_queue *q, bool queueable); extern void blk_queue_write_cache(struct request_queue *q, bool enabled, bool fua); -- cgit v1.2.3 From 9cf2bab6307659b940da65d16dcc8f82c69f3a97 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 31 Oct 2018 17:01:22 -0600 Subject: block: kill request ->cpu member This was used for completion placement for the legacy path, but for mq we have rq->mq_ctx->cpu for that. Add a helper to get the request CPU assignment, as the mq_ctx type is private to blk-mq. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ include/linux/blkdev.h | 2 -- 2 files changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9dd574e5436a..d83a26fb37e5 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -300,6 +300,8 @@ void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); +unsigned int blk_mq_rq_cpu(struct request *rq); + /** * blk_mq_mark_complete() - Set request state to complete * @rq: request to set to complete state diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d4104844d6bb..c8fa4d3d7fee 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -130,7 +130,6 @@ struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; - int cpu; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; @@ -669,7 +668,6 @@ static inline bool blk_account_rq(struct request *rq) return (rq->rq_flags & RQF_STARTED) && !blk_rq_is_passthrough(rq); } -#define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) #define list_entry_rq(ptr) list_entry((ptr), struct request, queuelist) -- cgit v1.2.3 From a8908939af569ce2419f43fd56eeaf003bc3d85d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 16 Oct 2018 14:23:06 -0600 Subject: blk-mq: kill q->mq_map It's just a pointer to set->mq_map, use that instead. Move the assignment a bit earlier, so we always know it's valid. Reviewed-by: Christoph Hellwig Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c8fa4d3d7fee..2ae7465d68ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -409,8 +409,6 @@ struct request_queue { const struct blk_mq_ops *mq_ops; - unsigned int *mq_map; - /* sw queues */ struct blk_mq_ctx __percpu *queue_ctx; unsigned int nr_queues; -- cgit v1.2.3 From ed76e329d74a4b15ac0f5fd3adbd52ec0178a134 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 13:06:14 -0600 Subject: blk-mq: abstract out queue map This is in preparation for allowing multiple sets of maps per queue, if so desired. Reviewed-by: Hannes Reinecke Reviewed-by: Bart Van Assche Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq-pci.h | 4 ++-- include/linux/blk-mq-virtio.h | 4 ++-- include/linux/blk-mq.h | 15 ++++++++++++--- 3 files changed, 16 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq-pci.h b/include/linux/blk-mq-pci.h index 9f4c17f0d2d8..0b1f45c62623 100644 --- a/include/linux/blk-mq-pci.h +++ b/include/linux/blk-mq-pci.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_PCI_H #define _LINUX_BLK_MQ_PCI_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct pci_dev; -int blk_mq_pci_map_queues(struct blk_mq_tag_set *set, struct pci_dev *pdev, +int blk_mq_pci_map_queues(struct blk_mq_queue_map *qmap, struct pci_dev *pdev, int offset); #endif /* _LINUX_BLK_MQ_PCI_H */ diff --git a/include/linux/blk-mq-virtio.h b/include/linux/blk-mq-virtio.h index 69b4da262c45..687ae287e1dc 100644 --- a/include/linux/blk-mq-virtio.h +++ b/include/linux/blk-mq-virtio.h @@ -2,10 +2,10 @@ #ifndef _LINUX_BLK_MQ_VIRTIO_H #define _LINUX_BLK_MQ_VIRTIO_H -struct blk_mq_tag_set; +struct blk_mq_queue_map; struct virtio_device; -int blk_mq_virtio_map_queues(struct blk_mq_tag_set *set, +int blk_mq_virtio_map_queues(struct blk_mq_queue_map *qmap, struct virtio_device *vdev, int first_vec); #endif /* _LINUX_BLK_MQ_VIRTIO_H */ diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d83a26fb37e5..176164888628 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -74,10 +74,19 @@ struct blk_mq_hw_ctx { struct srcu_struct srcu[0]; }; +struct blk_mq_queue_map { + unsigned int *mq_map; + unsigned int nr_queues; +}; + +enum { + HCTX_MAX_TYPES = 1, +}; + struct blk_mq_tag_set { - unsigned int *mq_map; + struct blk_mq_queue_map map[HCTX_MAX_TYPES]; const struct blk_mq_ops *ops; - unsigned int nr_hw_queues; + unsigned int nr_hw_queues; /* nr hw queues across maps */ unsigned int queue_depth; /* max hw supported */ unsigned int reserved_tags; unsigned int cmd_size; /* per-request extra data */ @@ -295,7 +304,7 @@ void blk_mq_freeze_queue_wait(struct request_queue *q); int blk_mq_freeze_queue_wait_timeout(struct request_queue *q, unsigned long timeout); -int blk_mq_map_queues(struct blk_mq_tag_set *set); +int blk_mq_map_queues(struct blk_mq_queue_map *qmap); void blk_mq_update_nr_hw_queues(struct blk_mq_tag_set *set, int nr_hw_queues); void blk_mq_quiesce_queue_nowait(struct request_queue *q); -- cgit v1.2.3 From f31967f0e455d08d3ea1d2f849bf62dafc92dbf4 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 13:13:29 -0600 Subject: blk-mq: allow software queue to map to multiple hardware queues The mapping used to be dependent on just the CPU location, but now it's a tuple of (type, cpu) instead. This is a prep patch for allowing a single software queue to map to multiple hardware queues. No functional changes in this patch. This changes the software queue count to an unsigned short to save a bit of space. We can still support 64K-1 CPUs, which should be enough. Add a check to catch a wrap. Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 176164888628..6c39d546c50b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -37,7 +37,8 @@ struct blk_mq_hw_ctx { struct blk_mq_ctx *dispatch_from; unsigned int dispatch_busy; - unsigned int nr_ctx; + unsigned short type; + unsigned short nr_ctx; struct blk_mq_ctx **ctxs; spinlock_t dispatch_wait_lock; -- cgit v1.2.3 From b3c661b15d5ab11d982e58bee23e05c1780528a1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 30 Oct 2018 10:36:06 -0600 Subject: blk-mq: support multiple hctx maps Add support for the tag set carrying multiple queue maps, and for the driver to inform blk-mq how many it wishes to support through setting set->nr_maps. This adds an mq_ops helper for drivers that support more than 1 map, mq_ops->rq_flags_to_type(). The function takes request/bio flags and CPU, and returns a queue map index for that. We then use the type information in blk_mq_map_queue() to index the map set. Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6c39d546c50b..8994c95056a8 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -85,7 +85,14 @@ enum { }; struct blk_mq_tag_set { + /* + * map[] holds ctx -> hctx mappings, one map exists for each type + * that the driver wishes to support. There are no restrictions + * on maps being of the same size, and it's perfectly legal to + * share maps between types. + */ struct blk_mq_queue_map map[HCTX_MAX_TYPES]; + unsigned int nr_maps; /* nr entries in map[] */ const struct blk_mq_ops *ops; unsigned int nr_hw_queues; /* nr hw queues across maps */ unsigned int queue_depth; /* max hw supported */ @@ -109,6 +116,8 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +/* takes rq->cmd_flags as input, returns a hardware type index */ +typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); @@ -134,6 +143,11 @@ struct blk_mq_ops { */ queue_rq_fn *queue_rq; + /* + * Return a queue map type for the given request/bio flags + */ + rq_flags_to_type_fn *rq_flags_to_type; + /* * Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the -- cgit v1.2.3 From ea4f995ee8b8f0578b3319949f2edd5d812fdb0a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 29 Oct 2018 15:06:13 -0600 Subject: blk-mq: cache request hardware queue mapping We call blk_mq_map_queue() a lot, at least two times for each request per IO, sometimes more. Since we now have an indirect call as well in that function. cache the mapping so we don't have to re-call blk_mq_map_queue() for the same request multiple times. Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Reviewed-by: Hannes Reinecke Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2ae7465d68ab..9b1f470cc784 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -129,6 +129,7 @@ enum mq_rq_state { struct request { struct request_queue *q; struct blk_mq_ctx *mq_ctx; + struct blk_mq_hw_ctx *mq_hctx; unsigned int cmd_flags; /* op and common flags */ req_flags_t rq_flags; -- cgit v1.2.3 From 843477d4cc5c4bb4e346c561ecd3b9d0bd67e8c8 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 24 Oct 2018 13:16:11 -0600 Subject: blk-mq: initial support for multiple queue maps Add a queue offset to the tag map. This enables users to map iteratively, for each queue map type they support. Bump maximum number of supported maps to 2, we're now fully able to support more than 1 map. Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 8994c95056a8..729ce0f00433 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -78,10 +78,11 @@ struct blk_mq_hw_ctx { struct blk_mq_queue_map { unsigned int *mq_map; unsigned int nr_queues; + unsigned int queue_offset; }; enum { - HCTX_MAX_TYPES = 1, + HCTX_MAX_TYPES = 2, }; struct blk_mq_tag_set { -- cgit v1.2.3 From d1e36282b0bbd5de6a9c4d5275e94ef3b3438f48 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 29 Aug 2018 10:36:56 -0600 Subject: block: add REQ_HIPRI and inherit it from IOCB_HIPRI We use IOCB_HIPRI to poll for IO in the caller instead of scheduling. This information is not available for (or after) IO submission. The driver may make different queue choices based on the type of IO, so make the fact that we will poll for this IO known to the lower layers as well. Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 1dcf652ba0aa..dbdbfbd6a987 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -323,6 +323,8 @@ enum req_flag_bits { /* command specific flags for REQ_OP_WRITE_ZEROES: */ __REQ_NOUNMAP, /* do not free blocks when zeroing */ + __REQ_HIPRI, + /* for driver use */ __REQ_DRV, __REQ_SWAP, /* swapping request. */ @@ -343,8 +345,8 @@ enum req_flag_bits { #define REQ_RAHEAD (1ULL << __REQ_RAHEAD) #define REQ_BACKGROUND (1ULL << __REQ_BACKGROUND) #define REQ_NOWAIT (1ULL << __REQ_NOWAIT) - #define REQ_NOUNMAP (1ULL << __REQ_NOUNMAP) +#define REQ_HIPRI (1ULL << __REQ_HIPRI) #define REQ_DRV (1ULL << __REQ_DRV) #define REQ_SWAP (1ULL << __REQ_SWAP) -- cgit v1.2.3 From 4b04cc6a8f86c4842314def22332de1f15de8523 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 5 Nov 2018 12:44:33 -0700 Subject: nvme: add separate poll queue map Adds support for defining a variable number of poll queues, currently configurable with the 'poll_queues' module parameter. Defaults to a single poll queue. And now we finally have poll support without triggering interrupts! Reviewed-by: Hannes Reinecke Reviewed-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 729ce0f00433..9f5e93f40857 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -82,7 +82,7 @@ struct blk_mq_queue_map { }; enum { - HCTX_MAX_TYPES = 2, + HCTX_MAX_TYPES = 3, }; struct blk_mq_tag_set { -- cgit v1.2.3 From 7baa85727d0406ffd2b2303cd803a145aa35c505 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Nov 2018 10:24:07 -0700 Subject: blk-mq-tag: change busy_iter_fn to return whether to continue or not We have this functionality in sbitmap, but we don't export it in blk-mq for users of the tags busy iteration. This can be useful for stopping the iteration, if the caller doesn't need to find more requests. Reviewed-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 9f5e93f40857..ff497dfcbbf9 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -129,9 +129,9 @@ typedef int (init_request_fn)(struct blk_mq_tag_set *set, struct request *, typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, unsigned int); -typedef void (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, +typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); -typedef void (busy_tag_iter_fn)(struct request *, void *, bool); +typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); typedef bool (busy_fn)(struct request_queue *); -- cgit v1.2.3 From ae8799125d565c798e49dcab4bf182dbfc483524 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 8 Nov 2018 09:03:51 -0700 Subject: blk-mq: provide a helper to check if a queue is busy Returns true if the queue currently has requests pending, false if not. DM can use this to replace the atomic_inc/dec they do per device to see if a device is busy. Reviewed-by: Mike Snitzer Reviewed-by: Bart Van Assche Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ff497dfcbbf9..929e8abc5535 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -250,6 +250,8 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); +bool blk_mq_queue_busy(struct request_queue *q); + enum { /* return when out of requests */ BLK_MQ_REQ_NOWAIT = (__force blk_mq_req_flags_t)(1 << 0), -- cgit v1.2.3 From 9d037ad707ed6069fbea4e38e6ee37e027b13f1d Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 19:37:44 +0100 Subject: block: remove req->timeout_list Unused now that the legacy request path is gone. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b1f470cc784..dc2a6f625ecb 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -228,8 +228,6 @@ struct request { /* access through blk_rq_set_deadline, blk_rq_deadline */ unsigned long __deadline; - struct list_head timeout_list; - union { struct __call_single_data csd; u64 fifo_time; -- cgit v1.2.3 From 535ac5d3fe63b9ea1dda379f606f9d0d377d7184 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 14:42:35 +0100 Subject: ide: cleanup ->prep_rq calling convention The return value is just used as a binary yes/no decision, so switch it to a bool instead of the old BLKPREP_* values returned as an int. Also clean up a few related comments. Reviewed-by: Bart Van Assche Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/ide.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 079f8bc0b0f4..272704ff21ee 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -529,7 +529,7 @@ struct ide_drive_s { struct request_queue *queue; /* request queue */ - int (*prep_rq)(struct ide_drive_s *, struct request *); + bool (*prep_rq)(struct ide_drive_s *, struct request *); struct blk_mq_tag_set tag_set; -- cgit v1.2.3 From 0e17e06cbf7ede285ab74bab44d888b40c21f828 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 9 Nov 2018 14:42:41 +0100 Subject: block: remove the BLKPREP_* values. Unused now. Reviewed-by: Johannes Thumshirn Reviewed-by: Bart Van Assche Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dc2a6f625ecb..e67ad2dd025e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -776,16 +776,6 @@ static inline unsigned int blk_queue_depth(struct request_queue *q) return q->nr_requests; } -/* - * q->prep_rq_fn return values - */ -enum { - BLKPREP_OK, /* serve it */ - BLKPREP_KILL, /* fatal error, kill, return -EIO */ - BLKPREP_DEFER, /* leave on queue */ - BLKPREP_INVALID, /* invalid command, kill, return -EREMOTEIO */ -}; - extern unsigned long blk_max_low_pfn, blk_max_pfn; /* -- cgit v1.2.3 From 22ce0a7ccf23d55d1fdaa2974002f8b5ae765665 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 10 Nov 2018 09:30:49 +0100 Subject: ide: don't use req->special Just replace it with a field of the same name in struct ide_req. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/ide.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ide.h b/include/linux/ide.h index 272704ff21ee..e7d29ae633cd 100644 --- a/include/linux/ide.h +++ b/include/linux/ide.h @@ -50,6 +50,7 @@ struct ide_request { struct scsi_request sreq; u8 sense[SCSI_SENSE_BUFFERSIZE]; u8 type; + void *special; }; static inline struct ide_request *ide_req(struct request *rq) -- cgit v1.2.3 From 7ff4f8035695984c513598e2d49c8277d5d234ca Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 14 Nov 2018 15:22:49 -0700 Subject: block: remove dead queue members No more users of ->in_flight[] or ->nr_sorted, get rid of them. Fixes: a1ce35fa4985 ("block: remove dead elevator code") Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 --- 1 file changed, 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e67ad2dd025e..c961329be96b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -486,9 +486,6 @@ struct request_queue { unsigned int dma_pad_mask; unsigned int dma_alignment; - unsigned int nr_sorted; - unsigned int in_flight[2]; - unsigned int rq_timeout; int poll_nsec; -- cgit v1.2.3 From 8f4236d9008b0973a8281256ccfde6913cdec6cb Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:04 +0100 Subject: block: remove QUEUE_FLAG_BYPASS and ->bypass Unused since the removal of the legacy request code. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 6 +----- include/linux/blkdev.h | 3 --- 2 files changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 1b299e025e83..2c68efc603bd 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -325,16 +325,12 @@ static inline struct blkcg_gq *__blkg_lookup(struct blkcg *blkcg, * @q: request_queue of interest * * Lookup blkg for the @blkcg - @q pair. This function should be called - * under RCU read lock and is guaranteed to return %NULL if @q is bypassing - * - see blk_queue_bypass_start() for details. + * under RCU read loc. */ static inline struct blkcg_gq *blkg_lookup(struct blkcg *blkcg, struct request_queue *q) { WARN_ON_ONCE(!rcu_read_lock_held()); - - if (unlikely(blk_queue_bypass(q))) - return NULL; return __blkg_lookup(blkcg, q, false); } diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c961329be96b..dd1e53fd4acf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -548,7 +548,6 @@ struct request_queue { struct mutex sysfs_lock; - int bypass_depth; atomic_t mq_freeze_depth; #if defined(CONFIG_BLK_DEV_BSG) @@ -586,7 +585,6 @@ struct request_queue { #define QUEUE_FLAG_STOPPED 1 /* queue is stopped */ #define QUEUE_FLAG_DYING 2 /* queue being torn down */ -#define QUEUE_FLAG_BYPASS 3 /* act as dumb FIFO queue */ #define QUEUE_FLAG_BIDI 4 /* queue supports bidi requests */ #define QUEUE_FLAG_NOMERGES 5 /* disable merge attempts */ #define QUEUE_FLAG_SAME_COMP 6 /* complete on same CPU-group */ @@ -630,7 +628,6 @@ bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) #define blk_queue_dead(q) test_bit(QUEUE_FLAG_DEAD, &(q)->queue_flags) -#define blk_queue_bypass(q) test_bit(QUEUE_FLAG_BYPASS, &(q)->queue_flags) #define blk_queue_init_done(q) test_bit(QUEUE_FLAG_INIT_DONE, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) #define blk_queue_noxmerges(q) \ -- cgit v1.2.3 From 079076b3416e78ba2bb3ce38e05e320c388c3120 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:05 +0100 Subject: block: remove deadline __deadline manipulation helpers No users left since the removal of the legacy request interface, we can remove all the magic bit stealing now and make it a normal field. But use WRITE_ONCE/READ_ONCE on the new deadline field, given that we don't seem to have any mechanism to guarantee a new value actually gets seen by other threads. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index dd1e53fd4acf..60507ab7b358 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -224,9 +224,7 @@ struct request { refcount_t ref; unsigned int timeout; - - /* access through blk_rq_set_deadline, blk_rq_deadline */ - unsigned long __deadline; + unsigned long deadline; union { struct __call_single_data csd; -- cgit v1.2.3 From 57d74df90783f6a6b3e79dfdd2a567ce5db3b790 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:07 +0100 Subject: block: use atomic bitops for ->queue_flags ->queue_flags is generally not set or cleared in the fast path, and also generally set or cleared one flag at a time. Make use of the normal atomic bitops for it so that we don't need to take the queue_lock, which is otherwise mostly unused in the core block layer now. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 60507ab7b358..30d8e0fbd104 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -621,7 +621,6 @@ struct request_queue { void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); bool blk_queue_flag_test_and_set(unsigned int flag, struct request_queue *q); -bool blk_queue_flag_test_and_clear(unsigned int flag, struct request_queue *q); #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_dying(q) test_bit(QUEUE_FLAG_DYING, &(q)->queue_flags) -- cgit v1.2.3 From 6d46964230d182c4b6097379738849a809d791dc Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 14 Nov 2018 17:02:18 +0100 Subject: block: remove the lock argument to blk_alloc_queue_node With the legacy request path gone there is no real need to override the queue_lock. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 30d8e0fbd104..c4a3a660e3f0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1122,8 +1122,7 @@ extern long nr_blockdev_pages(void); bool __must_check blk_get_queue(struct request_queue *); struct request_queue *blk_alloc_queue(gfp_t); -struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id, - spinlock_t *lock); +struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id); extern void blk_put_queue(struct request_queue *); extern void blk_set_queue_dying(struct request_queue *); -- cgit v1.2.3 From 0d945c1f966b2bcb67bb12be749da0a7fb00201b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 15 Nov 2018 12:17:28 -0700 Subject: block: remove the queue_lock indirection With the legacy request path gone there is no good reason to keep queue_lock as a pointer, we can always use the embedded lock now. Reviewed-by: Hannes Reinecke Signed-off-by: Christoph Hellwig Fixed floppy and blk-cgroup missing conversions and half done edits. Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 ++-- include/linux/blkdev.h | 8 +------- 2 files changed, 3 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 2c68efc603bd..a9e2e2037129 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -717,11 +717,11 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { - spin_lock_irq(q->queue_lock); + spin_lock_irq(&q->queue_lock); blkg = blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; - spin_unlock_irq(q->queue_lock); + spin_unlock_irq(&q->queue_lock); } throtl = blk_throtl_bio(q, blkg, bio); diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index c4a3a660e3f0..1d185f1fc333 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -446,13 +446,7 @@ struct request_queue { */ gfp_t bounce_gfp; - /* - * protects queue structures from reentrancy. ->__queue_lock should - * _never_ be used directly, it is queue private. always use - * ->queue_lock. - */ - spinlock_t __queue_lock; - spinlock_t *queue_lock; + spinlock_t queue_lock; /* * queue kobject -- cgit v1.2.3 From 344e9ffcbd1898e1dc04085564a6e05c30ea8199 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 15 Nov 2018 12:22:51 -0700 Subject: block: add queue_is_mq() helper Various spots check for q->mq_ops being non-NULL, but provide a helper to do this instead. Where the ->mq_ops != NULL check is redundant, remove it. Since mq == rq-based now that legacy is gone, get rid of the queue_is_rq_based() and just use queue_is_mq() everywhere. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1d185f1fc333..41aaa05e42c1 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -656,11 +656,7 @@ static inline bool blk_account_rq(struct request *rq) #define rq_data_dir(rq) (op_is_write(req_op(rq)) ? WRITE : READ) -/* - * Driver can handle struct request, if it either has an old style - * request_fn defined, or is blk-mq based. - */ -static inline bool queue_is_rq_based(struct request_queue *q) +static inline bool queue_is_mq(struct request_queue *q) { return q->mq_ops; } -- cgit v1.2.3 From 0619317ff8baa2da9238191ad5167ed3618c16d9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 13 Nov 2018 21:16:54 -0700 Subject: block: add polled wakeup task helper If we're polling for IO on a device that doesn't use interrupts, then IO completion loop (and wake of task) is done by submitting task itself. If that is the case, then we don't need to enter the wake_up_process() function, we can simply mark ourselves as TASK_RUNNING. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 41aaa05e42c1..91c44f7a7f62 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1772,4 +1772,17 @@ static inline int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, #endif /* CONFIG_BLOCK */ +static inline void blk_wake_io_task(struct task_struct *waiter) +{ + /* + * If we're polling, the task itself is doing the completions. For + * that case, we don't need to signal a wakeup, it's enough to just + * mark us as RUNNING. + */ + if (waiter == current) + __set_current_state(TASK_RUNNING); + else + wake_up_process(waiter); +} + #endif -- cgit v1.2.3 From 2b78eae147a13ab2ca7caa121dd3fca2eecf8613 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 16 Nov 2018 09:10:01 +0100 Subject: block: remove the rq_alloc_data request_queue field Reviewed-by: Omar Sandoval Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 91c44f7a7f62..1ad6eafc43f2 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -567,7 +567,6 @@ struct request_queue { bool mq_sysfs_init_done; size_t cmd_size; - void *rq_alloc_data; struct work_struct release_work; -- cgit v1.2.3 From 85f4d4b65fdd67f1d6dc9eeb1d91923cef07eb6a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 6 Nov 2018 13:30:55 -0700 Subject: block: have ->poll_fn() return number of entries polled We currently only really support sync poll, ie poll with 1 IO in flight. This prepares us for supporting async poll. Note that the returned value isn't necessarily 100% accurate. If poll races with IRQ completion, we assume that the fact that the task is now runnable means we found at least one entry. In reality it could be more than 1, or not even 1. This is fine, the caller will just need to take this into account. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1ad6eafc43f2..e97c0a3b2262 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,7 +283,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef bool (poll_q_fn) (struct request_queue *q, blk_qc_t); +typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t); struct bio_vec; typedef int (dma_drain_needed_fn)(struct request *); -- cgit v1.2.3 From 92f806d678e5136e4777b21e5ed5368482ac9ea9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 19 Nov 2018 11:37:31 -0700 Subject: nvme-fc: remove ->poll implementation It's specifically looking for a given request, which we will not be supporting going forward. Also kill the qla2xxx poll implementation as that's the only user of the nvme-fc poll, and the now unused ->poll_queue() hook. Reviewed-by: Christoph Hellwig Reviewed-by: James Smart Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index 496ff759f84c..f4ab3b1925ac 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -403,7 +403,6 @@ struct nvme_fc_port_template { void **handle); void (*delete_queue)(struct nvme_fc_local_port *, unsigned int qidx, void *handle); - void (*poll_queue)(struct nvme_fc_local_port *, void *handle); int (*ls_req)(struct nvme_fc_local_port *, struct nvme_fc_remote_port *, struct nvmefc_ls_req *); -- cgit v1.2.3 From e2b3fa5af70c1e646270f6c7c799414f5e904d7a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:34 +0900 Subject: block: Remove bio->bi_ioc bio->bi_ioc is never set so always NULL. Remove references to it in bio_disassociate_task() and in rq_ioc() and delete this field from struct bio. With this change, rq_ioc() always returns current->io_context without the need for a bio argument. Further simplify the code and make it more readable by also removing this helper, which also allows to simplify blk_mq_sched_assign_ioc() by removing its bio argument. Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Reviewed-by: Adam Manzanares Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dbdbfbd6a987..c0ba1a038ff3 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,10 +174,9 @@ struct bio { void *bi_private; #ifdef CONFIG_BLK_CGROUP /* - * Optional ioc and css associated with this bio. Put on bio + * Optional css associated with this bio. Put on bio * release. Read comment on top of bio_associate_current(). */ - struct io_context *bi_ioc; struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; -- cgit v1.2.3 From 64845a1ddd655574886eb48e9a5eaeeb9b05bf0d Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:35 +0900 Subject: block: Introduce get_current_ioprio() Define get_current_ioprio() as an inline helper to obtain the caller I/O priority from its task I/O context. Use this helper in blk_init_request_from_bio() to set a request ioprio. Reviewed-by: Christoph Hellwig Reviewed-by: Johannes Thumshirn Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/ioprio.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/ioprio.h b/include/linux/ioprio.h index 9e30ed6443db..e9bfe6972aed 100644 --- a/include/linux/ioprio.h +++ b/include/linux/ioprio.h @@ -70,6 +70,19 @@ static inline int task_nice_ioclass(struct task_struct *task) return IOPRIO_CLASS_BE; } +/* + * If the calling process has set an I/O priority, use that. Otherwise, return + * the default I/O priority. + */ +static inline int get_current_ioprio(void) +{ + struct io_context *ioc = current->io_context; + + if (ioc) + return ioc->ioprio; + return IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0); +} + /* * For inheritance, return the highest of the two given priorities */ -- cgit v1.2.3 From 20578bdfd0418efb11ec316229e670d085cd574a Mon Sep 17 00:00:00 2001 From: Damien Le Moal Date: Tue, 20 Nov 2018 10:52:38 +0900 Subject: block: Initialize BIO I/O priority early For the synchronous I/O path case (read(), write() etc system calls), a BIO I/O priority is not initialized until the execution of blk_init_request_from_bio() when the BIO is submitted and a request initialized for the BIO execution. This is due to the ki_ioprio field of the struct kiocb defined on stack being always initialized to IOPRIO_CLASS_NONE, regardless of the calling process I/O context ioprio value set with ioprio_set(). This late initialization can result in the BIO being merged to pending requests even when the I/O priorities differ. Fix this by initializing the ki_iopriority field of on stack struct kiocb using the get_current_ioprio() helper, ensuring that all BIOs allocated and submitted for the system call execution see the correct intended I/O priority early. With this, since a BIO I/O priority is always set to the intended effective value for both the sync and async path, blk_init_request_from_bio() can be simplified. Reviewed-by: Christoph Hellwig Reviewed-by: Adam Manzanares Signed-off-by: Damien Le Moal Signed-off-by: Jens Axboe --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index c95c0807471f..a1ab233e6469 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2021,7 +2021,7 @@ static inline void init_sync_kiocb(struct kiocb *kiocb, struct file *filp) .ki_filp = filp, .ki_flags = iocb_flags(filp), .ki_hint = ki_hint_validate(file_write_hint(filp)), - .ki_ioprio = IOPRIO_PRIO_VALUE(IOPRIO_CLASS_NONE, 0), + .ki_ioprio = get_current_ioprio(), }; } -- cgit v1.2.3 From 1db4909e76f64a85f4aaa187f0f683f5c85a471d Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Tue, 20 Nov 2018 09:44:35 +0800 Subject: blk-mq: not embed .mq_kobj and ctx->kobj into queue instance Even though .mq_kobj, ctx->kobj and q->kobj share same lifetime from block layer's view, actually they don't because userspace may grab one kobject anytime via sysfs. This patch fixes the issue by the following approach: 1) introduce 'struct blk_mq_ctxs' for holding .mq_kobj and managing all ctxs 2) free all allocated ctxs and the 'blk_mq_ctxs' instance in release handler of .mq_kobj 3) grab one ref of .mq_kobj before initializing each ctx->kobj, so that .mq_kobj is always released after all ctxs are freed. This patch fixes kernel panic issue during booting when DEBUG_KOBJECT_RELEASE is enabled. Reported-by: Guenter Roeck Cc: "jianchao.wang" Tested-by: Guenter Roeck Reviewed-by: Greg Kroah-Hartman Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97c0a3b2262..9b53db06ad08 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -456,7 +456,7 @@ struct request_queue { /* * mq queue kobject */ - struct kobject mq_kobj; + struct kobject *mq_kobj; #ifdef CONFIG_BLK_DEV_INTEGRITY struct blk_integrity integrity; -- cgit v1.2.3 From 1052b8ac5282daf35df331edcbdb645839d17e6a Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Nov 2018 08:21:49 -0700 Subject: blk-mq: when polling for IO, look for any completion If we want to support async IO polling, then we have to allow finding completions that aren't just for the one we are looking for. Always pass in -1 to the mq_ops->poll() helper, and have that return how many events were found in this poll loop. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 9b53db06ad08..f3015e9b5ae3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -bool blk_poll(struct request_queue *q, blk_qc_t cookie); +int blk_poll(struct request_queue *q, blk_qc_t cookie); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 9743139c5d11ab170f70a308dcb88c342390adfb Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 16 Nov 2018 09:48:21 -0700 Subject: blk-mq: remove 'tag' parameter from mq_ops->poll() We always pass in -1 now and none of the callers use the tag value, remove the parameter. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 929e8abc5535..ca0520ca6437 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -132,7 +132,7 @@ typedef void (exit_request_fn)(struct blk_mq_tag_set *set, struct request *, typedef bool (busy_iter_fn)(struct blk_mq_hw_ctx *, struct request *, void *, bool); typedef bool (busy_tag_iter_fn)(struct request *, void *, bool); -typedef int (poll_fn)(struct blk_mq_hw_ctx *, unsigned int); +typedef int (poll_fn)(struct blk_mq_hw_ctx *); typedef int (map_queues_fn)(struct blk_mq_tag_set *set); typedef bool (busy_fn)(struct request_queue *); typedef void (complete_fn)(struct request *); -- cgit v1.2.3 From 0a1b8b87d064a47fad9ec475316002da28559207 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 26 Nov 2018 08:24:43 -0700 Subject: block: make blk_poll() take a parameter on whether to spin or not blk_poll() has always kept spinning until it found an IO. This is fine for SYNC polling, since we need to find one request we have pending, but in preparation for ASYNC polling it can be beneficial to just check if we have any entries available or not. Existing callers are converted to pass in 'spin == true', to retain the old behavior. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f3015e9b5ae3..e3c0a8ec16a7 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,7 +283,7 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t); +typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t, bool spin); struct bio_vec; typedef int (dma_drain_needed_fn)(struct request *); @@ -867,7 +867,7 @@ extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, int blk_status_to_errno(blk_status_t status); blk_status_t errno_to_blk_status(int errno); -int blk_poll(struct request_queue *q, blk_qc_t cookie); +int blk_poll(struct request_queue *q, blk_qc_t cookie, bool spin); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { -- cgit v1.2.3 From 16c15eb16a793f2d81ae52f41f43fb6831b34212 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Nov 2018 09:54:28 -0700 Subject: blk-mq: Return true if request was completed A driver may have internal state to cleanup if we're pretending a request didn't complete. Return 'false' if the command wasn't actually completed due to the timeout error injection, and true otherwise. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index ca0520ca6437..6e3da356a8eb 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -298,7 +298,7 @@ void blk_mq_add_to_requeue_list(struct request *rq, bool at_head, bool kick_requeue_list); void blk_mq_kick_requeue_list(struct request_queue *q); void blk_mq_delay_kick_requeue_list(struct request_queue *q, unsigned long msecs); -void blk_mq_complete_request(struct request *rq); +bool blk_mq_complete_request(struct request *rq); bool blk_mq_bio_list_merge(struct request_queue *q, struct list_head *list, struct bio *bio); bool blk_mq_queue_stopped(struct request_queue *q); -- cgit v1.2.3 From af78ff7c6e66832afcdf5418f67b11c409f9e7a1 Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Mon, 26 Nov 2018 09:54:30 -0700 Subject: blk-mq: Simplify request completion state There are no more users relying on blk-mq request states to prevent double completions, so replace the relatively expensive cmpxchg operation with WRITE_ONCE. Reviewed-by: Christoph Hellwig Signed-off-by: Keith Busch Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 14 -------------- 1 file changed, 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 6e3da356a8eb..b8de11e0603b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -329,20 +329,6 @@ void blk_mq_quiesce_queue_nowait(struct request_queue *q); unsigned int blk_mq_rq_cpu(struct request *rq); -/** - * blk_mq_mark_complete() - Set request state to complete - * @rq: request to set to complete state - * - * Returns true if request state was successfully set to complete. If - * successful, the caller is responsibile for seeing this request is ended, as - * blk_mq_complete_request will not work again. - */ -static inline bool blk_mq_mark_complete(struct request *rq) -{ - return cmpxchg(&rq->state, MQ_RQ_IN_FLIGHT, MQ_RQ_COMPLETE) == - MQ_RQ_IN_FLIGHT; -} - /* * Driver command data is immediately after the request. So subtract request * size to get back to the original request, add request size to get the PDU. -- cgit v1.2.3 From 5f0ed774ed2914decfd397569fface997532e94d Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 23 Nov 2018 22:04:33 -0700 Subject: block: sum requests in the plug structure This isn't exactly the same as the previous count, as it includes requests for all devices. But that really doesn't matter, if we have more than the threshold (16) queued up, flush it. It's not worth it to have an expensive list loop for this. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3c0a8ec16a7..02732cae6080 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1130,6 +1130,7 @@ extern void blk_set_queue_dying(struct request_queue *); struct blk_plug { struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ + unsigned short rq_count; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) -- cgit v1.2.3 From 94a2c3a32b62e868dc1e3d854326745a7f1b8c7a Mon Sep 17 00:00:00 2001 From: Yufen Yu Date: Wed, 28 Nov 2018 16:42:01 +0800 Subject: block: use rcu_work instead of call_rcu to avoid sleep in softirq We recently got a stack by syzkaller like this: BUG: sleeping function called from invalid context at mm/slab.h:361 in_atomic(): 1, irqs_disabled(): 0, pid: 6644, name: blkid INFO: lockdep is turned off. CPU: 1 PID: 6644 Comm: blkid Not tainted 4.4.163-514.55.6.9.x86_64+ #76 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.10.2-1ubuntu1 04/01/2014 0000000000000000 5ba6a6b879e50c00 ffff8801f6b07b10 ffffffff81cb2194 0000000041b58ab3 ffffffff833c7745 ffffffff81cb2080 5ba6a6b879e50c00 0000000000000000 0000000000000001 0000000000000004 0000000000000000 Call Trace: [] __dump_stack lib/dump_stack.c:15 [inline] [] dump_stack+0x114/0x1a0 lib/dump_stack.c:51 [] ___might_sleep+0x291/0x490 kernel/sched/core.c:7675 [] __might_sleep+0xb3/0x270 kernel/sched/core.c:7637 [] slab_pre_alloc_hook mm/slab.h:361 [inline] [] slab_alloc_node mm/slub.c:2610 [inline] [] slab_alloc mm/slub.c:2692 [inline] [] kmem_cache_alloc_trace+0x2c3/0x5c0 mm/slub.c:2709 [] kmalloc include/linux/slab.h:479 [inline] [] kzalloc include/linux/slab.h:623 [inline] [] kobject_uevent_env+0x2c7/0x1150 lib/kobject_uevent.c:227 [] kobject_uevent+0x1f/0x30 lib/kobject_uevent.c:374 [] kobject_cleanup lib/kobject.c:633 [inline] [] kobject_release+0x229/0x440 lib/kobject.c:675 [] kref_sub include/linux/kref.h:73 [inline] [] kref_put include/linux/kref.h:98 [inline] [] kobject_put+0x72/0xd0 lib/kobject.c:692 [] put_device+0x25/0x30 drivers/base/core.c:1237 [] delete_partition_rcu_cb+0x1d4/0x2f0 block/partition-generic.c:232 [] __rcu_reclaim kernel/rcu/rcu.h:118 [inline] [] rcu_do_batch kernel/rcu/tree.c:2705 [inline] [] invoke_rcu_callbacks kernel/rcu/tree.c:2973 [inline] [] __rcu_process_callbacks kernel/rcu/tree.c:2940 [inline] [] rcu_process_callbacks+0x59c/0x1c70 kernel/rcu/tree.c:2957 [] __do_softirq+0x299/0xe20 kernel/softirq.c:273 [] invoke_softirq kernel/softirq.c:350 [inline] [] irq_exit+0x216/0x2c0 kernel/softirq.c:391 [] exiting_irq arch/x86/include/asm/apic.h:652 [inline] [] smp_apic_timer_interrupt+0x8b/0xc0 arch/x86/kernel/apic/apic.c:926 [] apic_timer_interrupt+0xa5/0xb0 arch/x86/entry/entry_64.S:746 [] ? audit_kill_trees+0x180/0x180 [] fd_install+0x57/0x80 fs/file.c:626 [] do_sys_open+0x45e/0x550 fs/open.c:1043 [] SYSC_open fs/open.c:1055 [inline] [] SyS_open+0x32/0x40 fs/open.c:1050 [] entry_SYSCALL_64_fastpath+0x1e/0x9a In softirq context, we call rcu callback function delete_partition_rcu_cb(), which may allocate memory by kzalloc with GFP_KERNEL flag. If the allocation cannot be satisfied, it may sleep. However, That is not allowed in softirq contex. Although we found this problem on linux 4.4, the latest kernel version seems to have this problem as well. And it is very similar to the previous one: https://lkml.org/lkml/2018/7/9/391 Fix it by using RCU workqueue, which allows sleep. Reviewed-by: Paul E. McKenney Signed-off-by: Yufen Yu Signed-off-by: Jens Axboe --- include/linux/genhd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 70fc838e6773..0c5ee17b4d88 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -129,7 +129,7 @@ struct hd_struct { struct disk_stats dkstats; #endif struct percpu_ref ref; - struct rcu_head rcu_head; + struct rcu_work rcu_work; }; #define GENHD_FL_REMOVABLE 1 -- cgit v1.2.3 From ce5b009cff1961137127edf91f44effd0eec8ffd Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Nov 2018 17:13:56 -0700 Subject: block: improve logic around when to sort a plug list Only do it if we have requests for multiple queues in the same plug. Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 02732cae6080..08d940f85fa0 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1131,6 +1131,7 @@ struct blk_plug { struct list_head mq_list; /* blk-mq requests */ struct list_head cb_list; /* md requires an unplug callback */ unsigned short rq_count; + bool multiple_queues; }; #define BLK_MAX_REQUEST_COUNT 16 #define BLK_PLUG_FLUSH_SIZE (128 * 1024) -- cgit v1.2.3 From d666ba98f849ad44c4405ecc2180390ebe80f4f9 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 27 Nov 2018 17:02:25 -0700 Subject: blk-mq: add mq_ops->commit_rqs() blk-mq passes information to the hardware about any given request being the last that we will issue in this sequence. The point is that hardware can defer costly doorbell type writes to the last request. But if we run into errors issuing a sequence of requests, we may never send the request with bd->last == true set. For that case, we need a hook that tells the hardware that nothing else is coming right now. For failures returned by the drivers ->queue_rq() hook, the driver is responsible for flushing pending requests, if it uses bd->last to optimize that part. This works like before, no changes there. Reviewed-by: Omar Sandoval Reviewed-by: Ming Lei Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 10 ++++++++++ 1 file changed, 10 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index b8de11e0603b..467f1dd21ccf 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -117,6 +117,7 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); +typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); /* takes rq->cmd_flags as input, returns a hardware type index */ typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); @@ -144,6 +145,15 @@ struct blk_mq_ops { */ queue_rq_fn *queue_rq; + /* + * If a driver uses bd->last to judge when to submit requests to + * hardware, it must define this function. In case of errors that + * make us stop issuing further requests, this hook serves the + * purpose of kicking the hardware (which the last request otherwise + * would have done). + */ + commit_rqs_fn *commit_rqs; + /* * Return a queue map type for the given request/bio flags */ -- cgit v1.2.3 From ea86ea2cdced20057da4d2c32965c1219c238197 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Fri, 30 Nov 2018 13:18:06 -0700 Subject: sbitmap: ammortize cost of clearing bits sbitmap maintains a set of words that we use to set and clear bits, with each bit representing a tag for blk-mq. Even though we spread the bits out and maintain a hint cache, one particular bit allocated will end up being cleared in the exact same spot. This introduces batched clearing of bits. Instead of clearing a given bit, the same bit is set in a cleared/free mask instead. If we fail allocating a bit from a given word, then we check the free mask, and batch move those cleared bits at that time. This trades 64 atomic bitops for 2 cmpxchg(). In a threaded poll test case, half the overhead of getting and clearing tags is removed with this change. On another poll test case with a single thread, performance is unchanged. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 33 +++++++++++++++++++++++++++------ 1 file changed, 27 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 804a50983ec5..81359d45751e 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -30,14 +30,24 @@ struct seq_file; */ struct sbitmap_word { /** - * @word: The bitmap word itself. + * @depth: Number of bits being used in @word/@cleared */ - unsigned long word; + unsigned long depth; /** - * @depth: Number of bits being used in @word. + * @word: word holding free bits */ - unsigned long depth; + unsigned long word ____cacheline_aligned_in_smp; + + /** + * @cleared: word holding cleared bits + */ + unsigned long cleared ____cacheline_aligned_in_smp; + + /** + * @swap_lock: Held while swapping word <-> cleared + */ + spinlock_t swap_lock; } ____cacheline_aligned_in_smp; /** @@ -310,6 +320,19 @@ static inline void sbitmap_clear_bit(struct sbitmap *sb, unsigned int bitnr) clear_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } +/* + * This one is special, since it doesn't actually clear the bit, rather it + * sets the corresponding bit in the ->cleared mask instead. Paired with + * the caller doing sbitmap_batch_clear() if a given index is full, which + * will clear the previously freed entries in the corresponding ->word. + */ +static inline void sbitmap_deferred_clear_bit(struct sbitmap *sb, unsigned int bitnr) +{ + unsigned long *addr = &sb->map[SB_NR_TO_INDEX(sb, bitnr)].cleared; + + set_bit(SB_NR_TO_BIT(sb, bitnr), addr); +} + static inline void sbitmap_clear_bit_unlock(struct sbitmap *sb, unsigned int bitnr) { @@ -321,8 +344,6 @@ static inline int sbitmap_test_bit(struct sbitmap *sb, unsigned int bitnr) return test_bit(SB_NR_TO_BIT(sb, bitnr), __sbitmap_word(sb, bitnr)); } -unsigned int sbitmap_weight(const struct sbitmap *sb); - /** * sbitmap_show() - Dump &struct sbitmap information to a &struct seq_file. * @sb: Bitmap to show. -- cgit v1.2.3 From 5d2ee7122c73be6a3b6bfe90d237e8aed737cfaa Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 29 Nov 2018 17:36:41 -0700 Subject: sbitmap: optimize wakeup check Even if we have no waiters on any of the sbitmap_queue wait states, we still have to loop every entry to check. We do this for every IO, so the cost adds up. Shift a bit of the cost to the slow path, when we actually have waiters. Wrap prepare_to_wait_exclusive() and finish_wait(), so we can maintain an internal count of how many are currently active. Then we can simply check this count in sbq_wake_ptr() and not have to loop if we don't have any sleepers. Convert the two users of sbitmap with waiting, blk-mq-tag and iSCSI. Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 81359d45751e..92806a2dbab7 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -135,6 +135,11 @@ struct sbitmap_queue { */ struct sbq_wait_state *ws; + /* + * @ws_active: count of currently active ws waitqueues + */ + atomic_t ws_active; + /** * @round_robin: Allocate bits in strict round-robin order. */ @@ -552,4 +557,33 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); */ void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); +struct sbq_wait { + int accounted; + struct wait_queue_entry wait; +}; + +#define DEFINE_SBQ_WAIT(name) \ + struct sbq_wait name = { \ + .accounted = 0, \ + .wait = { \ + .private = current, \ + .func = autoremove_wake_function, \ + .entry = LIST_HEAD_INIT((name).wait.entry), \ + } \ + } + +/* + * Wrapper around prepare_to_wait_exclusive(), which maintains some extra + * internal state. + */ +void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait, int state); + +/* + * Must be paired with sbitmap_prepare_to_wait(). + */ +void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + #endif /* __LINUX_SCALE_BITMAP_H */ -- cgit v1.2.3 From 8c2def893afc60d88160d524acf345765cf0c447 Mon Sep 17 00:00:00 2001 From: Omar Sandoval Date: Mon, 3 Dec 2018 14:45:43 -0800 Subject: sbitmap: fix sbitmap_for_each_set() We need to ignore bits in the cleared mask when iterating over all set bits. Fixes: ea86ea2cdced ("sbitmap: ammortize cost of clearing bits") Reported-by: Jens Axboe@kernel.dk> Signed-off-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 92806a2dbab7..03f50fcedc79 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -265,12 +265,14 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, nr = SB_NR_TO_BIT(sb, start); while (scanned < sb->depth) { - struct sbitmap_word *word = &sb->map[index]; - unsigned int depth = min_t(unsigned int, word->depth - nr, + unsigned long word; + unsigned int depth = min_t(unsigned int, + sb->map[index].depth - nr, sb->depth - scanned); scanned += depth; - if (!word->word) + word = sb->map[index].word & ~sb->map[index].cleared; + if (!word) goto next; /* @@ -280,7 +282,7 @@ static inline void __sbitmap_for_each_set(struct sbitmap *sb, */ depth += nr; while (1) { - nr = find_next_bit(&word->word, depth, nr); + nr = find_next_bit(&word, depth, nr); if (nr >= depth) break; if (!fn(sb, (index << sb->shift) + nr, data)) -- cgit v1.2.3 From e20ba6e1da029136ded295f33076483d65ddf50a Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 2 Dec 2018 17:46:16 +0100 Subject: block: move queues types to the block layer Having another indirect all in the fast path doesn't really help in our post-spectre world. Also having too many queue type is just going to create confusion, so I'd rather manage them centrally. Note that the queue type naming and ordering changes a bit - the first index now is the default queue for everything not explicitly marked, the optional ones are read and poll queues. Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 467f1dd21ccf..57eda7b20243 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -81,8 +81,12 @@ struct blk_mq_queue_map { unsigned int queue_offset; }; -enum { - HCTX_MAX_TYPES = 3, +enum hctx_type { + HCTX_TYPE_DEFAULT, /* all I/O not otherwise accounted for */ + HCTX_TYPE_READ, /* just for READ I/O */ + HCTX_TYPE_POLL, /* polled I/O of any kind */ + + HCTX_MAX_TYPES, }; struct blk_mq_tag_set { @@ -118,8 +122,6 @@ struct blk_mq_queue_data { typedef blk_status_t (queue_rq_fn)(struct blk_mq_hw_ctx *, const struct blk_mq_queue_data *); typedef void (commit_rqs_fn)(struct blk_mq_hw_ctx *); -/* takes rq->cmd_flags as input, returns a hardware type index */ -typedef int (rq_flags_to_type_fn)(struct request_queue *, unsigned int); typedef bool (get_budget_fn)(struct blk_mq_hw_ctx *); typedef void (put_budget_fn)(struct blk_mq_hw_ctx *); typedef enum blk_eh_timer_return (timeout_fn)(struct request *, bool); @@ -154,11 +156,6 @@ struct blk_mq_ops { */ commit_rqs_fn *commit_rqs; - /* - * Return a queue map type for the given request/bio flags - */ - rq_flags_to_type_fn *rq_flags_to_type; - /* * Reserve budget before queue request, once .queue_rq is * run, it is driver's responsibility to release the -- cgit v1.2.3 From 529262d56dbebe6a26df5d2fd24cc0e1bc8579e5 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sun, 2 Dec 2018 17:46:26 +0100 Subject: block: remove ->poll_fn This was intended to support users like nvme multipath, but is just getting in the way and adding another indirect call. Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 08d940f85fa0..0b3874bdbc6a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -283,7 +283,6 @@ static inline unsigned short req_get_ioprio(struct request *req) struct blk_queue_ctx; typedef blk_qc_t (make_request_fn) (struct request_queue *q, struct bio *bio); -typedef int (poll_q_fn) (struct request_queue *q, blk_qc_t, bool spin); struct bio_vec; typedef int (dma_drain_needed_fn)(struct request *); @@ -401,7 +400,6 @@ struct request_queue { struct rq_qos *rq_qos; make_request_fn *make_request_fn; - poll_q_fn *poll_fn; dma_drain_needed_fn *dma_drain_needed; const struct blk_mq_ops *mq_ops; -- cgit v1.2.3 From 6e0de61107f03c3222550d9b548cd331d31d82d1 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Dec 2018 06:50:40 -0700 Subject: blk-mq: remove QUEUE_FLAG_POLL from default MQ flags We only support polling if we have poll queues now, but the flag is being set by default. Remove the default QUEUE_FLAG_POLL setting, we'll set it in blk_mq_init_allocated_queue() if we have poll queues available for this device. Fixes: 6544d229bf43 ("block: enable polling by default if a poll map is initalized") Reported-by: Kirill Tkhai Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 0b3874bdbc6a..81f1b105946b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -606,8 +606,7 @@ struct request_queue { (1 << QUEUE_FLAG_ADD_RANDOM)) #define QUEUE_FLAG_MQ_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_SAME_COMP) | \ - (1 << QUEUE_FLAG_POLL)) + (1 << QUEUE_FLAG_SAME_COMP)) void blk_queue_flag_set(unsigned int flag, struct request_queue *q); void blk_queue_flag_clear(unsigned int flag, struct request_queue *q); -- cgit v1.2.3 From 0fe061b9f03c27d0370888efc22d4b3ac7af90cf Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:26 -0500 Subject: blkcg: fix ref count issue with bio_blkcg() using task_css The bio_blkcg() function turns out to be inconsistent and consequently dangerous to use. The first part returns a blkcg where a reference is owned by the bio meaning it does not need to be rcu protected. However, the third case, the last line, is problematic: return css_to_blkcg(task_css(current, io_cgrp_id)); This can race against task migration and the cgroup dying. It is also semantically different as it must be called rcu protected and is susceptible to failure when trying to get a reference to it. This patch adds association ahead of calling bio_blkcg() rather than after. This makes association a required and explicit step along the code paths for calling bio_blkcg(). In blk-iolatency, association is moved above the bio_blkcg() call to ensure it will not return %NULL. BFQ uses the old bio_blkcg() function, but I do not want to address it in this series due to the complexity. I have created a private version documenting the inconsistency and noting not to use it. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 98 ++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 90 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index a9e2e2037129..f619307171a6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -227,22 +227,103 @@ int blkg_conf_prep(struct blkcg *blkcg, const struct blkcg_policy *pol, char *input, struct blkg_conf_ctx *ctx); void blkg_conf_finish(struct blkg_conf_ctx *ctx); +/** + * blkcg_css - find the current css + * + * Find the css associated with either the kthread or the current task. + * This may return a dying css, so it is up to the caller to use tryget logic + * to confirm it is alive and well. + */ +static inline struct cgroup_subsys_state *blkcg_css(void) +{ + struct cgroup_subsys_state *css; + + css = kthread_blkcg(); + if (css) + return css; + return task_css(current, io_cgrp_id); +} + +/** + * blkcg_get_css - find and get a reference to the css + * + * Find the css associated with either the kthread or the current task. + * This takes a reference on the blkcg which will need to be managed by the + * caller. + */ +static inline struct cgroup_subsys_state *blkcg_get_css(void) +{ + struct cgroup_subsys_state *css; + + rcu_read_lock(); + + css = kthread_blkcg(); + if (css) { + css_get(css); + } else { + /* + * This is a bit complicated. It is possible task_css() is + * seeing an old css pointer here. This is caused by the + * current thread migrating away from this cgroup and this + * cgroup dying. css_tryget() will fail when trying to take a + * ref on a cgroup that's ref count has hit 0. + * + * Therefore, if it does fail, this means current must have + * been swapped away already and this is waiting for it to + * propagate on the polling cpu. Hence the use of cpu_relax(). + */ + while (true) { + css = task_css(current, io_cgrp_id); + if (likely(css_tryget(css))) + break; + cpu_relax(); + } + } + + rcu_read_unlock(); + + return css; +} static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; } -static inline struct blkcg *bio_blkcg(struct bio *bio) +/** + * __bio_blkcg - internal, inconsistent version to get blkcg + * + * DO NOT USE. + * This function is inconsistent and consequently is dangerous to use. The + * first part of the function returns a blkcg where a reference is owned by the + * bio. This means it does not need to be rcu protected as it cannot go away + * with the bio owning a reference to it. However, the latter potentially gets + * it from task_css(). This can race against task migration and the cgroup + * dying. It is also semantically different as it must be called rcu protected + * and is susceptible to failure when trying to get a reference to it. + * Therefore, it is not ok to assume that *_get() will always succeed on the + * blkcg returned here. + */ +static inline struct blkcg *__bio_blkcg(struct bio *bio) { - struct cgroup_subsys_state *css; + if (bio && bio->bi_css) + return css_to_blkcg(bio->bi_css); + return css_to_blkcg(blkcg_css()); +} +/** + * bio_blkcg - grab the blkcg associated with a bio + * @bio: target bio + * + * This returns the blkcg associated with a bio, %NULL if not associated. + * Callers are expected to either handle %NULL or know association has been + * done prior to calling this. + */ +static inline struct blkcg *bio_blkcg(struct bio *bio) +{ if (bio && bio->bi_css) return css_to_blkcg(bio->bi_css); - css = kthread_blkcg(); - if (css) - return css_to_blkcg(css); - return css_to_blkcg(task_css(current, io_cgrp_id)); + return NULL; } static inline bool blk_cgroup_congested(void) @@ -710,10 +791,10 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, bool throtl = false; rcu_read_lock(); - blkcg = bio_blkcg(bio); /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, &blkcg->css); + bio_associate_blkcg(bio, NULL); + blkcg = bio_blkcg(bio); blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { @@ -835,6 +916,7 @@ static inline int blkcg_activate_policy(struct request_queue *q, static inline void blkcg_deactivate_policy(struct request_queue *q, const struct blkcg_policy *pol) { } +static inline struct blkcg *__bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkcg *bio_blkcg(struct bio *bio) { return NULL; } static inline struct blkg_policy_data *blkg_to_pd(struct blkcg_gq *blkg, -- cgit v1.2.3 From b978962ad4f7f9c06e5aa07b2a9b22f6d600456c Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:27 -0500 Subject: blkcg: update blkg_lookup_create() to do locking To know when to create a blkg, the general pattern is to do a blkg_lookup() and if that fails, lock and do the lookup again, and if that fails finally create. It doesn't make much sense for everyone who wants to do creation to write this themselves. This changes blkg_lookup_create() to do locking and implement this pattern. The old blkg_lookup_create() is renamed to __blkg_lookup_create(). If a call site wants to do its own error handling or already owns the queue lock, they can use __blkg_lookup_create(). This will be used in upcoming patches. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Reviewed-by: Liu Bo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f619307171a6..b3b1a8187d23 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -181,6 +181,8 @@ extern struct cgroup_subsys_state * const blkcg_root_css; struct blkcg_gq *blkg_lookup_slowpath(struct blkcg *blkcg, struct request_queue *q, bool update_hint); +struct blkcg_gq *__blkg_lookup_create(struct blkcg *blkcg, + struct request_queue *q); struct blkcg_gq *blkg_lookup_create(struct blkcg *blkcg, struct request_queue *q); int blkcg_init_queue(struct request_queue *q); @@ -799,7 +801,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg = blkg_lookup(blkcg, q); if (unlikely(!blkg)) { spin_lock_irq(&q->queue_lock); - blkg = blkg_lookup_create(blkcg, q); + blkg = __blkg_lookup_create(blkcg, q); if (IS_ERR(blkg)) blkg = NULL; spin_unlock_irq(&q->queue_lock); -- cgit v1.2.3 From beea9da07d8a6228a7e4a31a83f9478d513bf03f Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:28 -0500 Subject: blkcg: convert blkg_lookup_create() to find closest blkg There are several scenarios where blkg_lookup_create() can fail such as the blkcg dying, request_queue is dying, or simply being OOM. Most handle this by simply falling back to the q->root_blkg and calling it a day. This patch implements the notion of closest blkg. During blkg_lookup_create(), if it fails to create, return the closest blkg found or the q->root_blkg. blkg_try_get_closest() is introduced and used during association so a bio is always attached to a blkg. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index b3b1a8187d23..c08e96e521ed 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -545,6 +545,20 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) return NULL; } +/** + * blkg_try_get_closest - try and get a blkg ref on the closet blkg + * @blkg: blkg to get + * + * This walks up the blkg tree to find the closest non-dying blkg and returns + * the blkg that it did association with as it may not be the passed in blkg. + */ +static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) +{ + while (!atomic_inc_not_zero(&blkg->refcnt)) + blkg = blkg->parent; + + return blkg; +} void __blkg_release_rcu(struct rcu_head *rcu); @@ -797,15 +811,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, /* associate blkcg if bio hasn't attached one */ bio_associate_blkcg(bio, NULL); blkcg = bio_blkcg(bio); - - blkg = blkg_lookup(blkcg, q); - if (unlikely(!blkg)) { - spin_lock_irq(&q->queue_lock); - blkg = __blkg_lookup_create(blkcg, q); - if (IS_ERR(blkg)) - blkg = NULL; - spin_unlock_irq(&q->queue_lock); - } + blkg = blkg_lookup_create(blkcg, q); throtl = blk_throtl_bio(q, blkg, bio); -- cgit v1.2.3 From 2268c0feb0ffb1c1bb6e1d4d5505d30f485aa77b Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:29 -0500 Subject: blkcg: introduce common blkg association logic There are 3 ways blkg association can happen: association with the current css, with the page css (swap), or from the wbc css (writeback). This patch handles how association is done for the first case where we are associating bsaed on the current css. If there is already a blkg associated, the css will be reused and association will be redone as the request_queue may have changed. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 056fb627edb3..62715a5a4f32 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -511,12 +511,15 @@ static inline int bio_associate_blkcg_from_page(struct bio *bio, #ifdef CONFIG_BLK_CGROUP int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); -int bio_associate_blkg(struct bio *bio, struct blkcg_gq *blkg); +void bio_disassociate_blkg(struct bio *bio); +void bio_associate_blkg(struct bio *bio); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } +static inline void bio_disassociate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { } -- cgit v1.2.3 From 5cdf2e3fea5ee37b66842d76a9b06e6dac0b933d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:31 -0500 Subject: blkcg: associate blkg when associating a device Previously, blkg association was handled by controller specific code in blk-throttle and blk-iolatency. However, because a blkg represents a relationship between a blkcg and a request_queue, it makes sense to keep the blkg->q and bio->bi_disk->queue consistent. This patch moves association into the bio_set_dev macro(). This should cover the majority of cases where the device is set/changed keeping the two pointers consistent. Fallback code is added to blkcg_bio_issue_check() to catch any missing paths. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 ++ include/linux/blk-cgroup.h | 18 ++++++++++-------- 2 files changed, 12 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 62715a5a4f32..6ee2ea8b378a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -491,12 +491,14 @@ do { \ bio_clear_flag(bio, BIO_THROTTLED);\ (bio)->bi_disk = (bdev)->bd_disk; \ (bio)->bi_partno = (bdev)->bd_partno; \ + bio_associate_blkg(bio); \ } while (0) #define bio_copy_dev(dst, src) \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ + bio_clone_blkcg_association(dst, src); \ } while (0) #define bio_dev(bio) \ diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index c08e96e521ed..f09752968c2a 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -21,6 +21,7 @@ #include #include #include +#include /* percpu_counter batch for blkg_[rw]stats, per-cpu drift doesn't matter */ #define BLKG_STAT_CPU_BATCH (INT_MAX / 2) @@ -802,21 +803,23 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { - struct blkcg *blkcg; struct blkcg_gq *blkg; bool throtl = false; - rcu_read_lock(); + if (!bio->bi_blkg) { + char b[BDEVNAME_SIZE]; + + WARN_ONCE(1, + "no blkg associated for bio on block-device: %s\n", + bio_devname(bio, b)); + bio_associate_blkg(bio); + } - /* associate blkcg if bio hasn't attached one */ - bio_associate_blkcg(bio, NULL); - blkcg = bio_blkcg(bio); - blkg = blkg_lookup_create(blkcg, q); + blkg = bio->bi_blkg; throtl = blk_throtl_bio(q, blkg, bio); if (!throtl) { - blkg = blkg ?: q->root_blkg; /* * If the bio is flagged with BIO_QUEUE_ENTERED it means this * is a split bio and we would have already accounted for the @@ -828,7 +831,6 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } - rcu_read_unlock(); return !throtl; } -- cgit v1.2.3 From e439bedf6b24264f620cc05627e23a90054bde41 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:32 -0500 Subject: blkcg: consolidate bio_issue_init() to be a part of core bio_issue_init among other things initializes the timestamp for an IO. Rather than have this logic handled by policies, this consolidates it to be on the init paths (normal, clone, bounce clone). Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Liu Bo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f09752968c2a..8b069c3775ee 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -800,6 +800,12 @@ static inline bool blk_throtl_bio(struct request_queue *q, struct blkcg_gq *blkg struct bio *bio) { return false; } #endif + +static inline void blkcg_bio_issue_init(struct bio *bio) +{ + bio_issue_init(&bio->bi_issue, bio_sectors(bio)); +} + static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { @@ -831,6 +837,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkg_rwstat_add(&blkg->stat_ios, bio->bi_opf, 1); } + blkcg_bio_issue_init(bio); + return !throtl; } @@ -936,6 +944,7 @@ static inline char *blkg_path(struct blkcg_gq *blkg) { return NULL; } static inline void blkg_get(struct blkcg_gq *blkg) { } static inline void blkg_put(struct blkcg_gq *blkg) { } +static inline void blkcg_bio_issue_init(struct bio *bio) { } static inline bool blkcg_bio_issue_check(struct request_queue *q, struct bio *bio) { return true; } -- cgit v1.2.3 From 6a7f6d86a561473032287c8e4583eac5853c6efa Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:33 -0500 Subject: blkcg: associate a blkg for pages being evicted by swap A prior patch in this series added blkg association to bios issued by cgroups. There are two other paths that we want to attribute work back to the appropriate cgroup: swap and writeback. Here we modify the way swap tags bios to include the blkg. Writeback will be tackle in the next patch. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 6ee2ea8b378a..f13572c254a7 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -505,10 +505,10 @@ do { \ disk_devt((bio)->bi_disk) #if defined(CONFIG_MEMCG) && defined(CONFIG_BLK_CGROUP) -int bio_associate_blkcg_from_page(struct bio *bio, struct page *page); +void bio_associate_blkg_from_page(struct bio *bio, struct page *page); #else -static inline int bio_associate_blkcg_from_page(struct bio *bio, - struct page *page) { return 0; } +static inline void bio_associate_blkg_from_page(struct bio *bio, + struct page *page) { } #endif #ifdef CONFIG_BLK_CGROUP -- cgit v1.2.3 From fd42df305f804ddc0d5ac028e944784283b2f92d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:34 -0500 Subject: blkcg: associate writeback bios with a blkg One of the goals of this series is to remove a separate reference to the css of the bio. This can and should be accessed via bio_blkcg(). In this patch, wbc_init_bio() now requires a bio to have a device associated with it. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 5 +++++ include/linux/writeback.h | 5 +++-- 2 files changed, 8 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f13572c254a7..f0438061a5a3 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -515,6 +515,8 @@ static inline void bio_associate_blkg_from_page(struct bio *bio, int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); +void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css); void bio_disassociate_task(struct bio *bio); void bio_clone_blkcg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ @@ -522,6 +524,9 @@ static inline int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css) { return 0; } static inline void bio_disassociate_blkg(struct bio *bio) { } static inline void bio_associate_blkg(struct bio *bio) { } +static inline void bio_associate_blkg_from_css(struct bio *bio, + struct cgroup_subsys_state *css) +{ } static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkcg_association(struct bio *dst, struct bio *src) { } diff --git a/include/linux/writeback.h b/include/linux/writeback.h index fdfd04e348f6..738a0c24874f 100644 --- a/include/linux/writeback.h +++ b/include/linux/writeback.h @@ -246,7 +246,8 @@ static inline void wbc_attach_fdatawrite_inode(struct writeback_control *wbc, * * @bio is a part of the writeback in progress controlled by @wbc. Perform * writeback specific initialization. This is used to apply the cgroup - * writeback context. + * writeback context. Must be called after the bio has been associated with + * a device. */ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) { @@ -257,7 +258,7 @@ static inline void wbc_init_bio(struct writeback_control *wbc, struct bio *bio) * regular writeback instead of writing things out itself. */ if (wbc->wb) - bio_associate_blkcg(bio, wbc->wb->blkcg_css); + bio_associate_blkg_from_css(bio, wbc->wb->blkcg_css); } #else /* CONFIG_CGROUP_WRITEBACK */ -- cgit v1.2.3 From db6638d7d177a8bc74c9e539e2e0d7d061c767b1 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:35 -0500 Subject: blkcg: remove bio->bi_css and instead use bio->bi_blkg Prior patches ensured that any bio that interacts with a request_queue is properly associated with a blkg. This makes bio->bi_css unnecessary as blkg maintains a reference to blkcg already. This removes the bio field bi_css and transfers corresponding uses to access via bi_blkg. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/bio.h | 11 ++++------- include/linux/blk-cgroup.h | 8 ++++---- include/linux/blk_types.h | 7 ++++--- 3 files changed, 12 insertions(+), 14 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index f0438061a5a3..84e1c4dc703a 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -498,7 +498,7 @@ do { \ do { \ (dst)->bi_disk = (src)->bi_disk; \ (dst)->bi_partno = (src)->bi_partno; \ - bio_clone_blkcg_association(dst, src); \ + bio_clone_blkg_association(dst, src); \ } while (0) #define bio_dev(bio) \ @@ -512,24 +512,21 @@ static inline void bio_associate_blkg_from_page(struct bio *bio, #endif #ifdef CONFIG_BLK_CGROUP -int bio_associate_blkcg(struct bio *bio, struct cgroup_subsys_state *blkcg_css); void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); void bio_disassociate_task(struct bio *bio); -void bio_clone_blkcg_association(struct bio *dst, struct bio *src); +void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ -static inline int bio_associate_blkcg(struct bio *bio, - struct cgroup_subsys_state *blkcg_css) { return 0; } static inline void bio_disassociate_blkg(struct bio *bio) { } static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } static inline void bio_disassociate_task(struct bio *bio) { } -static inline void bio_clone_blkcg_association(struct bio *dst, - struct bio *src) { } +static inline void bio_clone_blkg_association(struct bio *dst, + struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ #ifdef CONFIG_HIGHMEM diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 8b069c3775ee..f11c37f8ce09 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -309,8 +309,8 @@ static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) */ static inline struct blkcg *__bio_blkcg(struct bio *bio) { - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; return css_to_blkcg(blkcg_css()); } @@ -324,8 +324,8 @@ static inline struct blkcg *__bio_blkcg(struct bio *bio) */ static inline struct blkcg *bio_blkcg(struct bio *bio) { - if (bio && bio->bi_css) - return css_to_blkcg(bio->bi_css); + if (bio && bio->bi_blkg) + return bio->bi_blkg->blkcg; return NULL; } diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index c0ba1a038ff3..46c005d601ac 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -174,10 +174,11 @@ struct bio { void *bi_private; #ifdef CONFIG_BLK_CGROUP /* - * Optional css associated with this bio. Put on bio - * release. Read comment on top of bio_associate_current(). + * Represents the association of the css and request_queue for the bio. + * If a bio goes direct to device, it will not have a blkg as it will + * not have a request_queue associated with it. The reference is put + * on release of the bio. */ - struct cgroup_subsys_state *bi_css; struct blkcg_gq *bi_blkg; struct bio_issue bi_issue; #endif -- cgit v1.2.3 From fc5a828bfad628c1092194f2814604943561c52d Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:36 -0500 Subject: blkcg: remove additional reference to the css The previous patch in this series removed carrying around a pointer to the css in blkg. However, the blkg association logic still relied on taking a reference on the css to ensure we wouldn't fail in getting a reference for the blkg. Here the implicit dependency on the css is removed. The association continues to rely on the tryget logic walking up the blkg tree. This streamlines the three ways that association can happen: normal, swap, and writeback. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 41 ----------------------------------------- include/linux/cgroup.h | 2 ++ 2 files changed, 2 insertions(+), 41 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index f11c37f8ce09..284819a4d122 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -247,47 +247,6 @@ static inline struct cgroup_subsys_state *blkcg_css(void) return task_css(current, io_cgrp_id); } -/** - * blkcg_get_css - find and get a reference to the css - * - * Find the css associated with either the kthread or the current task. - * This takes a reference on the blkcg which will need to be managed by the - * caller. - */ -static inline struct cgroup_subsys_state *blkcg_get_css(void) -{ - struct cgroup_subsys_state *css; - - rcu_read_lock(); - - css = kthread_blkcg(); - if (css) { - css_get(css); - } else { - /* - * This is a bit complicated. It is possible task_css() is - * seeing an old css pointer here. This is caused by the - * current thread migrating away from this cgroup and this - * cgroup dying. css_tryget() will fail when trying to take a - * ref on a cgroup that's ref count has hit 0. - * - * Therefore, if it does fail, this means current must have - * been swapped away already and this is waiting for it to - * propagate on the polling cpu. Hence the use of cpu_relax(). - */ - while (true) { - css = task_css(current, io_cgrp_id); - if (likely(css_tryget(css))) - break; - cpu_relax(); - } - } - - rcu_read_unlock(); - - return css; -} - static inline struct blkcg *css_to_blkcg(struct cgroup_subsys_state *css) { return css ? container_of(css, struct blkcg, css) : NULL; diff --git a/include/linux/cgroup.h b/include/linux/cgroup.h index 9d12757a65b0..9968332cceed 100644 --- a/include/linux/cgroup.h +++ b/include/linux/cgroup.h @@ -93,6 +93,8 @@ extern struct css_set init_css_set; bool css_has_online_children(struct cgroup_subsys_state *css); struct cgroup_subsys_state *css_from_id(int id, struct cgroup_subsys *ss); +struct cgroup_subsys_state *cgroup_e_css(struct cgroup *cgroup, + struct cgroup_subsys *ss); struct cgroup_subsys_state *cgroup_get_e_css(struct cgroup *cgroup, struct cgroup_subsys *ss); struct cgroup_subsys_state *css_tryget_online_from_dir(struct dentry *dentry, -- cgit v1.2.3 From 6f70fb66182b02e50deea65e9a3a86b7bf659a39 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:37 -0500 Subject: blkcg: remove bio_disassociate_task() Now that a bio only holds a blkg reference, so clean up is simply putting back that reference. Remove bio_disassociate_task() as it just calls bio_disassociate_blkg() and call the latter directly. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/bio.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 84e1c4dc703a..7380b094dcca 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -516,7 +516,6 @@ void bio_disassociate_blkg(struct bio *bio); void bio_associate_blkg(struct bio *bio); void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css); -void bio_disassociate_task(struct bio *bio); void bio_clone_blkg_association(struct bio *dst, struct bio *src); #else /* CONFIG_BLK_CGROUP */ static inline void bio_disassociate_blkg(struct bio *bio) { } @@ -524,7 +523,6 @@ static inline void bio_associate_blkg(struct bio *bio) { } static inline void bio_associate_blkg_from_css(struct bio *bio, struct cgroup_subsys_state *css) { } -static inline void bio_disassociate_task(struct bio *bio) { } static inline void bio_clone_blkg_association(struct bio *dst, struct bio *src) { } #endif /* CONFIG_BLK_CGROUP */ -- cgit v1.2.3 From 7fcf2b033b84e261dca283bc2911aaea4b07b525 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:38 -0500 Subject: blkcg: change blkg reference counting to use percpu_ref Every bio is now associated with a blkg putting blkg_get, blkg_try_get, and blkg_put on the hot path. Switch over the refcnt in blkg to use percpu_ref. Signed-off-by: Dennis Zhou Acked-by: Tejun Heo Reviewed-by: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 15 +++++---------- 1 file changed, 5 insertions(+), 10 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 284819a4d122..d19ef15a673d 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -124,7 +124,7 @@ struct blkcg_gq { struct blkcg_gq *parent; /* reference count */ - atomic_t refcnt; + struct percpu_ref refcnt; /* is this blkg online? protected by both blkcg and q locks */ bool online; @@ -487,8 +487,7 @@ static inline int blkg_path(struct blkcg_gq *blkg, char *buf, int buflen) */ static inline void blkg_get(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - atomic_inc(&blkg->refcnt); + percpu_ref_get(&blkg->refcnt); } /** @@ -500,7 +499,7 @@ static inline void blkg_get(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) { - if (atomic_inc_not_zero(&blkg->refcnt)) + if (percpu_ref_tryget(&blkg->refcnt)) return blkg; return NULL; } @@ -514,23 +513,19 @@ static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) { - while (!atomic_inc_not_zero(&blkg->refcnt)) + while (!percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; return blkg; } -void __blkg_release_rcu(struct rcu_head *rcu); - /** * blkg_put - put a blkg reference * @blkg: blkg to put */ static inline void blkg_put(struct blkcg_gq *blkg) { - WARN_ON_ONCE(atomic_read(&blkg->refcnt) <= 0); - if (atomic_dec_and_test(&blkg->refcnt)) - call_rcu(&blkg->rcu_head, __blkg_release_rcu); + percpu_ref_put(&blkg->refcnt); } /** -- cgit v1.2.3 From 7754f669ffde3919e398a9e591cd7510d6cf4e73 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Wed, 5 Dec 2018 12:10:39 -0500 Subject: blkcg: rename blkg_try_get() to blkg_tryget() blkg reference counting now uses percpu_ref rather than atomic_t. Let's make this consistent with css_tryget. This renames blkg_try_get to blkg_tryget and now returns a bool rather than the blkg or %NULL. Signed-off-by: Dennis Zhou Reviewed-by: Josef Bacik Acked-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 12 +++++------- 1 file changed, 5 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index d19ef15a673d..752de1becb5c 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -491,27 +491,25 @@ static inline void blkg_get(struct blkcg_gq *blkg) } /** - * blkg_try_get - try and get a blkg reference + * blkg_tryget - try and get a blkg reference * @blkg: blkg to get * * This is for use when doing an RCU lookup of the blkg. We may be in the midst * of freeing this blkg, so we can only use it if the refcnt is not zero. */ -static inline struct blkcg_gq *blkg_try_get(struct blkcg_gq *blkg) +static inline bool blkg_tryget(struct blkcg_gq *blkg) { - if (percpu_ref_tryget(&blkg->refcnt)) - return blkg; - return NULL; + return percpu_ref_tryget(&blkg->refcnt); } /** - * blkg_try_get_closest - try and get a blkg ref on the closet blkg + * blkg_tryget_closest - try and get a blkg ref on the closet blkg * @blkg: blkg to get * * This walks up the blkg tree to find the closest non-dying blkg and returns * the blkg that it did association with as it may not be the passed in blkg. */ -static inline struct blkcg_gq *blkg_try_get_closest(struct blkcg_gq *blkg) +static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) { while (!percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; -- cgit v1.2.3 From 4705de735b3383792c84a92e57508d6865caa85f Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Thu, 6 Dec 2018 12:49:38 -0500 Subject: blkcg: put back rcu lock in blkcg_bio_issue_check() I was a little overzealous in removing the rcu_read_lock() call from blkcg_bio_issue_check() and it broke blk-throttle. Put it back. Fixes: e35403a034bf ("blkcg: associate blkg when associating a device") Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index 752de1becb5c..bf13ecb0fe4f 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -764,6 +764,8 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, struct blkcg_gq *blkg; bool throtl = false; + rcu_read_lock(); + if (!bio->bi_blkg) { char b[BDEVNAME_SIZE]; @@ -791,6 +793,7 @@ static inline bool blkcg_bio_issue_check(struct request_queue *q, blkcg_bio_issue_init(bio); + rcu_read_unlock(); return !throtl; } -- cgit v1.2.3 From 12b2117161ddbdcdb69777404c5aa2a9fe6ad7d5 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Nov 2018 10:28:12 -0700 Subject: nvme: introduce ctrl attributes enumeration We are growing more controller attributes, so use a proper enumeration for it. For now just add the 128-bit hostid which we support. Reviewed-by: Chaitanya Kulkarni Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 818dbe9331be..753c83a5c01f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -198,6 +198,10 @@ enum { NVME_PS_FLAGS_NON_OP_STATE = 1 << 1, }; +enum nvme_ctrl_attr { + NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), +}; + struct nvme_id_ctrl { __le16 vid; __le16 ssvid; -- cgit v1.2.3 From 6e3ca03ee934572d5de4fb2224c01e12c4d422c8 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 2 Nov 2018 10:28:15 -0700 Subject: nvme: support traffic based keep-alive If the controller supports traffic based keep alive, we restart the keep alive timer if any admin or io commands was completed during the kato period. This prevents a possible starvation of keep alive commands in the presence of heavy traffic as in such case, we already have a health indication from the host perspective. Only set a comp_seen indicator in case the controller supports keep alive to minimize the overhead for pci controllers. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 753c83a5c01f..429c4cf90899 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -200,6 +200,7 @@ enum { enum nvme_ctrl_attr { NVME_CTRL_ATTR_HID_128_BIT = (1 << 0), + NVME_CTRL_ATTR_TBKAS = (1 << 6), }; struct nvme_id_ctrl { -- cgit v1.2.3 From 7114ddeb40c0ccc584d86df598da4054ca4cd79f Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Mon, 12 Nov 2018 13:56:34 -0800 Subject: nvmet: change aen mask functions to use bit numbers Functions nvmet_aen_disabled and nvmet_clear_aen were using values not bit numbers ie 1 << 9 not 9 for bit function clear_bit and test_and_set_bit. Signed-off-by: Jay Sternberg Reviewed-by: Phil Cayton Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 429c4cf90899..d6cfa194be80 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -489,9 +489,15 @@ enum { }; enum { - NVME_AEN_CFG_NS_ATTR = 1 << 8, - NVME_AEN_CFG_FW_ACT = 1 << 9, - NVME_AEN_CFG_ANA_CHANGE = 1 << 11, + NVME_AEN_BIT_NS_ATTR = 8, + NVME_AEN_BIT_FW_ACT = 9, + NVME_AEN_BIT_ANA_CHANGE = 11, +}; + +enum { + NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR, + NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT, + NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE, }; struct nvme_lba_range_type { -- cgit v1.2.3 From f301c2b1368905340133ff8ef4485befdd0b7e2d Mon Sep 17 00:00:00 2001 From: Jay Sternberg Date: Mon, 12 Nov 2018 13:56:37 -0800 Subject: nvmet: add defines for discovery change async events Add AEN/AER values as defined by the specification Signed-off-by: Jay Sternberg Reviewed-by: Sagi Grimberg Reviewed-by: Christoph Hellwig Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index d6cfa194be80..77d320d32ee5 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -486,18 +486,21 @@ enum { NVME_AER_NOTICE_NS_CHANGED = 0x00, NVME_AER_NOTICE_FW_ACT_STARTING = 0x01, NVME_AER_NOTICE_ANA = 0x03, + NVME_AER_NOTICE_DISC_CHANGED = 0xf0, }; enum { NVME_AEN_BIT_NS_ATTR = 8, NVME_AEN_BIT_FW_ACT = 9, NVME_AEN_BIT_ANA_CHANGE = 11, + NVME_AEN_BIT_DISC_CHANGE = 31, }; enum { NVME_AEN_CFG_NS_ATTR = 1 << NVME_AEN_BIT_NS_ATTR, NVME_AEN_CFG_FW_ACT = 1 << NVME_AEN_BIT_FW_ACT, NVME_AEN_CFG_ANA_CHANGE = 1 << NVME_AEN_BIT_ANA_CHANGE, + NVME_AEN_CFG_DISC_CHANGE = 1 << NVME_AEN_BIT_DISC_CHANGE, }; struct nvme_lba_range_type { -- cgit v1.2.3 From 6e2e312ea7ff73acfafaa5c9851e151e9483c761 Mon Sep 17 00:00:00 2001 From: James Smart Date: Wed, 14 Nov 2018 15:57:46 -0800 Subject: nvmet-fc: remove the IN_ISR deferred scheduling options All target lldd's call the cmd receive and op completions in non-isr thread contexts. As such the IN_ISR options are not necessary. Remove the functionality and flags, which also removes cpu assignments to queues. Signed-off-by: James Smart Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme-fc-driver.h | 16 ---------------- 1 file changed, 16 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme-fc-driver.h b/include/linux/nvme-fc-driver.h index f4ab3b1925ac..91745cc3704c 100644 --- a/include/linux/nvme-fc-driver.h +++ b/include/linux/nvme-fc-driver.h @@ -648,22 +648,6 @@ enum { * sequence in one LLDD operation. Errors during Data * sequence transmit must not allow RSP sequence to be sent. */ - NVMET_FCTGTFEAT_CMD_IN_ISR = (1 << 1), - /* Bit 2: When 0, the LLDD is calling the cmd rcv handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the cmd rcv handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ - NVMET_FCTGTFEAT_OPDONE_IN_ISR = (1 << 2), - /* Bit 3: When 0, the LLDD is calling the op done handler - * in a non-isr context, allowing the transport to finish - * op completion in the calling context. When 1, the LLDD - * is calling the op done handler in an ISR context, - * requiring the transport to transition to a workqueue - * for op completion. - */ }; -- cgit v1.2.3 From e6a622fd6d66b83779357e3400f487fc159a7d83 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 19 Nov 2018 14:11:12 -0800 Subject: nvmet: support fabrics sq flow control Technical proposal 8005 "fabrics SQ flow control" introduces a mode where a host and controller agree to omit sq_head pointer updates when sending nvme completions. In case the host indicated desire to operate in this mode (connect attribute) the controller will return back a connect completion with sq_head value of 0xffff as indication that it will omit sq_head pointer updates. This mode saves us an atomic update in the I/O path. Reviewed-by: Hannes Reinecke [hch: suggested better implementation] Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 77d320d32ee5..e7d731776f62 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1044,6 +1044,10 @@ struct nvmf_disc_rsp_page_hdr { struct nvmf_disc_rsp_page_entry entries[0]; }; +enum { + NVME_CONNECT_DISABLE_SQFLOW = (1 << 2), +}; + struct nvmf_connect_command { __u8 opcode; __u8 resv1; -- cgit v1.2.3 From 0445e1b5a2fed4612b7f72d9a56889c026b60aa9 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 19 Nov 2018 14:11:13 -0800 Subject: nvmet: don't override treq upon modification. Only override the allowed parts of it. Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg [hch: slight tweak to the NVME_TREQ_SECURE_CHANNEL_MASK definition] Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index e7d731776f62..4fc48071e5ea 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -61,6 +61,8 @@ enum { NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ NVMF_TREQ_REQUIRED = 1, /* Required */ NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ +#define NVME_TREQ_SECURE_CHANNEL_MASK \ + (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED) }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS -- cgit v1.2.3 From 9b95d2fb857f242aacbf4e205656818b0ef067e1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 20 Nov 2018 10:34:19 +0100 Subject: nvmet: expose support for fabrics SQ flow control disable in treq Technical Proposal introduces an indication for SQ flow control disable support. Expose it since we are able to operate in this mode. Reviewed-by: Hannes Reinecke Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4fc48071e5ea..c03973c215ad 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -58,11 +58,13 @@ enum { /* Transport Requirements codes for Discovery Log Page entry TREQ field */ enum { - NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ - NVMF_TREQ_REQUIRED = 1, /* Required */ - NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ + NVMF_TREQ_NOT_SPECIFIED = 0, /* Not specified */ + NVMF_TREQ_REQUIRED = 1, /* Required */ + NVMF_TREQ_NOT_REQUIRED = 2, /* Not Required */ #define NVME_TREQ_SECURE_CHANNEL_MASK \ (NVMF_TREQ_REQUIRED | NVMF_TREQ_NOT_REQUIRED) + + NVMF_TREQ_DISABLE_SQFLOW = (1 << 2), /* Supports SQ flow control disable */ }; /* RDMA QP Service Type codes for Discovery Log Page entry TSAS -- cgit v1.2.3 From 49cd84b6f8b677ef45731ed56ddb802cdbb94c9e Mon Sep 17 00:00:00 2001 From: Keith Busch Date: Tue, 27 Nov 2018 09:40:57 -0700 Subject: nvme: implement Enhanced Command Retry A controller may have an internal state that is not able to successfully process commands for a short duration. In such states, an immediate command requeue is expected to fail. The driver may exceed its max retry count, which permanently ends the command in failure when the same command would succeed after waiting for the controller to be ready. NVMe ratified TP 4033 provides a delay hint in the completion status code for failed commands. Implement the retry delay based on the command completion status and the controller's requested delay. Note that requeued commands are handled per request_queue, not per individual request. If multiple commands fail, the controller should consistently report the desired delay time for retryable commands in all CQEs, otherwise the requeue list may be kicked too soon. Signed-off-by: Keith Busch Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/nvme.h | 17 ++++++++++++++++- 1 file changed, 16 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index c03973c215ad..88812cb15be0 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -223,7 +223,11 @@ struct nvme_id_ctrl { __le32 rtd3e; __le32 oaes; __le32 ctratt; - __u8 rsvd100[156]; + __u8 rsvd100[28]; + __le16 crdt1; + __le16 crdt2; + __le16 crdt3; + __u8 rsvd134[122]; __le16 oacs; __u8 acl; __u8 aerl; @@ -756,6 +760,15 @@ enum { NVME_HOST_MEM_RETURN = (1 << 1), }; +struct nvme_feat_host_behavior { + __u8 acre; + __u8 resv1[511]; +}; + +enum { + NVME_ENABLE_ACRE = 1, +}; + /* Admin commands */ enum nvme_admin_opcode { @@ -810,6 +823,7 @@ enum { NVME_FEAT_RRL = 0x12, NVME_FEAT_PLM_CONFIG = 0x13, NVME_FEAT_PLM_WINDOW = 0x14, + NVME_FEAT_HOST_BEHAVIOR = 0x16, NVME_FEAT_SW_PROGRESS = 0x80, NVME_FEAT_HOST_ID = 0x81, NVME_FEAT_RESV_MASK = 0x82, @@ -1265,6 +1279,7 @@ enum { NVME_SC_ANA_TRANSITION = 0x303, NVME_SC_HOST_PATH_ERROR = 0x370, + NVME_SC_CRD = 0x1800, NVME_SC_DNR = 0x4000, }; -- cgit v1.2.3 From 112f158f66cbe25fd561a5dfe9c3826e06abf757 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Thu, 6 Dec 2018 11:41:18 -0500 Subject: block: stop passing 'cpu' to all percpu stats methods All of part_stat_* and related methods are used with preempt disabled, so there is no need to pass cpu around to allow of them. Just call smp_processor_id() as needed. Suggested-by: Jens Axboe Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/genhd.h | 26 +++++++++++++------------- 1 file changed, 13 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 0c5ee17b4d88..1677cd2a4c4e 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -295,8 +295,8 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) #define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define __part_stat_add(cpu, part, field, addnd) \ - (per_cpu_ptr((part)->dkstats, (cpu))->field += (addnd)) +#define __part_stat_add(part, field, addnd) \ + (per_cpu_ptr((part)->dkstats, smp_processor_id())->field += (addnd)) #define part_stat_read(part, field) \ ({ \ @@ -333,7 +333,7 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_lock() ({ rcu_read_lock(); 0; }) #define part_stat_unlock() rcu_read_unlock() -#define __part_stat_add(cpu, part, field, addnd) \ +#define __part_stat_add(part, field, addnd) \ ((part)->dkstats.field += addnd) #define part_stat_read(part, field) ((part)->dkstats.field) @@ -362,19 +362,19 @@ static inline void free_part_stats(struct hd_struct *part) part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) -#define part_stat_add(cpu, part, field, addnd) do { \ - __part_stat_add((cpu), (part), field, addnd); \ +#define part_stat_add(part, field, addnd) do { \ + __part_stat_add((part), field, addnd); \ if ((part)->partno) \ - __part_stat_add((cpu), &part_to_disk((part))->part0, \ + __part_stat_add(&part_to_disk((part))->part0, \ field, addnd); \ } while (0) -#define part_stat_dec(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, -1) -#define part_stat_inc(cpu, gendiskp, field) \ - part_stat_add(cpu, gendiskp, field, 1) -#define part_stat_sub(cpu, gendiskp, field, subnd) \ - part_stat_add(cpu, gendiskp, field, -subnd) +#define part_stat_dec(gendiskp, field) \ + part_stat_add(gendiskp, field, -1) +#define part_stat_inc(gendiskp, field) \ + part_stat_add(gendiskp, field, 1) +#define part_stat_sub(gendiskp, field, subnd) \ + part_stat_add(gendiskp, field, -subnd) void part_in_flight(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); @@ -399,7 +399,7 @@ static inline void free_part_info(struct hd_struct *part) } /* block/blk-core.c */ -extern void part_round_stats(struct request_queue *q, int cpu, struct hd_struct *part); +extern void part_round_stats(struct request_queue *q, struct hd_struct *part); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, -- cgit v1.2.3 From 5b18b5a737600fd20ba2045f320d5926ebbf341a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 6 Dec 2018 11:41:19 -0500 Subject: block: delete part_round_stats and switch to less precise counting We want to convert to per-cpu in_flight counters. The function part_round_stats needs the in_flight counter every jiffy, it would be too costly to sum all the percpu variables every jiffy, so it must be deleted. part_round_stats is used to calculate two counters - time_in_queue and io_ticks. time_in_queue can be calculated without part_round_stats, by adding the duration of the I/O when the I/O ends (the value is almost as exact as the previously calculated value, except that time for in-progress I/Os is not counted). io_ticks can be approximated by increasing the value when I/O is started or ended and the jiffies value has changed. If the I/Os take less than a jiffy, the value is as exact as the previously calculated value. If the I/Os take more than a jiffy, io_ticks can drift behind the previously calculated value. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 1677cd2a4c4e..838c2a7a40c5 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -398,8 +398,7 @@ static inline void free_part_info(struct hd_struct *part) kfree(part->info); } -/* block/blk-core.c */ -extern void part_round_stats(struct request_queue *q, struct hd_struct *part); +void update_io_ticks(struct hd_struct *part, unsigned long now); /* block/genhd.c */ extern void device_add_disk(struct device *parent, struct gendisk *disk, -- cgit v1.2.3 From 1226b8dd0e91331cfab500f305b2c264445a0392 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 6 Dec 2018 11:41:20 -0500 Subject: block: switch to per-cpu in-flight counters Now when part_round_stats is gone, we can switch to per-cpu in-flight counters. We use the local-atomic type local_t, so that if part_inc_in_flight or part_dec_in_flight is reentrantly called from an interrupt, the value will be correct. The other counters could be corrupted due to reentrant interrupt, but the corruption only results in slight counter skew - the in_flight counter must be exact, so it needs local_t. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/genhd.h | 29 ++++++++++++++++++++++------- 1 file changed, 22 insertions(+), 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 838c2a7a40c5..636b4f687e35 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_BLOCK @@ -89,6 +90,7 @@ struct disk_stats { unsigned long merges[NR_STAT_GROUPS]; unsigned long io_ticks; unsigned long time_in_queue; + local_t in_flight[2]; }; #define PARTITION_META_INFO_VOLNAMELTH 64 @@ -122,7 +124,6 @@ struct hd_struct { int make_it_fail; #endif unsigned long stamp; - atomic_t in_flight[2]; #ifdef CONFIG_SMP struct disk_stats __percpu *dkstats; #else @@ -295,8 +296,11 @@ extern struct hd_struct *disk_map_sector_rcu(struct gendisk *disk, #define part_stat_lock() ({ rcu_read_lock(); get_cpu(); }) #define part_stat_unlock() do { put_cpu(); rcu_read_unlock(); } while (0) -#define __part_stat_add(part, field, addnd) \ - (per_cpu_ptr((part)->dkstats, smp_processor_id())->field += (addnd)) +#define part_stat_get_cpu(part, field, cpu) \ + (per_cpu_ptr((part)->dkstats, (cpu))->field) + +#define part_stat_get(part, field) \ + part_stat_get_cpu(part, field, smp_processor_id()) #define part_stat_read(part, field) \ ({ \ @@ -333,10 +337,9 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_lock() ({ rcu_read_lock(); 0; }) #define part_stat_unlock() rcu_read_unlock() -#define __part_stat_add(part, field, addnd) \ - ((part)->dkstats.field += addnd) - -#define part_stat_read(part, field) ((part)->dkstats.field) +#define part_stat_get(part, field) ((part)->dkstats.field) +#define part_stat_get_cpu(part, field, cpu) part_stat_get(part, field) +#define part_stat_read(part, field) part_stat_get(part, field) static inline void part_stat_set_all(struct hd_struct *part, int value) { @@ -362,6 +365,9 @@ static inline void free_part_stats(struct hd_struct *part) part_stat_read(part, field[STAT_WRITE]) + \ part_stat_read(part, field[STAT_DISCARD])) +#define __part_stat_add(part, field, addnd) \ + (part_stat_get(part, field) += (addnd)) + #define part_stat_add(part, field, addnd) do { \ __part_stat_add((part), field, addnd); \ if ((part)->partno) \ @@ -376,6 +382,15 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_sub(gendiskp, field, subnd) \ part_stat_add(gendiskp, field, -subnd) +#define part_stat_local_dec(gendiskp, field) \ + local_dec(&(part_stat_get(gendiskp, field))) +#define part_stat_local_inc(gendiskp, field) \ + local_inc(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read(gendiskp, field) \ + local_read(&(part_stat_get(gendiskp, field))) +#define part_stat_local_read_cpu(gendiskp, field, cpu) \ + local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) + void part_in_flight(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, -- cgit v1.2.3 From e016b78201a2d9ff40f3f0da072292689af24c7f Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 6 Dec 2018 11:41:21 -0500 Subject: block: return just one value from part_in_flight The previous patches deleted all the code that needed the second value returned from part_in_flight - now the kernel only uses the first value. Consequently, part_in_flight (and blk_mq_in_flight) may be changed so that it only returns one value. This patch just refactors the code, there's no functional change. Signed-off-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/genhd.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/genhd.h b/include/linux/genhd.h index 636b4f687e35..06c0fd594097 100644 --- a/include/linux/genhd.h +++ b/include/linux/genhd.h @@ -391,8 +391,7 @@ static inline void free_part_stats(struct hd_struct *part) #define part_stat_local_read_cpu(gendiskp, field, cpu) \ local_read(&(part_stat_get_cpu(gendiskp, field, cpu))) -void part_in_flight(struct request_queue *q, struct hd_struct *part, - unsigned int inflight[2]); +unsigned int part_in_flight(struct request_queue *q, struct hd_struct *part); void part_in_flight_rw(struct request_queue *q, struct hd_struct *part, unsigned int inflight[2]); void part_dec_in_flight(struct request_queue *q, struct hd_struct *part, -- cgit v1.2.3 From 24828d0536bbedc9b265f2b01ffca99de3f6a7c7 Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Tue, 11 Dec 2018 20:16:24 +0100 Subject: lightnvm: dynamic DMA pool entry size MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently lightnvm and pblk uses single DMA pool, for which the entry size always is equal to PAGE_SIZE. The contents of each entry allocated from the DMA pool consists of a PPA list (8bytes * 64), leaving 56bytes * 64 space for metadata. Since the metadata field can be bigger, such as 128 bytes, the static size does not cover this use-case. This patch adds support for I/O metadata above 56 bytes by changing DMA pool size based on device meta size and allows pblk to use OOB metadata >=16B. Reviewed-by: Javier González Signed-off-by: Igor Konopko Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 2fdeac1a420d..7afedaddbd15 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -90,7 +90,7 @@ typedef int (nvm_get_chk_meta_fn)(struct nvm_dev *, sector_t, int, struct nvm_chk_meta *); typedef int (nvm_submit_io_fn)(struct nvm_dev *, struct nvm_rq *); typedef int (nvm_submit_io_sync_fn)(struct nvm_dev *, struct nvm_rq *); -typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *); +typedef void *(nvm_create_dma_pool_fn)(struct nvm_dev *, char *, int); typedef void (nvm_destroy_dma_pool_fn)(void *); typedef void *(nvm_dev_dma_alloc_fn)(struct nvm_dev *, void *, gfp_t, dma_addr_t *); -- cgit v1.2.3 From a16816b9e462e8ee86a908606bde54b53cfeca80 Mon Sep 17 00:00:00 2001 From: Igor Konopko Date: Tue, 11 Dec 2018 20:16:25 +0100 Subject: lightnvm: disable interleaved metadata MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Currently pblk only check the size of I/O metadata and does not take into account if this metadata is in a separate buffer or interleaved in a single metadata buffer. In reality only the first scenario is supported, where second mode will break pblk functionality during any IO operation. This patch prevents pblk to be instantiated in case device only supports interleaved metadata. Reviewed-by: Javier González Signed-off-by: Igor Konopko Signed-off-by: Matias Bjørling Signed-off-by: Jens Axboe --- include/linux/lightnvm.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/lightnvm.h b/include/linux/lightnvm.h index 7afedaddbd15..5d865a5d5cdc 100644 --- a/include/linux/lightnvm.h +++ b/include/linux/lightnvm.h @@ -357,6 +357,7 @@ struct nvm_geo { u32 clba; /* sectors per chunk */ u16 csecs; /* sector size */ u16 sos; /* out-of-band area size */ + bool ext; /* metadata in extended data buffer */ /* device write constrains */ u32 ws_min; /* minimum write size */ -- cgit v1.2.3 From 0273ac349f08f4ff9ef88aaaf9c9f2aa6e87d2be Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Tue, 11 Dec 2018 18:03:08 -0500 Subject: blkcg: handle dying request_queue when associating a blkg Between v3 [1] and v4 [2] of the blkg association series, the association point moved from generic_make_request_checks(), which is called after the request enters the queue, to bio_set_dev(), which is when the bio is formed before submit_bio(). When the request_queue goes away, the blkgs supporting the request_queue are destroyed and then the q->root_blkg is set to %NULL. This patch adds a %NULL check to blkg_tryget_closest() to prevent the NPE caused by the above. It also adds a guard to see if the request_queue is dying when creating a blkg to prevent creating a blkg for a dead request_queue. [1] https://lore.kernel.org/lkml/20180911184137.35897-1-dennisszhou@gmail.com/ [2] https://lore.kernel.org/lkml/20181126211946.77067-1-dennis@kernel.org/ Fixes: 5cdf2e3fea5e ("blkcg: associate blkg when associating a device") Reported-and-tested-by: Ming Lei Reviewed-by: Bart Van Assche Signed-off-by: Dennis Zhou Signed-off-by: Jens Axboe --- include/linux/blk-cgroup.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-cgroup.h b/include/linux/blk-cgroup.h index bf13ecb0fe4f..f025fd1e22e6 100644 --- a/include/linux/blk-cgroup.h +++ b/include/linux/blk-cgroup.h @@ -511,7 +511,7 @@ static inline bool blkg_tryget(struct blkcg_gq *blkg) */ static inline struct blkcg_gq *blkg_tryget_closest(struct blkcg_gq *blkg) { - while (!percpu_ref_tryget(&blkg->refcnt)) + while (blkg && !percpu_ref_tryget(&blkg->refcnt)) blkg = blkg->parent; return blkg; -- cgit v1.2.3 From cb002d074dabfaa2248507fd9478d16a542e4f1e Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Dec 2018 17:52:07 -0800 Subject: iov_iter: pass void csum pointer to csum_and_copy_to_iter The single caller to csum_and_copy_to_iter is skb_copy_and_csum_datagram and we are trying to unite its logic with skb_copy_datagram_iter by passing a callback to the copy function that we want to apply. Thus, we need to make the checksum pointer private to the function. Acked-by: David S. Miller Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/uio.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 55ce99ddb912..41d1f8d3313d 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -266,7 +266,7 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) { i->count = count; } -size_t csum_and_copy_to_iter(const void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); -- cgit v1.2.3 From d05f443554b3c7dc6d46e3ba9c3c4de468875d4f Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Dec 2018 17:52:09 -0800 Subject: iov_iter: introduce hash_and_copy_to_iter helper Allow consumers that want to use iov iterator helpers and also update a predefined hash calculation online when copying data. This is useful when copying incoming network buffers to a local iterator and calculate a digest on the incoming stream. nvme-tcp host driver that will be introduced in following patches is the first consumer via skb_copy_and_hash_datagram_iter. Acked-by: David S. Miller Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/uio.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/uio.h b/include/linux/uio.h index 41d1f8d3313d..ecf584f6b82d 100644 --- a/include/linux/uio.h +++ b/include/linux/uio.h @@ -11,6 +11,7 @@ #include #include +#include #include struct page; @@ -269,6 +270,8 @@ static inline void iov_iter_reexpand(struct iov_iter *i, size_t count) size_t csum_and_copy_to_iter(const void *addr, size_t bytes, void *csump, struct iov_iter *i); size_t csum_and_copy_from_iter(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); bool csum_and_copy_from_iter_full(void *addr, size_t bytes, __wsum *csum, struct iov_iter *i); +size_t hash_and_copy_to_iter(const void *addr, size_t bytes, void *hashp, + struct iov_iter *i); int import_iovec(int type, const struct iovec __user * uvector, unsigned nr_segs, unsigned fast_segs, -- cgit v1.2.3 From 65d69e2505bb64f6a8d7f417f6e46e2a351174c6 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Dec 2018 17:52:10 -0800 Subject: datagram: introduce skb_copy_and_hash_datagram_iter helper Introduce a helper to copy datagram into an iovec iterator but also update a predefined hash. This is useful for consumers of skb_copy_datagram_iter to also support inflight data digest without having to finish to copy and only then traverse the iovec and calculate the digest hash. Acked-by: David S. Miller Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/skbuff.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 0d1b2c3f127b..b96c809c29eb 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3325,6 +3325,9 @@ static inline int skb_copy_datagram_msg(const struct sk_buff *from, int offset, } int skb_copy_and_csum_datagram_msg(struct sk_buff *skb, int hlen, struct msghdr *msg); +int skb_copy_and_hash_datagram_iter(const struct sk_buff *skb, int offset, + struct iov_iter *to, int len, + struct ahash_request *hash); int skb_copy_datagram_from_iter(struct sk_buff *skb, int offset, struct iov_iter *from, int len); int zerocopy_sg_from_iter(struct sk_buff *skb, struct iov_iter *frm); -- cgit v1.2.3 From fc221d05447aa6db686a6724dd08aa6cce0924d1 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Mon, 3 Dec 2018 17:52:14 -0800 Subject: nvme-tcp: Add protocol header Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme-tcp.h | 189 +++++++++++++++++++++++++++++++++++++++++++++++ include/linux/nvme.h | 1 + 2 files changed, 190 insertions(+) create mode 100644 include/linux/nvme-tcp.h (limited to 'include/linux') diff --git a/include/linux/nvme-tcp.h b/include/linux/nvme-tcp.h new file mode 100644 index 000000000000..03d87c0550a9 --- /dev/null +++ b/include/linux/nvme-tcp.h @@ -0,0 +1,189 @@ +// SPDX-License-Identifier: GPL-2.0 +/* + * NVMe over Fabrics TCP protocol header. + * Copyright (c) 2018 Lightbits Labs. All rights reserved. + */ + +#ifndef _LINUX_NVME_TCP_H +#define _LINUX_NVME_TCP_H + +#include + +#define NVME_TCP_DISC_PORT 8009 +#define NVME_TCP_ADMIN_CCSZ SZ_8K +#define NVME_TCP_DIGEST_LENGTH 4 + +enum nvme_tcp_pfv { + NVME_TCP_PFV_1_0 = 0x0, +}; + +enum nvme_tcp_fatal_error_status { + NVME_TCP_FES_INVALID_PDU_HDR = 0x01, + NVME_TCP_FES_PDU_SEQ_ERR = 0x02, + NVME_TCP_FES_HDR_DIGEST_ERR = 0x03, + NVME_TCP_FES_DATA_OUT_OF_RANGE = 0x04, + NVME_TCP_FES_R2T_LIMIT_EXCEEDED = 0x05, + NVME_TCP_FES_DATA_LIMIT_EXCEEDED = 0x05, + NVME_TCP_FES_UNSUPPORTED_PARAM = 0x06, +}; + +enum nvme_tcp_digest_option { + NVME_TCP_HDR_DIGEST_ENABLE = (1 << 0), + NVME_TCP_DATA_DIGEST_ENABLE = (1 << 1), +}; + +enum nvme_tcp_pdu_type { + nvme_tcp_icreq = 0x0, + nvme_tcp_icresp = 0x1, + nvme_tcp_h2c_term = 0x2, + nvme_tcp_c2h_term = 0x3, + nvme_tcp_cmd = 0x4, + nvme_tcp_rsp = 0x5, + nvme_tcp_h2c_data = 0x6, + nvme_tcp_c2h_data = 0x7, + nvme_tcp_r2t = 0x9, +}; + +enum nvme_tcp_pdu_flags { + NVME_TCP_F_HDGST = (1 << 0), + NVME_TCP_F_DDGST = (1 << 1), + NVME_TCP_F_DATA_LAST = (1 << 2), + NVME_TCP_F_DATA_SUCCESS = (1 << 3), +}; + +/** + * struct nvme_tcp_hdr - nvme tcp pdu common header + * + * @type: pdu type + * @flags: pdu specific flags + * @hlen: pdu header length + * @pdo: pdu data offset + * @plen: pdu wire byte length + */ +struct nvme_tcp_hdr { + __u8 type; + __u8 flags; + __u8 hlen; + __u8 pdo; + __le32 plen; +}; + +/** + * struct nvme_tcp_icreq_pdu - nvme tcp initialize connection request pdu + * + * @hdr: pdu generic header + * @pfv: pdu version format + * @hpda: host pdu data alignment (dwords, 0's based) + * @digest: digest types enabled + * @maxr2t: maximum r2ts per request supported + */ +struct nvme_tcp_icreq_pdu { + struct nvme_tcp_hdr hdr; + __le16 pfv; + __u8 hpda; + __u8 digest; + __le32 maxr2t; + __u8 rsvd2[112]; +}; + +/** + * struct nvme_tcp_icresp_pdu - nvme tcp initialize connection response pdu + * + * @hdr: pdu common header + * @pfv: pdu version format + * @cpda: controller pdu data alignment (dowrds, 0's based) + * @digest: digest types enabled + * @maxdata: maximum data capsules per r2t supported + */ +struct nvme_tcp_icresp_pdu { + struct nvme_tcp_hdr hdr; + __le16 pfv; + __u8 cpda; + __u8 digest; + __le32 maxdata; + __u8 rsvd[112]; +}; + +/** + * struct nvme_tcp_term_pdu - nvme tcp terminate connection pdu + * + * @hdr: pdu common header + * @fes: fatal error status + * @fei: fatal error information + */ +struct nvme_tcp_term_pdu { + struct nvme_tcp_hdr hdr; + __le16 fes; + __le32 fei; + __u8 rsvd[8]; +}; + +/** + * struct nvme_tcp_cmd_pdu - nvme tcp command capsule pdu + * + * @hdr: pdu common header + * @cmd: nvme command + */ +struct nvme_tcp_cmd_pdu { + struct nvme_tcp_hdr hdr; + struct nvme_command cmd; +}; + +/** + * struct nvme_tcp_rsp_pdu - nvme tcp response capsule pdu + * + * @hdr: pdu common header + * @hdr: nvme-tcp generic header + * @cqe: nvme completion queue entry + */ +struct nvme_tcp_rsp_pdu { + struct nvme_tcp_hdr hdr; + struct nvme_completion cqe; +}; + +/** + * struct nvme_tcp_r2t_pdu - nvme tcp ready-to-transfer pdu + * + * @hdr: pdu common header + * @command_id: nvme command identifier which this relates to + * @ttag: transfer tag (controller generated) + * @r2t_offset: offset from the start of the command data + * @r2t_length: length the host is allowed to send + */ +struct nvme_tcp_r2t_pdu { + struct nvme_tcp_hdr hdr; + __u16 command_id; + __u16 ttag; + __le32 r2t_offset; + __le32 r2t_length; + __u8 rsvd[4]; +}; + +/** + * struct nvme_tcp_data_pdu - nvme tcp data pdu + * + * @hdr: pdu common header + * @command_id: nvme command identifier which this relates to + * @ttag: transfer tag (controller generated) + * @data_offset: offset from the start of the command data + * @data_length: length of the data stream + */ +struct nvme_tcp_data_pdu { + struct nvme_tcp_hdr hdr; + __u16 command_id; + __u16 ttag; + __le32 data_offset; + __le32 data_length; + __u8 rsvd[4]; +}; + +union nvme_tcp_pdu { + struct nvme_tcp_icreq_pdu icreq; + struct nvme_tcp_icresp_pdu icresp; + struct nvme_tcp_cmd_pdu cmd; + struct nvme_tcp_rsp_pdu rsp; + struct nvme_tcp_r2t_pdu r2t; + struct nvme_tcp_data_pdu data; +}; + +#endif /* _LINUX_NVME_TCP_H */ diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 88812cb15be0..4d7907e3771e 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -52,6 +52,7 @@ enum { enum { NVMF_TRTYPE_RDMA = 1, /* RDMA */ NVMF_TRTYPE_FC = 2, /* Fibre Channel */ + NVMF_TRTYPE_TCP = 3, /* TCP/IP */ NVMF_TRTYPE_LOOP = 254, /* Reserved for host usage */ NVMF_TRTYPE_MAX, }; -- cgit v1.2.3 From b7c8f3663d0e0773aca3324c26bce3ca8343ec14 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 12 Dec 2018 15:11:37 -0800 Subject: nvme: remove nvme_common command cdw10 array This is a preparation patch which removes the nvme common command cdw10 array and replace with individual fields. This is needed for the nvmet error log page implementation make is error log page entry offset assignment easier. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index 4d7907e3771e..b94fe8fadc4f 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -662,7 +662,12 @@ struct nvme_common_command { __le32 cdw2[2]; __le64 metadata; union nvme_data_ptr dptr; - __le32 cdw10[6]; + __le32 cdw10; + __le32 cdw11; + __le32 cdw12; + __le32 cdw13; + __le32 cdw14; + __le32 cdw15; }; struct nvme_rw_command { -- cgit v1.2.3 From b34de7cee0a65f2557bb05447fbe2cc7a9c46750 Mon Sep 17 00:00:00 2001 From: Chaitanya Kulkarni Date: Wed, 12 Dec 2018 15:11:38 -0800 Subject: nvme: add error log page slot definition This patch adds the NVMe error slot definition from the spec. Signed-off-by: Chaitanya Kulkarni Reviewed-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/nvme.h | 14 ++++++++++++++ 1 file changed, 14 insertions(+) (limited to 'include/linux') diff --git a/include/linux/nvme.h b/include/linux/nvme.h index b94fe8fadc4f..bbcc83886899 100644 --- a/include/linux/nvme.h +++ b/include/linux/nvme.h @@ -1168,6 +1168,20 @@ struct nvme_command { }; }; +struct nvme_error_slot { + __le64 error_count; + __le16 sqid; + __le16 cmdid; + __le16 status_field; + __le16 param_error_location; + __le64 lba; + __le32 nsid; + __u8 vs; + __u8 resv[3]; + __le64 cs; + __u8 resv2[24]; +}; + static inline bool nvme_is_write(struct nvme_command *cmd) { /* -- cgit v1.2.3 From e42b3867de4bd5ee3a1849afb68a1fa8627f7282 Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Tue, 11 Dec 2018 23:38:54 -0800 Subject: blk-mq-rdma: pass in queue map to blk_mq_rdma_map_queues Will be used by nvme-rdma for queue map separation support. Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/blk-mq-rdma.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq-rdma.h b/include/linux/blk-mq-rdma.h index b4ade198007d..7b6ecf9ac4c3 100644 --- a/include/linux/blk-mq-rdma.h +++ b/include/linux/blk-mq-rdma.h @@ -4,7 +4,7 @@ struct blk_mq_tag_set; struct ib_device; -int blk_mq_rdma_map_queues(struct blk_mq_tag_set *set, +int blk_mq_rdma_map_queues(struct blk_mq_queue_map *map, struct ib_device *dev, int first_vec); #endif /* _LINUX_BLK_MQ_RDMA_H */ -- cgit v1.2.3 From cc56694f132a8f5fa9334e3afe990de8c3378866 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Dec 2018 09:46:00 +0800 Subject: blk-mq-debugfs: support rq_qos blk-mq-debugfs has been proved as very helpful for debug some tough issues, such as IO hang. We have seen blk-wbt related IO hang several times, even inside Red Hat BZ, there is such report not sovled yet, so this patch adds support debugfs on rq_qos. Cc: Bart Van Assche Cc: Omar Sandoval Cc: Christoph Hellwig Cc: Josef Bacik Signed-off-by: Ming Lei Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 81f1b105946b..45552e6eae1e 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -560,6 +560,7 @@ struct request_queue { #ifdef CONFIG_BLK_DEBUG_FS struct dentry *debugfs_dir; struct dentry *sched_debugfs_dir; + struct dentry *rqos_debugfs_dir; #endif bool mq_sysfs_init_done; -- cgit v1.2.3 From 13369816cb648f897ce9cbf57e55eeb742ce4eb3 Mon Sep 17 00:00:00 2001 From: Dennis Zhou Date: Mon, 17 Dec 2018 11:03:51 -0500 Subject: block: fix blk-iolatency accounting underflow The blk-iolatency controller measures the time from rq_qos_throttle() to rq_qos_done_bio() and attributes this time to the first bio that needs to create the request. This means if a bio is plug-mergeable or bio-mergeable, it gets to bypass the blk-iolatency controller. The recent series [1], to tag all bios w/ blkgs undermined how iolatency was determining which bios it was charging and should process in rq_qos_done_bio(). Because all bios are being tagged, this caused the atomic_t for the struct rq_wait inflight count to underflow and result in a stall. This patch adds a new flag BIO_TRACKED to let controllers know that a bio is going through the rq_qos path. blk-iolatency now checks if this flag is set to see if it should process the bio in rq_qos_done_bio(). Overloading BLK_QUEUE_ENTERED works, but makes the flag rules confusing. BIO_THROTTLED was another candidate, but the flag is set for all bios that have gone through blk-throttle code. Overloading a flag comes with the burden of making sure that when either implementation changes, a change in setting rules for one doesn't cause a bug in the other. So here, we unfortunately opt for adding a new flag. [1] https://lore.kernel.org/lkml/20181205171039.73066-1-dennis@kernel.org/ Fixes: 5cdf2e3fea5e ("blkcg: associate blkg when associating a device") Signed-off-by: Dennis Zhou Cc: Josef Bacik Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 46c005d601ac..fc99474ac968 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -228,6 +228,7 @@ struct bio { #define BIO_TRACE_COMPLETION 10 /* bio_endio() should trace the final completion * of this bio. */ #define BIO_QUEUE_ENTERED 11 /* can use blk_queue_enter_live() */ +#define BIO_TRACKED 12 /* set if bio goes through the rq_qos path */ /* See BVEC_POOL_OFFSET below before adding new flags */ -- cgit v1.2.3 From 3c94d83cb352627f221d971b05f163c17527de74 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 17 Dec 2018 21:11:17 -0700 Subject: blk-mq: change blk_mq_queue_busy() to blk_mq_queue_inflight() There's a single user of this function, dm, and dm just wants to check if IO is inflight, not that it's just allocated. This fixes a hang with srp/002 in blktests with dm, where it tries to suspend but waits for inflight IO to finish first. As it checks for just allocated requests, this fails. Tested-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blk-mq.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index 57eda7b20243..d3c0a0d2680b 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -257,7 +257,7 @@ void blk_mq_flush_plug_list(struct blk_plug *plug, bool from_schedule); void blk_mq_free_request(struct request *rq); bool blk_mq_can_queue(struct blk_mq_hw_ctx *); -bool blk_mq_queue_busy(struct request_queue *q); +bool blk_mq_queue_inflight(struct request_queue *q); enum { /* return when out of requests */ -- cgit v1.2.3 From 7b7ab780a048699d2b9f416bf2d5c089d8d1028c Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Fri, 14 Dec 2018 11:06:06 -0800 Subject: block: make request_to_qc_t public block consumers will need it for polling requests that are sent with blk_execute_rq_nowait. Also, get rid of blk_tag_to_qc_t and open-code it instead. Reviewed-by: Jens Axboe Signed-off-by: Sagi Grimberg Signed-off-by: Christoph Hellwig --- include/linux/blk-mq.h | 10 ++++++++++ include/linux/blk_types.h | 11 ----------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/blk-mq.h b/include/linux/blk-mq.h index d3c0a0d2680b..0e030f5f76b6 100644 --- a/include/linux/blk-mq.h +++ b/include/linux/blk-mq.h @@ -357,4 +357,14 @@ static inline void *blk_mq_rq_to_pdu(struct request *rq) for ((i) = 0; (i) < (hctx)->nr_ctx && \ ({ ctx = (hctx)->ctxs[(i)]; 1; }); (i)++) +static inline blk_qc_t request_to_qc_t(struct blk_mq_hw_ctx *hctx, + struct request *rq) +{ + if (rq->tag != -1) + return rq->tag | (hctx->queue_num << BLK_QC_T_SHIFT); + + return rq->internal_tag | (hctx->queue_num << BLK_QC_T_SHIFT) | + BLK_QC_T_INTERNAL; +} + #endif diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index fc99474ac968..5c7e7f859a24 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -425,17 +425,6 @@ static inline bool blk_qc_t_valid(blk_qc_t cookie) return cookie != BLK_QC_T_NONE; } -static inline blk_qc_t blk_tag_to_qc_t(unsigned int tag, unsigned int queue_num, - bool internal) -{ - blk_qc_t ret = tag | (queue_num << BLK_QC_T_SHIFT); - - if (internal) - ret |= BLK_QC_T_INTERNAL; - - return ret; -} - static inline unsigned int blk_qc_t_to_queue_num(blk_qc_t cookie) { return (cookie & ~BLK_QC_T_INTERNAL) >> BLK_QC_T_SHIFT; -- cgit v1.2.3 From 9f6b7ef6c3ebe35be77b0ae3cf12e4d25ae80420 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 20 Dec 2018 08:49:00 -0700 Subject: sbitmap: add helpers for add/del wait queue handling After commit 5d2ee7122c73, users of sbitmap that need wait queue handling must use the provided helpers. But we only added prepare_to_wait()/finish_wait() style helpers, add the equivalent add_wait_queue/list_del wrappers as we.. This is needed to ensure kyber plays by the sbitmap waitqueue rules. Tested-by: Ming Lei Reviewed-by: Omar Sandoval Signed-off-by: Jens Axboe --- include/linux/sbitmap.h | 16 ++++++++++++++-- 1 file changed, 14 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sbitmap.h b/include/linux/sbitmap.h index 03f50fcedc79..14d558146aea 100644 --- a/include/linux/sbitmap.h +++ b/include/linux/sbitmap.h @@ -560,13 +560,13 @@ void sbitmap_queue_wake_up(struct sbitmap_queue *sbq); void sbitmap_queue_show(struct sbitmap_queue *sbq, struct seq_file *m); struct sbq_wait { - int accounted; + struct sbitmap_queue *sbq; /* if set, sbq_wait is accounted */ struct wait_queue_entry wait; }; #define DEFINE_SBQ_WAIT(name) \ struct sbq_wait name = { \ - .accounted = 0, \ + .sbq = NULL, \ .wait = { \ .private = current, \ .func = autoremove_wake_function, \ @@ -588,4 +588,16 @@ void sbitmap_prepare_to_wait(struct sbitmap_queue *sbq, void sbitmap_finish_wait(struct sbitmap_queue *sbq, struct sbq_wait_state *ws, struct sbq_wait *sbq_wait); +/* + * Wrapper around add_wait_queue(), which maintains some extra internal state + */ +void sbitmap_add_wait_queue(struct sbitmap_queue *sbq, + struct sbq_wait_state *ws, + struct sbq_wait *sbq_wait); + +/* + * Must be paired with sbitmap_add_wait_queue() + */ +void sbitmap_del_wait_queue(struct sbq_wait *sbq_wait); + #endif /* __LINUX_SCALE_BITMAP_H */ -- cgit v1.2.3