From 3cca6dc1c81e2407928dc4c6105252146fd3924f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 2 Mar 2011 11:08:00 -0500 Subject: block: add API for delaying work/request_fn a little bit Currently we use plugging for that, but as plugging is going away, we need an alternative mechanism. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++++ 1 file changed, 6 insertions(+) (limited to 'include') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e3ee74fc5903..f55b2a8b6610 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -300,6 +300,11 @@ struct request_queue unsigned long unplug_delay; /* After this many jiffies */ struct work_struct unplug_work; + /* + * Delayed queue handling + */ + struct delayed_work delay_work; + struct backing_dev_info backing_dev_info; /* @@ -677,6 +682,7 @@ extern int blk_insert_cloned_request(struct request_queue *q, extern void blk_plug_device(struct request_queue *); extern void blk_plug_device_unlocked(struct request_queue *); extern int blk_remove_plug(struct request_queue *); +extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, unsigned int, void __user *); -- cgit v1.2.3 From 73c101011926c5832e6e141682180c4debe2cf45 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Tue, 8 Mar 2011 13:19:51 +0100 Subject: block: initial patch for on-stack per-task plugging This patch adds support for creating a queuing context outside of the queue itself. This enables us to batch up pieces of IO before grabbing the block device queue lock and submitting them to the IO scheduler. The context is created on the stack of the process and assigned in the task structure, so that we can auto-unplug it if we hit a schedule event. The current queue plugging happens implicitly if IO is submitted to an empty device, yet callers have to remember to unplug that IO when they are going to wait for it. This is an ugly API and has caused bugs in the past. Additionally, it requires hacks in the vm (->sync_page() callback) to handle that logic. By switching to an explicit plugging scheme we make the API a lot nicer and can get rid of the ->sync_page() hack in the vm. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 ++ include/linux/blkdev.h | 42 ++++++++++++++++++++++++++++++++++++++++++ include/linux/elevator.h | 1 + include/linux/sched.h | 6 ++++++ 4 files changed, 51 insertions(+) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index dddedfc0af81..16b286473042 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -152,6 +152,7 @@ enum rq_flag_bits { __REQ_IO_STAT, /* account I/O stat */ __REQ_MIXED_MERGE, /* merge of different types, fail separately */ __REQ_SECURE, /* secure discard (used with __REQ_DISCARD) */ + __REQ_ON_PLUG, /* on plug list */ __REQ_NR_BITS, /* stops here */ }; @@ -193,5 +194,6 @@ enum rq_flag_bits { #define REQ_IO_STAT (1 << __REQ_IO_STAT) #define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) #define REQ_SECURE (1 << __REQ_SECURE) +#define REQ_ON_PLUG (1 << __REQ_ON_PLUG) #endif /* __LINUX_BLK_TYPES_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index f55b2a8b6610..5873037eeb91 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -871,6 +871,31 @@ struct request_queue *blk_alloc_queue(gfp_t); struct request_queue *blk_alloc_queue_node(gfp_t, int); extern void blk_put_queue(struct request_queue *); +struct blk_plug { + unsigned long magic; + struct list_head list; + unsigned int should_sort; +}; + +extern void blk_start_plug(struct blk_plug *); +extern void blk_finish_plug(struct blk_plug *); +extern void __blk_flush_plug(struct task_struct *, struct blk_plug *); + +static inline void blk_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + if (unlikely(plug)) + __blk_flush_plug(tsk, plug); +} + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + struct blk_plug *plug = tsk->plug; + + return plug && !list_empty(&plug->list); +} + /* * tag stuff */ @@ -1294,6 +1319,23 @@ static inline long nr_blockdev_pages(void) return 0; } +static inline void blk_start_plug(struct list_head *list) +{ +} + +static inline void blk_finish_plug(struct list_head *list) +{ +} + +static inline void blk_flush_plug(struct task_struct *tsk) +{ +} + +static inline bool blk_needs_flush_plug(struct task_struct *tsk) +{ + return false; +} + #endif /* CONFIG_BLOCK */ #endif diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 39b68edb388d..8857cf9adbb7 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -105,6 +105,7 @@ extern void elv_add_request(struct request_queue *, struct request *, int, int); extern void __elv_add_request(struct request_queue *, struct request *, int, int); extern void elv_insert(struct request_queue *, struct request *, int); extern int elv_merge(struct request_queue *, struct request **, struct bio *); +extern int elv_try_merge(struct request *, struct bio *); extern void elv_merge_requests(struct request_queue *, struct request *, struct request *); extern void elv_merged_request(struct request_queue *, struct request *, int); diff --git a/include/linux/sched.h b/include/linux/sched.h index 777d8a5ed06b..96ac22643742 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -99,6 +99,7 @@ struct robust_list_head; struct bio_list; struct fs_struct; struct perf_event_context; +struct blk_plug; /* * List of flags we want to share for kernel threads, @@ -1429,6 +1430,11 @@ struct task_struct { /* stacked block device info */ struct bio_list *bio_list; +#ifdef CONFIG_BLOCK +/* stack plugging */ + struct blk_plug *plug; +#endif + /* VM state */ struct reclaim_state *reclaim_state; -- cgit v1.2.3 From 7eaceaccab5f40bbfda044629a6298616aeaed50 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 10 Mar 2011 08:52:07 +0100 Subject: block: remove per-queue plugging Code has been converted over to the new explicit on-stack plugging, and delay users have been converted to use the new API for that. So lets kill off the old plugging along with aops->sync_page(). Signed-off-by: Jens Axboe --- include/linux/backing-dev.h | 16 ---------------- include/linux/blkdev.h | 31 +++++++------------------------ include/linux/buffer_head.h | 1 - include/linux/device-mapper.h | 5 ----- include/linux/elevator.h | 7 ++----- include/linux/fs.h | 1 - include/linux/pagemap.h | 12 ------------ include/linux/swap.h | 2 -- 8 files changed, 9 insertions(+), 66 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev.h b/include/linux/backing-dev.h index 4ce34fa937d4..96f4094b706d 100644 --- a/include/linux/backing-dev.h +++ b/include/linux/backing-dev.h @@ -66,8 +66,6 @@ struct backing_dev_info { unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ - void (*unplug_io_fn)(struct backing_dev_info *, struct page *); - void *unplug_io_data; char *name; @@ -251,7 +249,6 @@ int bdi_set_max_ratio(struct backing_dev_info *bdi, unsigned int max_ratio); extern struct backing_dev_info default_backing_dev_info; extern struct backing_dev_info noop_backing_dev_info; -void default_unplug_io_fn(struct backing_dev_info *bdi, struct page *page); int writeback_in_progress(struct backing_dev_info *bdi); @@ -336,17 +333,4 @@ static inline int bdi_sched_wait(void *word) return 0; } -static inline void blk_run_backing_dev(struct backing_dev_info *bdi, - struct page *page) -{ - if (bdi && bdi->unplug_io_fn) - bdi->unplug_io_fn(bdi, page); -} - -static inline void blk_run_address_space(struct address_space *mapping) -{ - if (mapping) - blk_run_backing_dev(mapping->backing_dev_info, NULL); -} - #endif /* _LINUX_BACKING_DEV_H */ diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5873037eeb91..64ab2a1bb167 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -196,7 +196,6 @@ typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); typedef void (unprep_rq_fn) (struct request_queue *, struct request *); -typedef void (unplug_fn) (struct request_queue *); struct bio_vec; struct bvec_merge_data { @@ -279,7 +278,6 @@ struct request_queue make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; unprep_rq_fn *unprep_rq_fn; - unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; @@ -292,14 +290,6 @@ struct request_queue sector_t end_sector; struct request *boundary_rq; - /* - * Auto-unplugging state - */ - struct timer_list unplug_timer; - int unplug_thresh; /* After this many requests */ - unsigned long unplug_delay; /* After this many jiffies */ - struct work_struct unplug_work; - /* * Delayed queue handling */ @@ -399,14 +389,13 @@ struct request_queue #define QUEUE_FLAG_ASYNCFULL 4 /* write queue has been filled */ #define QUEUE_FLAG_DEAD 5 /* queue being torn down */ #define QUEUE_FLAG_REENTER 6 /* Re-entrancy avoidance */ -#define QUEUE_FLAG_PLUGGED 7 /* queue is plugged */ -#define QUEUE_FLAG_ELVSWITCH 8 /* don't use elevator, just do FIFO */ -#define QUEUE_FLAG_BIDI 9 /* queue supports bidi requests */ -#define QUEUE_FLAG_NOMERGES 10 /* disable merge attempts */ -#define QUEUE_FLAG_SAME_COMP 11 /* force complete on same CPU */ -#define QUEUE_FLAG_FAIL_IO 12 /* fake timeout */ -#define QUEUE_FLAG_STACKABLE 13 /* supports request stacking */ -#define QUEUE_FLAG_NONROT 14 /* non-rotational device (SSD) */ +#define QUEUE_FLAG_ELVSWITCH 7 /* don't use elevator, just do FIFO */ +#define QUEUE_FLAG_BIDI 8 /* queue supports bidi requests */ +#define QUEUE_FLAG_NOMERGES 9 /* disable merge attempts */ +#define QUEUE_FLAG_SAME_COMP 10 /* force complete on same CPU */ +#define QUEUE_FLAG_FAIL_IO 11 /* fake timeout */ +#define QUEUE_FLAG_STACKABLE 12 /* supports request stacking */ +#define QUEUE_FLAG_NONROT 13 /* non-rotational device (SSD) */ #define QUEUE_FLAG_VIRT QUEUE_FLAG_NONROT /* paravirt device */ #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ @@ -484,7 +473,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -#define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) #define blk_queue_nomerges(q) test_bit(QUEUE_FLAG_NOMERGES, &(q)->queue_flags) @@ -679,9 +667,6 @@ extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, extern void blk_rq_unprep_clone(struct request *rq); extern int blk_insert_cloned_request(struct request_queue *q, struct request *rq); -extern void blk_plug_device(struct request_queue *); -extern void blk_plug_device_unlocked(struct request_queue *); -extern int blk_remove_plug(struct request_queue *); extern void blk_delay_queue(struct request_queue *, unsigned long); extern void blk_recount_segments(struct request_queue *, struct bio *); extern int scsi_cmd_ioctl(struct request_queue *, struct gendisk *, fmode_t, @@ -726,7 +711,6 @@ extern int blk_execute_rq(struct request_queue *, struct gendisk *, struct request *, int); extern void blk_execute_rq_nowait(struct request_queue *, struct gendisk *, struct request *, int, rq_end_io_fn *); -extern void blk_unplug(struct request_queue *q); static inline struct request_queue *bdev_get_queue(struct block_device *bdev) { @@ -863,7 +847,6 @@ extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bd extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -extern void generic_unplug_device(struct request_queue *); extern long nr_blockdev_pages(void); int blk_get_queue(struct request_queue *); diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 68d1fe7b877c..f5df23561b96 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -219,7 +219,6 @@ int generic_cont_expand_simple(struct inode *inode, loff_t size); int block_commit_write(struct page *page, unsigned from, unsigned to); int block_page_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf, get_block_t get_block); -void block_sync_page(struct page *); sector_t generic_block_bmap(struct address_space *, sector_t, get_block_t *); int block_truncate_page(struct address_space *, loff_t, get_block_t *); int nobh_write_begin(struct address_space *, loff_t, unsigned, unsigned, diff --git a/include/linux/device-mapper.h b/include/linux/device-mapper.h index 272496d1fae4..e2768834f397 100644 --- a/include/linux/device-mapper.h +++ b/include/linux/device-mapper.h @@ -285,11 +285,6 @@ void dm_table_add_target_callbacks(struct dm_table *t, struct dm_target_callback */ int dm_table_complete(struct dm_table *t); -/* - * Unplug all devices in a table. - */ -void dm_table_unplug_all(struct dm_table *t); - /* * Table reference counting. */ diff --git a/include/linux/elevator.h b/include/linux/elevator.h index 8857cf9adbb7..ec6f72b84477 100644 --- a/include/linux/elevator.h +++ b/include/linux/elevator.h @@ -20,7 +20,6 @@ typedef void (elevator_bio_merged_fn) (struct request_queue *, typedef int (elevator_dispatch_fn) (struct request_queue *, int); typedef void (elevator_add_req_fn) (struct request_queue *, struct request *); -typedef int (elevator_queue_empty_fn) (struct request_queue *); typedef struct request *(elevator_request_list_fn) (struct request_queue *, struct request *); typedef void (elevator_completed_req_fn) (struct request_queue *, struct request *); typedef int (elevator_may_queue_fn) (struct request_queue *, int); @@ -46,7 +45,6 @@ struct elevator_ops elevator_activate_req_fn *elevator_activate_req_fn; elevator_deactivate_req_fn *elevator_deactivate_req_fn; - elevator_queue_empty_fn *elevator_queue_empty_fn; elevator_completed_req_fn *elevator_completed_req_fn; elevator_request_list_fn *elevator_former_req_fn; @@ -101,8 +99,8 @@ struct elevator_queue */ extern void elv_dispatch_sort(struct request_queue *, struct request *); extern void elv_dispatch_add_tail(struct request_queue *, struct request *); -extern void elv_add_request(struct request_queue *, struct request *, int, int); -extern void __elv_add_request(struct request_queue *, struct request *, int, int); +extern void elv_add_request(struct request_queue *, struct request *, int); +extern void __elv_add_request(struct request_queue *, struct request *, int); extern void elv_insert(struct request_queue *, struct request *, int); extern int elv_merge(struct request_queue *, struct request **, struct bio *); extern int elv_try_merge(struct request *, struct bio *); @@ -112,7 +110,6 @@ extern void elv_merged_request(struct request_queue *, struct request *, int); extern void elv_bio_merged(struct request_queue *q, struct request *, struct bio *); extern void elv_requeue_request(struct request_queue *, struct request *); -extern int elv_queue_empty(struct request_queue *); extern struct request *elv_former_request(struct request_queue *, struct request *); extern struct request *elv_latter_request(struct request_queue *, struct request *); extern int elv_register_queue(struct request_queue *q); diff --git a/include/linux/fs.h b/include/linux/fs.h index bd3215940c37..9f2cf69911b8 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -583,7 +583,6 @@ typedef int (*read_actor_t)(read_descriptor_t *, struct page *, struct address_space_operations { int (*writepage)(struct page *page, struct writeback_control *wbc); int (*readpage)(struct file *, struct page *); - void (*sync_page)(struct page *); /* Write back some dirty pages from this mapping. */ int (*writepages)(struct address_space *, struct writeback_control *); diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 9c66e994540f..e112b8db2f3c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -298,7 +298,6 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, extern void __lock_page(struct page *page); extern int __lock_page_killable(struct page *page); -extern void __lock_page_nosync(struct page *page); extern int __lock_page_or_retry(struct page *page, struct mm_struct *mm, unsigned int flags); extern void unlock_page(struct page *page); @@ -341,17 +340,6 @@ static inline int lock_page_killable(struct page *page) return 0; } -/* - * lock_page_nosync should only be used if we can't pin the page's inode. - * Doesn't play quite so well with block device plugging. - */ -static inline void lock_page_nosync(struct page *page) -{ - might_sleep(); - if (!trylock_page(page)) - __lock_page_nosync(page); -} - /* * lock_page_or_retry - Lock the page, unless this would block and the * caller indicated that it can handle a retry. diff --git a/include/linux/swap.h b/include/linux/swap.h index 4d559325d919..9ee321833b21 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -299,8 +299,6 @@ extern void mem_cgroup_get_shmem_target(struct inode *inode, pgoff_t pgoff, struct page **pagep, swp_entry_t *ent); #endif -extern void swap_unplug_io_fn(struct backing_dev_info *, struct page *); - #ifdef CONFIG_SWAP /* linux/mm/page_io.c */ extern int swap_readpage(struct page *); -- cgit v1.2.3 From 721a9602e6607417c6bc15b18e97a2f35266c690 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Mar 2011 11:56:30 +0100 Subject: block: kill off REQ_UNPLUG With the plugging now being explicitly controlled by the submitter, callers need not pass down unplugging hints to the block layer. If they want to unplug, it's because they manually plugged on their own - in which case, they should just unplug at will. Signed-off-by: Jens Axboe --- include/linux/blk_types.h | 2 -- include/linux/fs.h | 28 +++++++++------------------- 2 files changed, 9 insertions(+), 21 deletions(-) (limited to 'include') diff --git a/include/linux/blk_types.h b/include/linux/blk_types.h index 16b286473042..be50d9e70a7d 100644 --- a/include/linux/blk_types.h +++ b/include/linux/blk_types.h @@ -128,7 +128,6 @@ enum rq_flag_bits { __REQ_NOIDLE, /* don't anticipate more IO after this one */ /* bio only flags */ - __REQ_UNPLUG, /* unplug the immediately after submission */ __REQ_RAHEAD, /* read ahead, can fail anytime */ __REQ_THROTTLED, /* This bio has already been subjected to * throttling rules. Don't do it again. */ @@ -172,7 +171,6 @@ enum rq_flag_bits { REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define REQ_CLONE_MASK REQ_COMMON_MASK -#define REQ_UNPLUG (1 << __REQ_UNPLUG) #define REQ_RAHEAD (1 << __REQ_RAHEAD) #define REQ_THROTTLED (1 << __REQ_THROTTLED) diff --git a/include/linux/fs.h b/include/linux/fs.h index 9f2cf69911b8..543e226ea6a3 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -135,16 +135,10 @@ struct inodes_stat_t { * block layer could (in theory) choose to ignore this * request if it runs into resource problems. * WRITE A normal async write. Device will be plugged. - * WRITE_SYNC_PLUG Synchronous write. Identical to WRITE, but passes down + * WRITE_SYNC Synchronous write. Identical to WRITE, but passes down * the hint that someone will be waiting on this IO - * shortly. The device must still be unplugged explicitly, - * WRITE_SYNC_PLUG does not do this as we could be - * submitting more writes before we actually wait on any - * of them. - * WRITE_SYNC Like WRITE_SYNC_PLUG, but also unplugs the device - * immediately after submission. The write equivalent - * of READ_SYNC. - * WRITE_ODIRECT_PLUG Special case write for O_DIRECT only. + * shortly. The write equivalent of READ_SYNC. + * WRITE_ODIRECT Special case write for O_DIRECT only. * WRITE_FLUSH Like WRITE_SYNC but with preceding cache flush. * WRITE_FUA Like WRITE_SYNC but data is guaranteed to be on * non-volatile media on completion. @@ -160,18 +154,14 @@ struct inodes_stat_t { #define WRITE RW_MASK #define READA RWA_MASK -#define READ_SYNC (READ | REQ_SYNC | REQ_UNPLUG) +#define READ_SYNC (READ | REQ_SYNC) #define READ_META (READ | REQ_META) -#define WRITE_SYNC_PLUG (WRITE | REQ_SYNC | REQ_NOIDLE) -#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG) -#define WRITE_ODIRECT_PLUG (WRITE | REQ_SYNC) +#define WRITE_SYNC (WRITE | REQ_SYNC | REQ_NOIDLE) +#define WRITE_ODIRECT (WRITE | REQ_SYNC) #define WRITE_META (WRITE | REQ_META) -#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_FLUSH) -#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_FUA) -#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_UNPLUG | \ - REQ_FLUSH | REQ_FUA) +#define WRITE_FLUSH (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH) +#define WRITE_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FUA) +#define WRITE_FLUSH_FUA (WRITE | REQ_SYNC | REQ_NOIDLE | REQ_FLUSH | REQ_FUA) #define SEL_IN 1 #define SEL_OUT 2 -- cgit v1.2.3