From e2e1a148bc45855816ae6b4692ce29d0020fa22e Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 9 Jun 2010 10:42:09 +0200 Subject: block: add sysfs knob for turning off disk entropy contributions There are two reasons for doing this: - On SSD disks, the completion times aren't as random as they are for rotational drives. So it's questionable whether they should contribute to the random pool in the first place. - Calling add_disk_randomness() has a lot of overhead. This adds /sys/block//queue/add_random that will allow you to switch off on a per-device basis. The default setting is on, so there should be no functional changes from this patch. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 09a840264d6f..b8224ea4a5de 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -467,11 +467,13 @@ struct request_queue #define QUEUE_FLAG_IO_STAT 15 /* do IO stats */ #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ +#define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ - (1 << QUEUE_FLAG_SAME_COMP)) + (1 << QUEUE_FLAG_SAME_COMP) | \ + (1 << QUEUE_FLAG_ADD_RANDOM)) static inline int queue_is_locked(struct request_queue *q) { @@ -596,6 +598,7 @@ enum { test_bit(QUEUE_FLAG_NOXMERGES, &(q)->queue_flags) #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) +#define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) #define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) -- cgit v1.2.3 From bfe172310e58225f0d07f9354b683abacbd6a0d8 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Mon, 31 May 2010 15:59:03 +0900 Subject: block: kill ISA_DMA_THRESHOLD usage block uses ISA_DMA_THRESHOLD for BLK_BOUNCE_ISA. Only SCSI uses ISA_DMA_THRESHOLD for ancient drivers with non-zero unchecked_isa_dma. Nowadays drivers (and subsystems) use dma_mask properly instead of ISA_DMA_THRESHOLD. Documentation/scsi/scsi_mid_low_api.txt says: unchecked_isa_dma - 1=>only use bottom 16 MB of ram (ISA DMA addressing restriction), 0=>can use full 32 bit (or better) DMA address space So block simply uses DMA_BIT_MASK(24) for BLK_BOUNCE_ISA for SCSI. Signed-off-by: FUJITA Tomonori Acked-by: James Bottomley Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index b8224ea4a5de..d7ae241a9e55 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -712,7 +712,7 @@ extern unsigned long blk_max_low_pfn, blk_max_pfn; #define BLK_BOUNCE_HIGH -1ULL #endif #define BLK_BOUNCE_ANY (-1ULL) -#define BLK_BOUNCE_ISA (ISA_DMA_THRESHOLD) +#define BLK_BOUNCE_ISA (DMA_BIT_MASK(24)) /* * default timeout for SG_IO if none specified -- cgit v1.2.3 From 33659ebbae262228eef4e0fe990f393d1f0ed941 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:17:56 +0200 Subject: block: remove wrappers for request type/flags Remove all the trivial wrappers for the cmd_type and cmd_flags fields in struct requests. This allows much easier grepping for different request types instead of unwinding through macros. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 41 +++++++++++++---------------------------- 1 file changed, 13 insertions(+), 28 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index d7ae241a9e55..3ecd28ef9ba4 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -604,33 +604,20 @@ enum { test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -#define blk_fs_request(rq) ((rq)->cmd_type == REQ_TYPE_FS) -#define blk_pc_request(rq) ((rq)->cmd_type == REQ_TYPE_BLOCK_PC) -#define blk_special_request(rq) ((rq)->cmd_type == REQ_TYPE_SPECIAL) -#define blk_sense_request(rq) ((rq)->cmd_type == REQ_TYPE_SENSE) - -#define blk_failfast_dev(rq) ((rq)->cmd_flags & REQ_FAILFAST_DEV) -#define blk_failfast_transport(rq) ((rq)->cmd_flags & REQ_FAILFAST_TRANSPORT) -#define blk_failfast_driver(rq) ((rq)->cmd_flags & REQ_FAILFAST_DRIVER) -#define blk_noretry_request(rq) (blk_failfast_dev(rq) || \ - blk_failfast_transport(rq) || \ - blk_failfast_driver(rq)) -#define blk_rq_started(rq) ((rq)->cmd_flags & REQ_STARTED) -#define blk_rq_io_stat(rq) ((rq)->cmd_flags & REQ_IO_STAT) -#define blk_rq_quiet(rq) ((rq)->cmd_flags & REQ_QUIET) - -#define blk_account_rq(rq) (blk_rq_started(rq) && (blk_fs_request(rq) || blk_discard_rq(rq))) - -#define blk_pm_suspend_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND) -#define blk_pm_resume_request(rq) ((rq)->cmd_type == REQ_TYPE_PM_RESUME) +#define blk_noretry_request(rq) \ + ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ + REQ_FAILFAST_DRIVER)) + +#define blk_account_rq(rq) \ + (((rq)->cmd_flags & REQ_STARTED) && \ + ((rq)->cmd_type == REQ_TYPE_FS || \ + ((rq)->cmd_flags & REQ_DISCARD))) + #define blk_pm_request(rq) \ - (blk_pm_suspend_request(rq) || blk_pm_resume_request(rq)) + ((rq)->cmd_type == REQ_TYPE_PM_SUSPEND || \ + (rq)->cmd_type == REQ_TYPE_PM_RESUME) #define blk_rq_cpu_valid(rq) ((rq)->cpu != -1) -#define blk_sorted_rq(rq) ((rq)->cmd_flags & REQ_SORTED) -#define blk_barrier_rq(rq) ((rq)->cmd_flags & REQ_HARDBARRIER) -#define blk_fua_rq(rq) ((rq)->cmd_flags & REQ_FUA) -#define blk_discard_rq(rq) ((rq)->cmd_flags & REQ_DISCARD) #define blk_bidi_rq(rq) ((rq)->next_rq != NULL) /* rq->queuelist of dequeued request must be list_empty() */ #define blk_queued_rq(rq) (!list_empty(&(rq)->queuelist)) @@ -652,9 +639,6 @@ static inline bool rq_is_sync(struct request *rq) return rw_is_sync(rq->cmd_flags); } -#define rq_is_meta(rq) ((rq)->cmd_flags & REQ_RW_META) -#define rq_noidle(rq) ((rq)->cmd_flags & REQ_NOIDLE) - static inline int blk_queue_full(struct request_queue *q, int sync) { if (sync) @@ -687,7 +671,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ - (blk_discard_rq(rq) || blk_fs_request((rq)))) + (((rq)->cmd_flags & REQ_DISCARD) || \ + (rq)->cmd_type == REQ_TYPE_FS)) /* * q->prep_rq_fn return values -- cgit v1.2.3 From 7b6d91daee5cac6402186ff224c3af39d79f4a0e Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Sat, 7 Aug 2010 18:20:39 +0200 Subject: block: unify flags for struct bio and struct request Remove the current bio flags and reuse the request flags for the bio, too. This allows to more easily trace the type of I/O from the filesystem down to the block driver. There were two flags in the bio that were missing in the requests: BIO_RW_UNPLUG and BIO_RW_AHEAD. Also I've renamed two request flags that had a superflous RW in them. Note that the flags are in bio.h despite having the REQ_ name - as blkdev.h includes bio.h that is the only way to go for now. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 66 +------------------------------------------------- 1 file changed, 1 insertion(+), 65 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3ecd28ef9ba4..3fc0f5908619 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -84,70 +84,6 @@ enum { REQ_LB_OP_FLUSH = 0x41, /* flush request */ }; -/* - * request type modified bits. first four bits match BIO_RW* bits, important - */ -enum rq_flag_bits { - __REQ_RW, /* not set, read. set, write */ - __REQ_FAILFAST_DEV, /* no driver retries of device errors */ - __REQ_FAILFAST_TRANSPORT, /* no driver retries of transport errors */ - __REQ_FAILFAST_DRIVER, /* no driver retries of driver errors */ - /* above flags must match BIO_RW_* */ - __REQ_DISCARD, /* request to discard sectors */ - __REQ_SORTED, /* elevator knows about this request */ - __REQ_SOFTBARRIER, /* may not be passed by ioscheduler */ - __REQ_HARDBARRIER, /* may not be passed by drive either */ - __REQ_FUA, /* forced unit access */ - __REQ_NOMERGE, /* don't touch this for merging */ - __REQ_STARTED, /* drive already may have started this one */ - __REQ_DONTPREP, /* don't call prep for this one */ - __REQ_QUEUED, /* uses queueing */ - __REQ_ELVPRIV, /* elevator private data attached */ - __REQ_FAILED, /* set if the request failed */ - __REQ_QUIET, /* don't worry about errors */ - __REQ_PREEMPT, /* set for "ide_preempt" requests */ - __REQ_ORDERED_COLOR, /* is before or after barrier */ - __REQ_RW_SYNC, /* request is sync (sync write or read) */ - __REQ_ALLOCED, /* request came from our alloc pool */ - __REQ_RW_META, /* metadata io request */ - __REQ_COPY_USER, /* contains copies of user pages */ - __REQ_INTEGRITY, /* integrity metadata has been remapped */ - __REQ_NOIDLE, /* Don't anticipate more IO after this one */ - __REQ_IO_STAT, /* account I/O stat */ - __REQ_MIXED_MERGE, /* merge of different types, fail separately */ - __REQ_NR_BITS, /* stops here */ -}; - -#define REQ_RW (1 << __REQ_RW) -#define REQ_FAILFAST_DEV (1 << __REQ_FAILFAST_DEV) -#define REQ_FAILFAST_TRANSPORT (1 << __REQ_FAILFAST_TRANSPORT) -#define REQ_FAILFAST_DRIVER (1 << __REQ_FAILFAST_DRIVER) -#define REQ_DISCARD (1 << __REQ_DISCARD) -#define REQ_SORTED (1 << __REQ_SORTED) -#define REQ_SOFTBARRIER (1 << __REQ_SOFTBARRIER) -#define REQ_HARDBARRIER (1 << __REQ_HARDBARRIER) -#define REQ_FUA (1 << __REQ_FUA) -#define REQ_NOMERGE (1 << __REQ_NOMERGE) -#define REQ_STARTED (1 << __REQ_STARTED) -#define REQ_DONTPREP (1 << __REQ_DONTPREP) -#define REQ_QUEUED (1 << __REQ_QUEUED) -#define REQ_ELVPRIV (1 << __REQ_ELVPRIV) -#define REQ_FAILED (1 << __REQ_FAILED) -#define REQ_QUIET (1 << __REQ_QUIET) -#define REQ_PREEMPT (1 << __REQ_PREEMPT) -#define REQ_ORDERED_COLOR (1 << __REQ_ORDERED_COLOR) -#define REQ_RW_SYNC (1 << __REQ_RW_SYNC) -#define REQ_ALLOCED (1 << __REQ_ALLOCED) -#define REQ_RW_META (1 << __REQ_RW_META) -#define REQ_COPY_USER (1 << __REQ_COPY_USER) -#define REQ_INTEGRITY (1 << __REQ_INTEGRITY) -#define REQ_NOIDLE (1 << __REQ_NOIDLE) -#define REQ_IO_STAT (1 << __REQ_IO_STAT) -#define REQ_MIXED_MERGE (1 << __REQ_MIXED_MERGE) - -#define REQ_FAILFAST_MASK (REQ_FAILFAST_DEV | REQ_FAILFAST_TRANSPORT | \ - REQ_FAILFAST_DRIVER) - #define BLK_MAX_CDB 16 /* @@ -631,7 +567,7 @@ enum { */ static inline bool rw_is_sync(unsigned int rw_flags) { - return !(rw_flags & REQ_RW) || (rw_flags & REQ_RW_SYNC); + return !(rw_flags & REQ_WRITE) || (rw_flags & REQ_SYNC); } static inline bool rq_is_sync(struct request *rq) -- cgit v1.2.3 From 66ac0280197981f88774e74b60c8e5f9f07c1dba Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 18 Jun 2010 16:59:42 +0200 Subject: block: don't allocate a payload for discard request Allocating a fixed payload for discard requests always was a horrible hack, and it's not coming to byte us when adding support for discard in DM/MD. So change the code to leave the allocation of a payload to the lowlevel driver. Unfortunately that means we'll need another hack, which allows us to update the various block layer length fields indicating that we have a payload. Instead of hiding this in sd.c, which we already partially do for UNMAP support add a documented helper in the core block layer for it. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3fc0f5908619..204fbe22354d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -705,6 +705,8 @@ extern struct request *blk_make_request(struct request_queue *, struct bio *, gfp_t); extern void blk_insert_request(struct request_queue *, struct request *, int, void *); extern void blk_requeue_request(struct request_queue *, struct request *); +extern void blk_add_request_payload(struct request *rq, struct page *page, + unsigned int len); extern int blk_rq_check_limits(struct request_queue *q, struct request *rq); extern int blk_lld_busy(struct request_queue *q); extern int blk_rq_prep_clone(struct request *rq, struct request *rq_src, -- cgit v1.2.3 From 28018c242a4ec7017bbbf81d2d3952f820a27118 Mon Sep 17 00:00:00 2001 From: James Bottomley Date: Thu, 1 Jul 2010 19:49:17 +0900 Subject: block: implement an unprep function corresponding directly to prep Reviewed-by: FUJITA Tomonori Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 204fbe22354d..6bba04c7ec48 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -200,6 +200,7 @@ struct request_pm_state typedef void (request_fn_proc) (struct request_queue *q); typedef int (make_request_fn) (struct request_queue *q, struct bio *bio); typedef int (prep_rq_fn) (struct request_queue *, struct request *); +typedef void (unprep_rq_fn) (struct request_queue *, struct request *); typedef void (unplug_fn) (struct request_queue *); struct bio_vec; @@ -282,6 +283,7 @@ struct request_queue request_fn_proc *request_fn; make_request_fn *make_request_fn; prep_rq_fn *prep_rq_fn; + unprep_rq_fn *unprep_rq_fn; unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; prepare_flush_fn *prepare_flush_fn; @@ -841,6 +843,7 @@ extern void blk_complete_request(struct request *); extern void __blk_complete_request(struct request *); extern void blk_abort_request(struct request *); extern void blk_abort_queue(struct request_queue *); +extern void blk_unprep_request(struct request *); /* * Access functions for manipulating queue properties @@ -885,6 +888,7 @@ extern int blk_queue_dma_drain(struct request_queue *q, extern void blk_queue_lld_busy(struct request_queue *q, lld_busy_fn *fn); extern void blk_queue_segment_boundary(struct request_queue *, unsigned long); extern void blk_queue_prep_rq(struct request_queue *, prep_rq_fn *pfn); +extern void blk_queue_unprep_rq(struct request_queue *, unprep_rq_fn *ufn); extern void blk_queue_merge_bvec(struct request_queue *, merge_bvec_fn *); extern void blk_queue_dma_alignment(struct request_queue *, int); extern void blk_queue_update_dma_alignment(struct request_queue *, int); -- cgit v1.2.3 From 00fff26539bfe3fad21c164fc4002d9ede056fb0 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Sat, 3 Jul 2010 17:45:40 +0900 Subject: block: remove q->prepare_flush_fn completely This removes q->prepare_flush_fn completely (changes the blk_queue_ordered API). Signed-off-by: FUJITA Tomonori Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6bba04c7ec48..3a2c5d9a9288 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -212,7 +212,6 @@ struct bvec_merge_data { }; typedef int (merge_bvec_fn) (struct request_queue *, struct bvec_merge_data *, struct bio_vec *); -typedef void (prepare_flush_fn) (struct request_queue *, struct request *); typedef void (softirq_done_fn)(struct request *); typedef int (dma_drain_needed_fn)(struct request *); typedef int (lld_busy_fn) (struct request_queue *q); @@ -286,7 +285,6 @@ struct request_queue unprep_rq_fn *unprep_rq_fn; unplug_fn *unplug_fn; merge_bvec_fn *merge_bvec_fn; - prepare_flush_fn *prepare_flush_fn; softirq_done_fn *softirq_done_fn; rq_timed_out_fn *rq_timed_out_fn; dma_drain_needed_fn *dma_drain_needed; @@ -896,7 +894,7 @@ extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned, prepare_flush_fn *); +extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3 From a89f5c899db3c6be4bb426e4efb72ecee29a93b5 Mon Sep 17 00:00:00 2001 From: FUJITA Tomonori Date: Tue, 6 Jul 2010 09:03:18 +0200 Subject: block: remove unused REQ_TYPE_LINUX_BLOCK Nobody uses REQ_TYPE_LINUX_BLOCK (and its REQ_LB_OP_*). Signed-off-by: FUJITA Tomonori Acked-by: Jeff Garzik Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 3a2c5d9a9288..baf5258f5985 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -60,7 +60,6 @@ enum rq_cmd_type_bits { REQ_TYPE_PM_RESUME, /* resume request */ REQ_TYPE_PM_SHUTDOWN, /* shutdown request */ REQ_TYPE_SPECIAL, /* driver defined type */ - REQ_TYPE_LINUX_BLOCK, /* generic block layer message */ /* * for ATA/ATAPI devices. this really doesn't belong here, ide should * use REQ_TYPE_SPECIAL and use rq->cmd[0] with the range of driver @@ -70,20 +69,6 @@ enum rq_cmd_type_bits { REQ_TYPE_ATA_PC, }; -/* - * For request of type REQ_TYPE_LINUX_BLOCK, rq->cmd[0] is the opcode being - * sent down (similar to how REQ_TYPE_BLOCK_PC means that ->cmd[] holds a - * SCSI cdb. - * - * 0x00 -> 0x3f are driver private, to be used for whatever purpose they need, - * typically to differentiate REQ_TYPE_SPECIAL requests. - * - */ -enum { - REQ_LB_OP_EJECT = 0x40, /* eject request */ - REQ_LB_OP_FLUSH = 0x41, /* flush request */ -}; - #define BLK_MAX_CDB 16 /* -- cgit v1.2.3 From 8a6cfeb6deca3a8fefd639d898b0d163c0b5d368 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 8 Jul 2010 10:18:46 +0200 Subject: block: push down BKL into .locked_ioctl As a preparation for the removal of the big kernel lock in the block layer, this removes the BKL from the common ioctl handling code, moving it into every single driver still using it. Signed-off-by: Arnd Bergmann Acked-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index baf5258f5985..a8b05fc80c6d 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1246,7 +1246,6 @@ static inline int blk_integrity_rq(struct request *rq) struct block_device_operations { int (*open) (struct block_device *, fmode_t); int (*release) (struct gendisk *, fmode_t); - int (*locked_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*compat_ioctl) (struct block_device *, fmode_t, unsigned, unsigned long); int (*direct_access) (struct block_device *, sector_t, -- cgit v1.2.3 From edca4a380584a65a16839bdee33ec82244f0f88e Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Tue, 3 Aug 2010 12:54:51 +0200 Subject: block: disallow FS recursion from sb_issue_discard allocation Filesystems can call sb_issue_discard on a memory reclaim path (e.g. ext4 calls sb_issue_discard during journal commit). Use GFP_NOFS in sb_issue_discard to avoid recursing back into the FS. Reported-by: Mikulas Patocka Signed-off-by: Mike Snitzer Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index a8b05fc80c6d..89c855c5655c 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -933,7 +933,7 @@ static inline int sb_issue_discard(struct super_block *sb, { block <<= (sb->s_blocksize_bits - 9); nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_KERNEL, + return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); } -- cgit v1.2.3 From 8d57a98ccd0b4489003473979da8f5a1363ba7a3 Mon Sep 17 00:00:00 2001 From: Adrian Hunter Date: Wed, 11 Aug 2010 14:17:49 -0700 Subject: block: add secure discard Secure discard is the same as discard except that all copies of the discarded sectors (perhaps created by garbage collection) must also be erased. Signed-off-by: Adrian Hunter Acked-by: Jens Axboe Cc: Kyungmin Park Cc: Madhusudhan Chikkature Cc: Christoph Hellwig Cc: Ben Gardiner Cc: Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/blkdev.h | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 89c855c5655c..2c54906f678f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -389,6 +389,7 @@ struct request_queue #define QUEUE_FLAG_DISCARD 16 /* supports DISCARD */ #define QUEUE_FLAG_NOXMERGES 17 /* No extended merges */ #define QUEUE_FLAG_ADD_RANDOM 18 /* Contributes to random pool */ +#define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ (1 << QUEUE_FLAG_CLUSTER) | \ @@ -524,6 +525,8 @@ enum { #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) +#define blk_queue_secdiscard(q) (blk_queue_discard(q) && \ + test_bit(QUEUE_FLAG_SECDISCARD, &(q)->queue_flags)) #define blk_noretry_request(rq) \ ((rq)->cmd_flags & (REQ_FAILFAST_DEV|REQ_FAILFAST_TRANSPORT| \ @@ -918,10 +921,12 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /*issue request with barrier */ + BLKDEV_BARRIER, /* issue request with barrier */ + BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) #define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) +#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, -- cgit v1.2.3 From 0da2f50944976e890ccc9436ab88c0da87788d02 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: ide: remove unnecessary blk_queue_flushing() test in do_ide_request() Unplugging from a request function doesn't really help much (it's already in the request_fn) and soon block layer will be updated to mix barrier sequence with other commands, so there's no need to treat queue flushing any differently. ide was the only user of blk_queue_flushing(). Remove it. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Acked-by: David S. Miller Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..015375c7d031 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -521,7 +521,6 @@ enum { #define blk_queue_nonrot(q) test_bit(QUEUE_FLAG_NONROT, &(q)->queue_flags) #define blk_queue_io_stat(q) test_bit(QUEUE_FLAG_IO_STAT, &(q)->queue_flags) #define blk_queue_add_random(q) test_bit(QUEUE_FLAG_ADD_RANDOM, &(q)->queue_flags) -#define blk_queue_flushing(q) ((q)->ordseq) #define blk_queue_stackable(q) \ test_bit(QUEUE_FLAG_STACKABLE, &(q)->queue_flags) #define blk_queue_discard(q) test_bit(QUEUE_FLAG_DISCARD, &(q)->queue_flags) -- cgit v1.2.3 From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 17 +---------------- 1 file changed, 1 insertion(+), 16 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 015375c7d031..7077bc0d6138 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -470,12 +470,7 @@ enum { * DRAIN : ordering by draining is enough * DRAIN_FLUSH : ordering by draining w/ pre and post flushes * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - * TAG : ordering by tag is enough - * TAG_FLUSH : ordering by tag w/ pre and post flushes - * TAG_FUA : ordering by tag w/ pre flush and FUA write */ - QUEUE_ORDERED_BY_DRAIN = 0x01, - QUEUE_ORDERED_BY_TAG = 0x02, QUEUE_ORDERED_DO_PREFLUSH = 0x10, QUEUE_ORDERED_DO_BAR = 0x20, QUEUE_ORDERED_DO_POSTFLUSH = 0x40, @@ -483,8 +478,7 @@ enum { QUEUE_ORDERED_NONE = 0x00, - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_BY_DRAIN | - QUEUE_ORDERED_DO_BAR, + QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_POSTFLUSH, @@ -492,15 +486,6 @@ enum { QUEUE_ORDERED_DO_PREFLUSH | QUEUE_ORDERED_DO_FUA, - QUEUE_ORDERED_TAG = QUEUE_ORDERED_BY_TAG | - QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_TAG_FLUSH = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_TAG_FUA = QUEUE_ORDERED_TAG | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - /* * Ordered operation sequence */ -- cgit v1.2.3 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and FUA. If a device has write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7077bc0d6138..e97911d4dec3 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -355,8 +355,10 @@ struct request_queue struct blk_trace *blk_trace; #endif /* - * reserved for flush operations + * for flush operations */ + unsigned int flush_flags; + unsigned int ordered, next_ordered, ordseq; int orderr, ordcolor; struct request pre_flush_rq, bar_rq, post_flush_rq; @@ -865,8 +867,8 @@ extern void blk_queue_update_dma_alignment(struct request_queue *, int); extern void blk_queue_softirq_done(struct request_queue *, softirq_done_fn *); extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); +extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern int blk_queue_ordered(struct request_queue *, unsigned); extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); -- cgit v1.2.3 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index e97911d4dec3..996549d71923 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,7 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern bool blk_do_ordered(struct request_queue *, struct request **); extern unsigned blk_ordered_cur_seq(struct request_queue *); extern unsigned blk_ordered_req_seq(struct request *); extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); -- cgit v1.2.3 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by block layers. This patch drops barrier ordering by queue draining from block layer. Ordering by draining implementation was somewhat invasive to request handling. List of notable changes follow. * Each queue has 1 bit color which is flipped on each barrier issue. This is used to track whether a given request is issued before the current barrier or not. REQ_ORDERED_COLOR flag and coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_blk_ordered_cur_seq(). This logic is removed. * Draining completion logic in elv_completed_request() removed. * All barrier sequence requests were queued to request queue and then trckled to lower layer according to progress and thus maintaining request orders during requeue was necessary. This is replaced by queueing the next request in the barrier sequence only after the current one is complete from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 19 ++++++++----------- 1 file changed, 8 insertions(+), 11 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 996549d71923..20a3710a481b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -360,9 +360,10 @@ struct request_queue unsigned int flush_flags; unsigned int ordered, next_ordered, ordseq; - int orderr, ordcolor; - struct request pre_flush_rq, bar_rq, post_flush_rq; + int orderr; + struct request bar_rq; struct request *orig_bar_rq; + struct list_head pending_barriers; struct mutex sysfs_lock; @@ -491,12 +492,11 @@ enum { /* * Ordered operation sequence */ - QUEUE_ORDSEQ_STARTED = 0x01, /* flushing in progress */ - QUEUE_ORDSEQ_DRAIN = 0x02, /* waiting for the queue to be drained */ - QUEUE_ORDSEQ_PREFLUSH = 0x04, /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = 0x08, /* original barrier req in progress */ - QUEUE_ORDSEQ_POSTFLUSH = 0x10, /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = 0x20, + QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ + QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_ORDSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) @@ -869,9 +869,6 @@ extern void blk_queue_rq_timed_out(struct request_queue *, rq_timed_out_fn *); extern void blk_queue_rq_timeout(struct request_queue *, unsigned int); extern void blk_queue_flush(struct request_queue *q, unsigned int flush); extern struct backing_dev_info *blk_get_backing_dev_info(struct block_device *bdev); -extern unsigned blk_ordered_cur_seq(struct request_queue *); -extern unsigned blk_ordered_req_seq(struct request *); -extern bool blk_ordered_complete_seq(struct request_queue *, unsigned, int); extern int blk_rq_map_sg(struct request_queue *, struct request *, struct scatterlist *); extern void blk_dump_rq_flags(struct request *, char *); -- cgit v1.2.3 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 ++++++++++++------------ 1 file changed, 12 insertions(+), 12 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 20a3710a481b..1cd83ec077db 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,13 +357,13 @@ struct request_queue /* * for flush operations */ + unsigned int ordered, next_ordered; unsigned int flush_flags; - - unsigned int ordered, next_ordered, ordseq; - int orderr; - struct request bar_rq; - struct request *orig_bar_rq; - struct list_head pending_barriers; + unsigned int flush_seq; + int flush_err; + struct request flush_rq; + struct request *orig_flush_rq; + struct list_head pending_flushes; struct mutex sysfs_lock; @@ -490,13 +490,13 @@ enum { QUEUE_ORDERED_DO_FUA, /* - * Ordered operation sequence + * FLUSH/FUA sequences. */ - QUEUE_ORDSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_ORDSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_ORDSEQ_BAR = (1 << 2), /* barrier write in progress */ - QUEUE_ORDSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_ORDSEQ_DONE = (1 << 4), + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), }; #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) -- cgit v1.2.3 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. Block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or FUA which it doesn't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply make those noop. IOW, it no longer distinguishes between writeback cache which doesn't support cache flush and writethrough/no cache. Devices which have WB cache w/o flush are very difficult to come by these days and there's nothing much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing but some drivers assume that zero length requests don't have rq->bio which isn't true for these requests requiring the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 38 ++------------------------------------ 1 file changed, 2 insertions(+), 36 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1cd83ec077db..8ef705f800ab 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -357,7 +357,6 @@ struct request_queue /* * for flush operations */ - unsigned int ordered, next_ordered; unsigned int flush_flags; unsigned int flush_seq; int flush_err; @@ -465,40 +464,6 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) __clear_bit(flag, &q->queue_flags); } -enum { - /* - * Hardbarrier is supported with one of the following methods. - * - * NONE : hardbarrier unsupported - * DRAIN : ordering by draining is enough - * DRAIN_FLUSH : ordering by draining w/ pre and post flushes - * DRAIN_FUA : ordering by draining w/ pre flush and FUA write - */ - QUEUE_ORDERED_DO_PREFLUSH = 0x10, - QUEUE_ORDERED_DO_BAR = 0x20, - QUEUE_ORDERED_DO_POSTFLUSH = 0x40, - QUEUE_ORDERED_DO_FUA = 0x80, - - QUEUE_ORDERED_NONE = 0x00, - - QUEUE_ORDERED_DRAIN = QUEUE_ORDERED_DO_BAR, - QUEUE_ORDERED_DRAIN_FLUSH = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_POSTFLUSH, - QUEUE_ORDERED_DRAIN_FUA = QUEUE_ORDERED_DRAIN | - QUEUE_ORDERED_DO_PREFLUSH | - QUEUE_ORDERED_DO_FUA, - - /* - * FLUSH/FUA sequences. - */ - QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ - QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ - QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ - QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ - QUEUE_FSEQ_DONE = (1 << 4), -}; - #define blk_queue_plugged(q) test_bit(QUEUE_FLAG_PLUGGED, &(q)->queue_flags) #define blk_queue_tagged(q) test_bit(QUEUE_FLAG_QUEUED, &(q)->queue_flags) #define blk_queue_stopped(q) test_bit(QUEUE_FLAG_STOPPED, &(q)->queue_flags) @@ -578,7 +543,8 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER) + (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ + REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ -- cgit v1.2.3 From 2cf6d26a354ab6362e301b5a323832b02867df47 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:10 -0400 Subject: block: pass gfp_mask and flags to sb_issue_discard We'll need to get rid of the BLKDEV_IFL_BARRIER flag, and to facilitate that and to make the interface less confusing pass all flags explicitly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8ef705f800ab..6b305eb4a343 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -881,13 +881,12 @@ extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); -static inline int sb_issue_discard(struct super_block *sb, - sector_t block, sector_t nr_blocks) +static inline int sb_issue_discard(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { - block <<= (sb->s_blocksize_bits - 9); - nr_blocks <<= (sb->s_blocksize_bits - 9); - return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, - BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); + return blkdev_issue_discard(sb->s_bdev, block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); } extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 6b305eb4a343..cfcb3a610605 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -869,11 +869,9 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, } enum{ BLKDEV_WAIT, /* wait for completion */ - BLKDEV_BARRIER, /* issue request with barrier */ BLKDEV_SECURE, /* secure discard */ }; #define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_BARRIER (1 << BLKDEV_BARRIER) #define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, unsigned long); -- cgit v1.2.3 From 13f05c8d8e98bbdce89158bfdb2e380940695a88 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Fri, 10 Sep 2010 20:50:10 +0200 Subject: block/scsi: Provide a limit on the number of integrity segments Some controllers have a hardware limit on the number of protection information scatter-gather list segments they can handle. Introduce a max_integrity_segments limit in the block layer and provide a new scsi_host_template setting that allows HBA drivers to provide a value suitable for the hardware. Add support for honoring the integrity segment limit when merging both bios and requests. Signed-off-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 33 +++++++++++++++++++++++++++++---- 1 file changed, 29 insertions(+), 4 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..7e661106270a 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -124,6 +124,9 @@ struct request { * physical address coalescing is performed. */ unsigned short nr_phys_segments; +#if defined(CONFIG_BLK_DEV_INTEGRITY) + unsigned short nr_integrity_segments; +#endif unsigned short ioprio; @@ -243,6 +246,7 @@ struct queue_limits { unsigned short logical_block_size; unsigned short max_segments; + unsigned short max_integrity_segments; unsigned char misaligned; unsigned char discard_misaligned; @@ -1213,8 +1217,13 @@ struct blk_integrity { extern int blk_integrity_register(struct gendisk *, struct blk_integrity *); extern void blk_integrity_unregister(struct gendisk *); extern int blk_integrity_compare(struct gendisk *, struct gendisk *); -extern int blk_rq_map_integrity_sg(struct request *, struct scatterlist *); -extern int blk_rq_count_integrity_sg(struct request *); +extern int blk_rq_map_integrity_sg(struct request_queue *, struct bio *, + struct scatterlist *); +extern int blk_rq_count_integrity_sg(struct request_queue *, struct bio *); +extern int blk_integrity_merge_rq(struct request_queue *, struct request *, + struct request *); +extern int blk_integrity_merge_bio(struct request_queue *, struct request *, + struct bio *); static inline struct blk_integrity *bdev_get_integrity(struct block_device *bdev) @@ -1235,16 +1244,32 @@ static inline int blk_integrity_rq(struct request *rq) return bio_integrity(rq->bio); } +static inline void blk_queue_max_integrity_segments(struct request_queue *q, + unsigned int segs) +{ + q->limits.max_integrity_segments = segs; +} + +static inline unsigned short +queue_max_integrity_segments(struct request_queue *q) +{ + return q->limits.max_integrity_segments; +} + #else /* CONFIG_BLK_DEV_INTEGRITY */ #define blk_integrity_rq(rq) (0) -#define blk_rq_count_integrity_sg(a) (0) -#define blk_rq_map_integrity_sg(a, b) (0) +#define blk_rq_count_integrity_sg(a, b) (0) +#define blk_rq_map_integrity_sg(a, b, c) (0) #define bdev_get_integrity(a) (0) #define blk_get_integrity(a) (0) #define blk_integrity_compare(a, b) (0) #define blk_integrity_register(a, b) (0) #define blk_integrity_unregister(a) do { } while (0); +#define blk_queue_max_integrity_segments(a, b) do { } while (0); +#define queue_max_integrity_segments(a) (0) +#define blk_integrity_merge_rq(a, b, c) (0) +#define blk_integrity_merge_bio(a, b, c) (0) #endif /* CONFIG_BLK_DEV_INTEGRITY */ -- cgit v1.2.3 From 144177991ca624841ddbd1e7edff958fc0f6d1fe Mon Sep 17 00:00:00 2001 From: Namhyung Kim Date: Wed, 15 Sep 2010 13:08:27 +0200 Subject: block: fix an address space warning in blk-map.c Change type of 2nd parameter of blk_rq_aligned() into unsigned long and remove unnecessary casting. Now we can call it with 'uaddr' instead of 'ubuf' in __blk_rq_map_user() so that it can remove following warnings from sparse: block/blk-map.c:57:31: warning: incorrect type in argument 2 (different address spaces) block/blk-map.c:57:31: expected void *addr block/blk-map.c:57:31: got void [noderef] *ubuf However blk_rq_map_kern() needs one more local variable to handle it. Signed-off-by: Namhyung Kim Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7e661106270a..780824edac16 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1097,11 +1097,11 @@ static inline int queue_dma_alignment(struct request_queue *q) return q ? q->dma_alignment : 511; } -static inline int blk_rq_aligned(struct request_queue *q, void *addr, +static inline int blk_rq_aligned(struct request_queue *q, unsigned long addr, unsigned int len) { unsigned int alignment = queue_dma_alignment(q) | q->dma_pad_mask; - return !((unsigned long)addr & alignment) && !(len & alignment); + return !(addr & alignment) && !(len & alignment); } /* assumes size > 256 */ -- cgit v1.2.3 From e43473b7f223ec866f7db273697e76c337c390f9 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 15 Sep 2010 17:06:35 -0400 Subject: blkio: Core implementation of throttle policy o Actual implementation of throttling policy in block layer. Currently it implements READ and WRITE bytes per second throttling logic. IOPS throttling comes in later patches. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 24 ++++++++++++++++++++++++ 1 file changed, 24 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 780824edac16..1341df5806df 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -371,6 +371,11 @@ struct request_queue #if defined(CONFIG_BLK_DEV_BSG) struct bsg_class_device bsg_dev; #endif + +#ifdef CONFIG_BLK_DEV_THROTTLING + /* Throttle data */ + struct throtl_data *td; +#endif }; #define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ @@ -1131,6 +1136,7 @@ static inline void put_dev_sector(Sector p) struct work_struct; int kblockd_schedule_work(struct request_queue *q, struct work_struct *work); +int kblockd_schedule_delayed_work(struct request_queue *q, struct delayed_work *dwork, unsigned long delay); #ifdef CONFIG_BLK_CGROUP /* @@ -1174,6 +1180,24 @@ static inline uint64_t rq_io_start_time_ns(struct request *req) } #endif +#ifdef CONFIG_BLK_DEV_THROTTLING +extern int blk_throtl_init(struct request_queue *q); +extern void blk_throtl_exit(struct request_queue *q); +extern int blk_throtl_bio(struct request_queue *q, struct bio **bio); +extern void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay); +extern void throtl_shutdown_timer_wq(struct request_queue *q); +#else /* CONFIG_BLK_DEV_THROTTLING */ +static inline int blk_throtl_bio(struct request_queue *q, struct bio **bio) +{ + return 0; +} + +static inline int blk_throtl_init(struct request_queue *q) { return 0; } +static inline int blk_throtl_exit(struct request_queue *q) { return 0; } +static inline void throtl_schedule_delayed_work(struct request_queue *q, unsigned long delay) {} +static inline void throtl_shutdown_timer_wq(struct request_queue *q) {} +#endif /* CONFIG_BLK_DEV_THROTTLING */ + #define MODULE_ALIAS_BLOCKDEV(major,minor) \ MODULE_ALIAS("block-major-" __stringify(major) "-" __stringify(minor)) #define MODULE_ALIAS_BLOCKDEV_MAJOR(major) \ -- cgit v1.2.3 From dd3932eddf428571762596e17b65f5dc92ca361b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2010 20:51:46 +0200 Subject: block: remove BLKDEV_IFL_WAIT All the blkdev_issue_* helpers can only sanely be used for synchronous caller. To issue cache flushes or barriers asynchronously the caller needs to set up a bio by itself with a completion callback to move the asynchronous state machine ahead. So drop the BLKDEV_IFL_WAIT flag that is always specified when calling blkdev_issue_* and also remove the now unused flags argument to blkdev_issue_flush and blkdev_issue_zeroout. For blkdev_issue_discard we need to keep it for the secure discard flag, which gains a more descriptive name and loses the bitops vs flag confusion. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 14 +++++--------- 1 file changed, 5 insertions(+), 9 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index cfcb3a610605..accbd0e5c893 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -867,18 +867,14 @@ static inline struct request *blk_map_queue_find_tag(struct blk_queue_tag *bqt, return NULL; return bqt->tag_index[tag]; } -enum{ - BLKDEV_WAIT, /* wait for completion */ - BLKDEV_SECURE, /* secure discard */ -}; -#define BLKDEV_IFL_WAIT (1 << BLKDEV_WAIT) -#define BLKDEV_IFL_SECURE (1 << BLKDEV_SECURE) -extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *, - unsigned long); + +#define BLKDEV_DISCARD_SECURE 0x01 /* secure discard */ + +extern int blkdev_issue_flush(struct block_device *, gfp_t, sector_t *); extern int blkdev_issue_discard(struct block_device *bdev, sector_t sector, sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); extern int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags); + sector_t nr_sects, gfp_t gfp_mask); static inline int sb_issue_discard(struct super_block *sb, sector_t block, sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) { -- cgit v1.2.3 From 892b6f90db81cccb723d5d92f4fddc2d68b206e1 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 13 Oct 2010 21:18:03 +0200 Subject: block: Ensure physical block size is unsigned int Physical block size was declared unsigned int to accomodate the maximum size reported by READ CAPACITY(16). Make sure we use the right type in the related functions. Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 1341df5806df..8f3dd981b973 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -860,7 +860,7 @@ extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); extern void blk_queue_max_discard_sectors(struct request_queue *q, unsigned int max_discard_sectors); extern void blk_queue_logical_block_size(struct request_queue *, unsigned short); -extern void blk_queue_physical_block_size(struct request_queue *, unsigned short); +extern void blk_queue_physical_block_size(struct request_queue *, unsigned int); extern void blk_queue_alignment_offset(struct request_queue *q, unsigned int alignment); extern void blk_limits_io_min(struct queue_limits *limits, unsigned int min); @@ -1013,7 +1013,7 @@ static inline unsigned int queue_physical_block_size(struct request_queue *q) return q->limits.physical_block_size; } -static inline int bdev_physical_block_size(struct block_device *bdev) +static inline unsigned int bdev_physical_block_size(struct block_device *bdev) { return queue_physical_block_size(bdev_get_queue(bdev)); } -- cgit v1.2.3 From 7681bfeeccff5efa9eb29bf09249a3c400b15327 Mon Sep 17 00:00:00 2001 From: Yasuaki Ishimatsu Date: Tue, 19 Oct 2010 09:05:00 +0200 Subject: block: fix accounting bug on cross partition merges /proc/diskstats would display a strange output as follows. $ cat /proc/diskstats |grep sda 8 0 sda 90524 7579 102154 20464 0 0 0 0 0 14096 20089 8 1 sda1 19085 1352 21841 4209 0 0 0 0 4294967064 15689 4293424691 ~~~~~~~~~~ 8 2 sda2 71252 3624 74891 15950 0 0 0 0 232 23995 1562390 8 3 sda3 54 487 2188 92 0 0 0 0 0 88 92 8 4 sda4 4 0 8 0 0 0 0 0 0 0 0 8 5 sda5 81 2027 2130 138 0 0 0 0 0 87 137 Its reason is the wrong way of accounting hd_struct->in_flight. When a bio is merged into a request belongs to different partition by ELEVATOR_FRONT_MERGE. The detailed root cause is as follows. Assuming that there are two partition, sda1 and sda2. 1. A request for sda2 is in request_queue. Hence sda1's hd_struct->in_flight is 0 and sda2's one is 1. | hd_struct->in_flight --------------------------- sda1 | 0 sda2 | 1 --------------------------- 2. A bio belongs to sda1 is issued and is merged into the request mentioned on step1 by ELEVATOR_BACK_MERGE. The first sector of the request is changed from sda2 region to sda1 region. However the two partition's hd_struct->in_flight are not changed. | hd_struct->in_flight --------------------------- sda1 | 0 sda2 | 1 --------------------------- 3. The request is finished and blk_account_io_done() is called. In this case, sda2's hd_struct->in_flight, not a sda1's one, is decremented. | hd_struct->in_flight --------------------------- sda1 | -1 sda2 | 1 --------------------------- The patch fixes the problem by caching the partition lookup inside the request structure, hence making sure that the increment and decrement will always happen on the same partition struct. This also speeds up IO with accounting enabled, since it cuts down on the number of lookups we have to do. When reloading partition tables, quiesce IO to ensure that no request references to the partition struct exists. When it is safe to free the partition table, the IO for that device is restarted again. Signed-off-by: Yasuaki Ishimatsu Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 8f3dd981b973..16f7f1be1acf 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,6 +115,7 @@ struct request { void *elevator_private3; struct gendisk *rq_disk; + struct hd_struct *part; unsigned long start_time; #ifdef CONFIG_BLK_CGROUP unsigned long long start_time_ns; -- cgit v1.2.3 From f253b86b4ad1b3220544e75880510fd455ebd23f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Sun, 24 Oct 2010 22:06:02 +0200 Subject: Revert "block: fix accounting bug on cross partition merges" This reverts commit 7681bfeeccff5efa9eb29bf09249a3c400b15327. Conflicts: include/linux/genhd.h It has numerous issues with the cleanup path and non-elevator devices. Revert it for now so we can come up with a clean version without rushing things. Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 009b80e49f53..646b462d04df 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -115,7 +115,6 @@ struct request { void *elevator_private3; struct gendisk *rq_disk; - struct hd_struct *part; unsigned long start_time; #ifdef CONFIG_BLK_CGROUP unsigned long long start_time_ns; -- cgit v1.2.3 From e6fa0be699449d28a20e815bfe9ce26725ec4962 Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Wed, 27 Oct 2010 21:30:04 -0400 Subject: Add helper function for blkdev_issue_zeroout (sb_issue_discard) This is done the same way as helper sb_issue_discard for blkdev_issue_discard. Signed-off-by: Lukas Czerner Signed-off-by: "Theodore Ts'o" --- include/linux/blkdev.h | 8 ++++++++ 1 file changed, 8 insertions(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 2c54906f678f..e5cb4d029689 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -941,6 +941,14 @@ static inline int sb_issue_discard(struct super_block *sb, return blkdev_issue_discard(sb->s_bdev, block, nr_blocks, GFP_NOFS, BLKDEV_IFL_WAIT | BLKDEV_IFL_BARRIER); } +static inline int sb_issue_zeroout(struct super_block *sb, sector_t block, + sector_t nr_blocks, gfp_t gfp_mask, unsigned long flags) +{ + return blkdev_issue_zeroout(sb->s_bdev, + block << (sb->s_blocksize_bits - 9), + nr_blocks << (sb->s_blocksize_bits - 9), + gfp_mask, flags); +} extern int blk_verify_command(unsigned char *cmd, fmode_t has_write_perm); -- cgit v1.2.3 From 02e031cbc843b010e72fcc05c76113c688b2860f Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 10 Nov 2010 14:54:09 +0100 Subject: block: remove REQ_HARDBARRIER REQ_HARDBARRIER is dead now, so remove the leftovers. What's left at this point is: - various checks inside the block layer. - sanity checks in bio based drivers. - now unused bio_empty_barrier helper. - Xen blockfront use of BLKIF_OP_WRITE_BARRIER - it's dead for a while, but Xen really needs to sort out it's barrier situaton. - setting of ordered tags in uas - dead code copied from old scsi drivers. - scsi different retry for barriers - it's dead and should have been removed when flushes were converted to FS requests. - blktrace handling of barriers - removed. Someone who knows blktrace better should add support for REQ_FLUSH and REQ_FUA, though. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 5027a599077d..aae86fd10c4f 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -552,8 +552,7 @@ static inline void blk_clear_queue_full(struct request_queue *q, int sync) * it already be started by driver. */ #define RQ_NOMERGE_FLAGS \ - (REQ_NOMERGE | REQ_STARTED | REQ_HARDBARRIER | REQ_SOFTBARRIER | \ - REQ_FLUSH | REQ_FUA) + (REQ_NOMERGE | REQ_STARTED | REQ_SOFTBARRIER | REQ_FLUSH | REQ_FUA) #define rq_mergeable(rq) \ (!((rq)->cmd_flags & RQ_NOMERGE_FLAGS) && \ (((rq)->cmd_flags & REQ_DISCARD) || \ -- cgit v1.2.3 From e692cb668fdd5a712c6ed2a2d6f2a36ee83997b4 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Wed, 1 Dec 2010 19:41:49 +0100 Subject: block: Deprecate QUEUE_FLAG_CLUSTER and use queue_limits instead When stacking devices, a request_queue is not always available. This forced us to have a no_cluster flag in the queue_limits that could be used as a carrier until the request_queue had been set up for a metadevice. There were several problems with that approach. First of all it was up to the stacking device to remember to set queue flag after stacking had completed. Also, the queue flag and the queue limits had to be kept in sync at all times. We got that wrong, which could lead to us issuing commands that went beyond the max scatterlist limit set by the driver. The proper fix is to avoid having two flags for tracking the same thing. We deprecate QUEUE_FLAG_CLUSTER and use the queue limit directly in the block layer merging functions. The queue_limit 'no_cluster' is turned into 'cluster' to avoid double negatives and to ease stacking. Clustering defaults to being enabled as before. The queue flag logic is removed from the stacking function, and explicitly setting the cluster flag is no longer necessary in DM and MD. Reported-by: Ed Lin Signed-off-by: Martin K. Petersen Acked-by: Mike Snitzer Cc: stable@kernel.org Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index aae86fd10c4f..95aeeeb49e8b 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -250,7 +250,7 @@ struct queue_limits { unsigned char misaligned; unsigned char discard_misaligned; - unsigned char no_cluster; + unsigned char cluster; signed char discard_zeroes_data; }; @@ -380,7 +380,6 @@ struct request_queue #endif }; -#define QUEUE_FLAG_CLUSTER 0 /* cluster several segments into 1 */ #define QUEUE_FLAG_QUEUED 1 /* uses generic tag queueing */ #define QUEUE_FLAG_STOPPED 2 /* queue is stopped */ #define QUEUE_FLAG_SYNCFULL 3 /* read queue has been filled */ @@ -403,7 +402,6 @@ struct request_queue #define QUEUE_FLAG_SECDISCARD 19 /* supports SECDISCARD */ #define QUEUE_FLAG_DEFAULT ((1 << QUEUE_FLAG_IO_STAT) | \ - (1 << QUEUE_FLAG_CLUSTER) | \ (1 << QUEUE_FLAG_STACKABLE) | \ (1 << QUEUE_FLAG_SAME_COMP) | \ (1 << QUEUE_FLAG_ADD_RANDOM)) @@ -510,6 +508,11 @@ static inline void queue_flag_clear(unsigned int flag, struct request_queue *q) #define rq_data_dir(rq) ((rq)->cmd_flags & 1) +static inline unsigned int blk_queue_cluster(struct request_queue *q) +{ + return q->limits.cluster; +} + /* * We regard a request as sync, if either a read or a sync write */ -- cgit v1.2.3 From 72d4cd9f38b5ed96b75df4c622be25e1c2648dd3 Mon Sep 17 00:00:00 2001 From: Mike Snitzer Date: Fri, 17 Dec 2010 08:34:20 +0100 Subject: block: max hardware sectors limit wrapper Implement blk_limits_max_hw_sectors() and make blk_queue_max_hw_sectors() a wrapper around it. DM needs this to avoid setting queue_limits' max_hw_sectors and max_sectors directly. dm_set_device_limits() now leverages blk_limits_max_hw_sectors() logic to establish the appropriate max_hw_sectors minimum (PAGE_SIZE). Fixes issue where DM was incorrectly setting max_sectors rather than max_hw_sectors (which caused dm_merge_bvec()'s max_hw_sectors check to be ineffective). Signed-off-by: Mike Snitzer Cc: stable@kernel.org Acked-by: Martin K. Petersen Signed-off-by: Jens Axboe --- include/linux/blkdev.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux/blkdev.h') diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 95aeeeb49e8b..36ab42c9bb99 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -808,6 +808,7 @@ extern struct request_queue *blk_init_allocated_queue(struct request_queue *, extern void blk_cleanup_queue(struct request_queue *); extern void blk_queue_make_request(struct request_queue *, make_request_fn *); extern void blk_queue_bounce_limit(struct request_queue *, u64); +extern void blk_limits_max_hw_sectors(struct queue_limits *, unsigned int); extern void blk_queue_max_hw_sectors(struct request_queue *, unsigned int); extern void blk_queue_max_segments(struct request_queue *, unsigned short); extern void blk_queue_max_segment_size(struct request_queue *, unsigned int); -- cgit v1.2.3