From b203886edbcaac3ca427cf4dbcb50b18bdb346fd Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 28 Jun 2008 08:31:50 +1000 Subject: md: kill STRIPE_OP_MOD_DMA in raid5 offload From: Dan Williams This micro-optimization allowed the raid code to skip a re-read of the parity block after checking parity. It took advantage of the fact that xor-offload-engines have their own internal result buffer and can check parity without writing to memory. Remove it for the following reasons: 1/ It is a layering violation for MD to need to manage the DMA and non-DMA paths within async_xor_zero_sum 2/ Bad precedent to toggle the 'ops' flags outside the lock 3/ Hard to realize a performance gain as reads will not need an updated parity block and writes will dirty it anyways. Signed-off-by: Dan Williams Signed-off-by: Neil Brown --- include/linux/raid/raid5.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux/raid/raid5.h') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index f0827d31ae6f..4ecae31a3dcb 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -267,10 +267,8 @@ struct r6_state { /* modifiers to the base operations * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back - * STRIPE_OP_MOD_DMA_CHECK - parity is not corrupted by the check */ #define STRIPE_OP_MOD_REPAIR_PD 7 -#define STRIPE_OP_MOD_DMA_CHECK 8 /* * Plugging: -- cgit v1.2.3 From 2b7497f0e0a0b9cf21d822e427d5399b2056501a Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 28 Jun 2008 08:31:52 +1000 Subject: md: kill STRIPE_OP_IO flag From: Dan Williams The R5_Want{Read,Write} flags already gate i/o. So, this flag is superfluous and we can unconditionally call ops_run_io(). Signed-off-by: Dan Williams Signed-off-by: Neil Brown --- include/linux/raid/raid5.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux/raid/raid5.h') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 4ecae31a3dcb..1301195abf4b 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -263,7 +263,6 @@ struct r6_state { #define STRIPE_OP_BIODRAIN 3 #define STRIPE_OP_POSTXOR 4 #define STRIPE_OP_CHECK 5 -#define STRIPE_OP_IO 6 /* modifiers to the base operations * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back -- cgit v1.2.3 From ecc65c9b3f9b9d740a5deade3d85b39be56401b6 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 28 Jun 2008 08:31:57 +1000 Subject: md: replace STRIPE_OP_CHECK with 'check_states' From: Dan Williams The STRIPE_OP_* flags record the state of stripe operations which are performed outside the stripe lock. Their use in indicating which operations need to be run is straightforward; however, interpolating what the next state of the stripe should be based on a given combination of these flags is not straightforward, and has led to bugs. An easier to read implementation with minimal degrees of freedom is needed. Towards this goal, this patch introduces explicit states to replace what was previously interpolated from the STRIPE_OP_* flags. For now this only converts the handle_parity_checks5 path, removing a user of the ops.{pending,ack,complete,count} fields of struct stripe_operations. This conversion also found a remaining issue with the current code. There is a small window for a drive to fail between when we schedule a repair and when the parity calculation for that repair completes. When this happens we will writeback to 'failed_num' when we really want to write back to 'pd_idx'. Signed-off-by: Dan Williams Signed-off-by: Neil Brown --- include/linux/raid/raid5.h | 46 ++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 40 insertions(+), 6 deletions(-) (limited to 'include/linux/raid/raid5.h') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 1301195abf4b..2c96d5fd54bf 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -158,6 +158,41 @@ * the compute block completes. */ +/* + * Operations state - intermediate states that are visible outside of sh->lock + * In general _idle indicates nothing is running, _run indicates a data + * processing operation is active, and _result means the data processing result + * is stable and can be acted upon. For simple operations like biofill and + * compute that only have an _idle and _run state they are indicated with + * sh->state flags (STRIPE_BIOFILL_RUN and STRIPE_COMPUTE_RUN) + */ +/** + * enum check_states - handles syncing / repairing a stripe + * @check_state_idle - check operations are quiesced + * @check_state_run - check operation is running + * @check_state_result - set outside lock when check result is valid + * @check_state_compute_run - check failed and we are repairing + * @check_state_compute_result - set outside lock when compute result is valid + */ +enum check_states { + check_state_idle = 0, + check_state_run, /* parity check */ + check_state_check_result, + check_state_compute_run, /* parity repair */ + check_state_compute_result, +}; + +/** + * enum reconstruct_states - handles writing or expanding a stripe + */ +enum reconstruct_states { + reconstruct_state_idle = 0, + reconstruct_state_drain_run, /* write */ + reconstruct_state_run, /* expand */ + reconstruct_state_drain_result, + reconstruct_state_result, +}; + struct stripe_head { struct hlist_node hash; struct list_head lru; /* inactive_list or handle_list */ @@ -169,6 +204,7 @@ struct stripe_head { spinlock_t lock; int bm_seq; /* sequence number for bitmap flushes */ int disks; /* disks in stripe */ + enum check_states check_state; /* stripe_operations * @pending - pending ops flags (set for request->issue->complete) * @ack - submitted ops flags (set for issue->complete) @@ -202,6 +238,7 @@ struct stripe_head_state { int locked, uptodate, to_read, to_write, failed, written; int to_fill, compute, req_compute, non_overwrite; int failed_num; + unsigned long ops_request; }; /* r6_state - extra state data only relevant to r6 */ @@ -254,8 +291,10 @@ struct r6_state { #define STRIPE_EXPAND_READY 11 #define STRIPE_IO_STARTED 12 /* do not count towards 'bypass_count' */ #define STRIPE_FULL_WRITE 13 /* all blocks are set to be overwritten */ +#define STRIPE_BIOFILL_RUN 14 +#define STRIPE_COMPUTE_RUN 15 /* - * Operations flags (in issue order) + * Operation request flags */ #define STRIPE_OP_BIOFILL 0 #define STRIPE_OP_COMPUTE_BLK 1 @@ -264,11 +303,6 @@ struct r6_state { #define STRIPE_OP_POSTXOR 4 #define STRIPE_OP_CHECK 5 -/* modifiers to the base operations - * STRIPE_OP_MOD_REPAIR_PD - compute the parity block and write it back - */ -#define STRIPE_OP_MOD_REPAIR_PD 7 - /* * Plugging: * -- cgit v1.2.3 From 600aa10993012ff2dd5617720dac081e4f992017 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 28 Jun 2008 08:32:05 +1000 Subject: md: replace STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} with 'reconstruct_states' From: Dan Williams Track the state of reconstruct operations (recalculating the parity block usually due to incoming writes, or as part of array expansion) Reduces the scope of the STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} flags to only tracking whether a reconstruct operation has been requested via the ops_request field of struct stripe_head_state. This is the final step in the removal of ops.{pending,ack,complete,count}, i.e. the STRIPE_OP_{BIODRAIN,PREXOR,POSTXOR} flags only request an operation and do not track the state of the operation. Signed-off-by: Dan Williams Signed-off-by: Neil Brown --- include/linux/raid/raid5.h | 9 +-------- 1 file changed, 1 insertion(+), 8 deletions(-) (limited to 'include/linux/raid/raid5.h') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 2c96d5fd54bf..5f3e674b87dd 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -205,19 +205,12 @@ struct stripe_head { int bm_seq; /* sequence number for bitmap flushes */ int disks; /* disks in stripe */ enum check_states check_state; + enum reconstruct_states reconstruct_state; /* stripe_operations - * @pending - pending ops flags (set for request->issue->complete) - * @ack - submitted ops flags (set for issue->complete) - * @complete - completed ops flags (set for complete) * @target - STRIPE_OP_COMPUTE_BLK target - * @count - raid5_runs_ops is set to run when this is non-zero */ struct stripe_operations { - unsigned long pending; - unsigned long ack; - unsigned long complete; int target; - int count; u32 zero_sum_result; } ops; struct r5dev { -- cgit v1.2.3 From d8ee0728b5b30d7a6f62c399a95e953616d31f23 Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Sat, 28 Jun 2008 08:32:06 +1000 Subject: md: replace R5_WantPrexor with R5_WantDrain, add 'prexor' reconstruct_states From: Dan Williams Currently ops_run_biodrain and other locations have extra logic to determine which blocks are processed in the prexor and non-prexor cases. This can be eliminated if handle_write_operations5 flags the blocks to be processed in all cases via R5_Wantdrain. The presence of the prexor operation is tracked in sh->reconstruct_state. Signed-off-by: Dan Williams Signed-off-by: Neil Brown --- include/linux/raid/raid5.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux/raid/raid5.h') diff --git a/include/linux/raid/raid5.h b/include/linux/raid/raid5.h index 5f3e674b87dd..3b2672792457 100644 --- a/include/linux/raid/raid5.h +++ b/include/linux/raid/raid5.h @@ -187,8 +187,10 @@ enum check_states { */ enum reconstruct_states { reconstruct_state_idle = 0, + reconstruct_state_prexor_drain_run, /* prexor-write */ reconstruct_state_drain_run, /* write */ reconstruct_state_run, /* expand */ + reconstruct_state_prexor_drain_result, reconstruct_state_drain_result, reconstruct_state_result, }; @@ -258,9 +260,7 @@ struct r6_state { #define R5_Wantfill 12 /* dev->toread contains a bio that needs * filling */ -#define R5_Wantprexor 13 /* distinguish blocks ready for rmw from - * other "towrites" - */ +#define R5_Wantdrain 13 /* dev->towrite needs to be drained */ /* * Write method */ -- cgit v1.2.3