From 288f422ec13667de40b278535d2a5fb5c77352c4 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 27 May 2010 15:07:43 +0200 Subject: drbd: Track all IO requests on the TL, not writes only With that the drbd_fail_pending_reads() function becomes obsolete. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 37 ------------------------------------- 1 file changed, 37 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 081522d3c742..88a5e1f4ec1d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3666,41 +3666,6 @@ static void drbdd(struct drbd_conf *mdev) } } -static void drbd_fail_pending_reads(struct drbd_conf *mdev) -{ - struct hlist_head *slot; - struct hlist_node *pos; - struct hlist_node *tmp; - struct drbd_request *req; - int i; - - /* - * Application READ requests - */ - spin_lock_irq(&mdev->req_lock); - for (i = 0; i < APP_R_HSIZE; i++) { - slot = mdev->app_reads_hash+i; - hlist_for_each_entry_safe(req, pos, tmp, slot, colision) { - /* it may (but should not any longer!) - * be on the work queue; if that assert triggers, - * we need to also grab the - * spin_lock_irq(&mdev->data.work.q_lock); - * and list_del_init here. */ - D_ASSERT(list_empty(&req->w.list)); - /* It would be nice to complete outside of spinlock. - * But this is easier for now. */ - _req_mod(req, connection_lost_while_pending); - } - } - for (i = 0; i < APP_R_HSIZE; i++) - if (!hlist_empty(mdev->app_reads_hash+i)) - dev_warn(DEV, "ASSERT FAILED: app_reads_hash[%d].first: " - "%p, should be NULL\n", i, mdev->app_reads_hash[i].first); - - memset(mdev->app_reads_hash, 0, APP_R_HSIZE*sizeof(void *)); - spin_unlock_irq(&mdev->req_lock); -} - void drbd_flush_workqueue(struct drbd_conf *mdev) { struct drbd_wq_barrier barr; @@ -3770,8 +3735,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (!mdev->state.susp) tl_clear(mdev); - drbd_fail_pending_reads(mdev); - dev_info(DEV, "Connection closed\n"); drbd_md_sync(mdev); -- cgit v1.2.3 From 11b58e73a3a3d1bbb582370d59f9b2c4d0136b42 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 12 May 2010 17:08:26 +0200 Subject: drbd: factored tl_restart() out of tl_clear(). If IO was frozen for a temporal network outage, resend the content of the transfer-log into the newly established connection. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 88a5e1f4ec1d..8daa920c40a4 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -776,9 +776,6 @@ static int drbd_connect(struct drbd_conf *mdev) D_ASSERT(!mdev->data.socket); - if (test_and_clear_bit(CREATE_BARRIER, &mdev->flags)) - dev_err(DEV, "CREATE_BARRIER flag was set in drbd_connect - now cleared!\n"); - if (drbd_request_state(mdev, NS(conn, C_WF_CONNECTION)) < SS_SUCCESS) return -2; -- cgit v1.2.3 From 18a50fa213d46d5592f6542c91ab4c4760cf346c Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 21 Jun 2010 14:14:15 +0200 Subject: drbd: Now we need to handle the ed_uuid of an diskless, unconnected primary correctly Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 8daa920c40a4..72bc1a130645 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3104,6 +3104,11 @@ static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) drbd_md_sync(mdev); } put_ldev(mdev); + } else if (mdev->state.disk < D_INCONSISTENT && + mdev->state.role == R_PRIMARY) { + /* I am a diskless primary, the peer just created a new current UUID + for me. */ + drbd_set_ed_uuid(mdev, p_uuid[UI_CURRENT]); } /* Before we test for the disk state, we should wait until an eventually -- cgit v1.2.3 From 87f7be4cf88e93069f4cc63baf2ce70fdfc59c63 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 11 Jun 2010 13:56:33 +0200 Subject: drbd: Run the fence-peer helper asynchronously Since we can not thaw the transfer log, the next logical step is to allow reconnects while the fence-peer handler runs. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 8 ++------ 1 file changed, 2 insertions(+), 6 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 72bc1a130645..101ad186244c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3747,12 +3747,8 @@ static void drbd_disconnect(struct drbd_conf *mdev) put_ldev(mdev); } - if (mdev->state.role == R_PRIMARY) { - if (fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) { - enum drbd_disk_state nps = drbd_try_outdate_peer(mdev); - drbd_request_state(mdev, NS(pdsk, nps)); - } - } + if (mdev->state.role == R_PRIMARY && fp >= FP_RESOURCE && mdev->state.pdsk >= D_UNKNOWN) + drbd_try_outdate_peer_async(mdev); spin_lock_irq(&mdev->req_lock); os = mdev->state; -- cgit v1.2.3 From 481c6f503213ab14f69ce88fff7b1ece325522f2 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 22 Jun 2010 14:03:27 +0200 Subject: drbd: Ensure that the peer was not rebootet in the meantime before resending TL Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 101ad186244c..3a8131a26559 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3265,7 +3265,18 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; - + if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED && + test_bit(NEW_CUR_UUID, &mdev->flags)) { + /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this + for temporal network outages! */ + spin_unlock_irq(&mdev->req_lock); + dev_err(DEV, "Aborting Connect, can not thaw IO with an only Consistent peer\n"); + tl_clear(mdev); + drbd_uuid_new_current(mdev); + clear_bit(NEW_CUR_UUID, &mdev->flags); + drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); + return FALSE; + } rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL); ns = mdev->state; spin_unlock_irq(&mdev->req_lock); -- cgit v1.2.3 From 65d922c33ebd359db25d5846929b2eafc4238fcc Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 16 Jun 2010 16:18:09 +0200 Subject: drbd: Do not do a hard state change when establishing a connection [bugz 304] Make sure the state engine can deny two primaries to connect Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 3a8131a26559..224c79ed16e9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3188,6 +3188,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) enum drbd_conns nconn, oconn; union drbd_state ns, peer_state; enum drbd_disk_state real_peer_disk; + enum chg_state_flags cs_flags; int rv; ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) @@ -3265,6 +3266,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; + cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD); if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this @@ -3277,7 +3279,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) drbd_force_state(mdev, NS2(conn, C_PROTOCOL_ERROR, susp, 0)); return FALSE; } - rv = _drbd_set_state(mdev, ns, CS_VERBOSE | CS_HARD, NULL); + rv = _drbd_set_state(mdev, ns, cs_flags, NULL); ns = mdev->state; spin_unlock_irq(&mdev->req_lock); -- cgit v1.2.3 From 84dfb9f564208a0331131d1ab922382c7d61a553 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 23 Jun 2010 11:20:05 +0200 Subject: drbd: Fixed a deadlock, probably only affected UP machines After disconnect (most likely mdev->net_cnt == 0) and we are still in an unstable state (!drbd_state_is_stable()). When we get an IO request in drbd_get_max_buffers() (called from __inc_ap_bio_cond(), called from inc_ap_bio()) we wake up misc_wait. Misc_wait is also used in inc_ap_bio() to sleep until the outcome of __inc_ap_bio_cond() changes. => Busy loop! Solution: Have a dedicated wait queue for get_net_conf() and put_net_conf(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 224c79ed16e9..22d74d79ba42 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3775,7 +3775,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (os.conn == C_DISCONNECTING) { struct hlist_head *h; - wait_event(mdev->misc_wait, atomic_read(&mdev->net_cnt) == 0); + wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); /* we must not free the tl_hash * while application io is still on the fly */ -- cgit v1.2.3 From f70b3511599c49a3dc20ae349d6cdc5af47659df Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Thu, 24 Jun 2010 14:34:40 +0200 Subject: drbd: Do not try to free tl_hash in drbd_disconnect() when IO is suspended We may not free tl_hash when IO is suspended, since we can not wait until ap_bio_cnt reaches zero. We can do this after susp reched 0, since then tl_clear was called Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 60 +++++++++++++++++++++++--------------- 1 file changed, 36 insertions(+), 24 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 22d74d79ba42..5e49ee75d3c9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3691,6 +3691,36 @@ void drbd_flush_workqueue(struct drbd_conf *mdev) wait_for_completion(&barr.done); } +void drbd_free_tl_hash(struct drbd_conf *mdev) +{ + struct hlist_head *h; + + spin_lock_irq(&mdev->req_lock); + + if (!mdev->tl_hash || mdev->state.conn != C_STANDALONE) { + spin_unlock_irq(&mdev->req_lock); + return; + } + /* paranoia code */ + for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", + (int)(h - mdev->ee_hash), h->first); + kfree(mdev->ee_hash); + mdev->ee_hash = NULL; + mdev->ee_hash_s = 0; + + /* paranoia code */ + for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) + if (h->first) + dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", + (int)(h - mdev->tl_hash), h->first); + kfree(mdev->tl_hash); + mdev->tl_hash = NULL; + mdev->tl_hash_s = 0; + spin_unlock_irq(&mdev->req_lock); +} + static void drbd_disconnect(struct drbd_conf *mdev) { enum drbd_fencing_p fp; @@ -3774,32 +3804,14 @@ static void drbd_disconnect(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); if (os.conn == C_DISCONNECTING) { - struct hlist_head *h; wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); - /* we must not free the tl_hash - * while application io is still on the fly */ - wait_event(mdev->misc_wait, atomic_read(&mdev->ap_bio_cnt) == 0); - - spin_lock_irq(&mdev->req_lock); - /* paranoia code */ - for (h = mdev->ee_hash; h < mdev->ee_hash + mdev->ee_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED ee_hash[%u].first == %p, expected NULL\n", - (int)(h - mdev->ee_hash), h->first); - kfree(mdev->ee_hash); - mdev->ee_hash = NULL; - mdev->ee_hash_s = 0; - - /* paranoia code */ - for (h = mdev->tl_hash; h < mdev->tl_hash + mdev->tl_hash_s; h++) - if (h->first) - dev_err(DEV, "ASSERT FAILED tl_hash[%u] == %p, expected NULL\n", - (int)(h - mdev->tl_hash), h->first); - kfree(mdev->tl_hash); - mdev->tl_hash = NULL; - mdev->tl_hash_s = 0; - spin_unlock_irq(&mdev->req_lock); + if (!mdev->state.susp) { + /* we must not free the tl_hash + * while application io is still on the fly */ + wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); + drbd_free_tl_hash(mdev); + } crypto_free_hash(mdev->cram_hmac_tfm); mdev->cram_hmac_tfm = NULL; -- cgit v1.2.3 From 8e26f9ccb9be00fdb33551a34c8f6029e89ab79f Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 6 Jul 2010 17:25:54 +0200 Subject: drbd: New sync_param packet, that includes the parameters of the new controller Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 18 +++++++++++++++--- 1 file changed, 15 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5e49ee75d3c9..34bea972f734 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2805,7 +2805,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) { int ok = TRUE; - struct p_rs_param_89 *p = (struct p_rs_param_89 *)h; + struct p_rs_param_95 *p = (struct p_rs_param_95 *)h; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; @@ -2814,7 +2814,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) + SHARED_SECRET_MAX - : /* 89 */ sizeof(struct p_rs_param_89); + : apv <= 94 ? sizeof(struct p_rs_param_89) + : /* apv >= 95 */ sizeof(struct p_rs_param_95); if (h->length > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", @@ -2825,10 +2826,14 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) if (apv <= 88) { header_size = sizeof(struct p_rs_param) - sizeof(*h); data_size = h->length - header_size; - } else /* apv >= 89 */ { + } else if (apv <= 94) { header_size = sizeof(struct p_rs_param_89) - sizeof(*h); data_size = h->length - header_size; D_ASSERT(data_size == 0); + } else { + header_size = sizeof(struct p_rs_param_95) - sizeof(*h); + data_size = h->length - header_size; + D_ASSERT(data_size == 0); } /* initialize verify_alg and csums_alg */ @@ -2893,6 +2898,13 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) } } + if (apv > 94) { + mdev->sync_conf.rate = be32_to_cpu(p->rate); + mdev->sync_conf.c_plan_ahead = be32_to_cpu(p->c_plan_ahead); + mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); + mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); + mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); + } spin_lock(&mdev->peer_seq_lock); /* lock against drbd_nl_syncer_conf() */ -- cgit v1.2.3 From 778f271dfe7a7173c0bae2d6cde8d9bd1533e668 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Tue, 6 Jul 2010 11:14:00 +0200 Subject: drbd: The new, smarter resync speed controller Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 20 ++++++++++++++++++++ 1 file changed, 20 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 34bea972f734..5f80b22e711d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1640,6 +1640,8 @@ static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) drbd_send_ack_dp(mdev, P_NEG_ACK, p); } + atomic_add(data_size >> 9, &mdev->rs_sect_in); + return ok; } @@ -2810,6 +2812,8 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; const int apv = mdev->agreed_pro_version; + int *rs_plan_s = NULL; + int fifo_size = 0; exp_max_sz = apv <= 87 ? sizeof(struct p_rs_param) : apv == 88 ? sizeof(struct p_rs_param) @@ -2904,6 +2908,15 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) mdev->sync_conf.c_delay_target = be32_to_cpu(p->c_delay_target); mdev->sync_conf.c_fill_target = be32_to_cpu(p->c_fill_target); mdev->sync_conf.c_max_rate = be32_to_cpu(p->c_max_rate); + + fifo_size = (mdev->sync_conf.c_plan_ahead * 10 * SLEEP_TIME) / HZ; + if (fifo_size != mdev->rs_plan_s.size && fifo_size > 0) { + rs_plan_s = kzalloc(sizeof(int) * fifo_size, GFP_KERNEL); + if (!rs_plan_s) { + dev_err(DEV, "kmalloc of fifo_buffer failed"); + goto disconnect; + } + } } spin_lock(&mdev->peer_seq_lock); @@ -2922,6 +2935,12 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) mdev->csums_tfm = csums_tfm; dev_info(DEV, "using csums-alg: \"%s\"\n", p->csums_alg); } + if (fifo_size != mdev->rs_plan_s.size) { + kfree(mdev->rs_plan_s.values); + mdev->rs_plan_s.values = rs_plan_s; + mdev->rs_plan_s.size = fifo_size; + mdev->rs_planed = 0; + } spin_unlock(&mdev->peer_seq_lock); } @@ -4202,6 +4221,7 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); dec_rs_pending(mdev); + atomic_add(blksize >> 9, &mdev->rs_sect_in); return TRUE; } -- cgit v1.2.3 From 85719573dd716bc2ac3e098b44adfed884250bab Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 21 Jul 2010 10:20:17 +0200 Subject: drbd: Replaced some casts by an union. Improved comments Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5f80b22e711d..0b03e3174f76 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2097,7 +2097,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) if (drbd_recv(mdev, di->digest, digest_size) != digest_size) goto out_free_e; - e->block_id = (u64)(unsigned long)di; + e->digest = di; if (h->command == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->agreed_pro_version >= 89); e->w.cb = w_e_end_csum_rs_req; @@ -3769,6 +3769,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) drbd_thread_stop(&mdev->asender); drbd_free_sock(mdev); + /* wait for current activity to cease. */ spin_lock_irq(&mdev->req_lock); _drbd_wait_ee_list_empty(mdev, &mdev->active_ee); _drbd_wait_ee_list_empty(mdev, &mdev->sync_ee); -- cgit v1.2.3 From c36c3ced692b38d0cf90a5e6f875be2f9ebbc037 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 11 Aug 2010 20:42:55 +0200 Subject: drbd: let drbd_free_ee implicitly free any digest Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0b03e3174f76..2f9320be4906 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -377,6 +377,8 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) { + if (e->flags & EE_HAS_DIGEST) + kfree(e->digest); drbd_pp_free(mdev, e->pages); D_ASSERT(atomic_read(&e->pending_bios) == 0); D_ASSERT(hlist_unhashed(&e->colision)); @@ -2094,10 +2096,12 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) di->digest_size = digest_size; di->digest = (((char *)di)+sizeof(struct digest_info)); + e->digest = di; + e->flags |= EE_HAS_DIGEST; + if (drbd_recv(mdev, di->digest, digest_size) != digest_size) goto out_free_e; - e->digest = di; if (h->command == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->agreed_pro_version >= 89); e->w.cb = w_e_end_csum_rs_req; @@ -2159,7 +2163,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) return TRUE; out_free_e: - kfree(di); put_ldev(mdev); drbd_free_ee(mdev, e); return FALSE; -- cgit v1.2.3 From 80a40e439e5a3f30b0a6210a1add6d7c33392e54 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 11 Aug 2010 23:28:00 +0200 Subject: drbd: reduce code duplication when receiving data requests also canonicalize the return values of read_for_csum and drbd_rs_begin_io to return -ESOMETHING, or 0 for success. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 46 ++++++++++---------------------------- 1 file changed, 12 insertions(+), 34 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2f9320be4906..346aed98027f 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2068,21 +2068,12 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) case P_DATA_REQUEST: e->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; - break; + /* application IO, don't drbd_rs_begin_io */ + goto submit; + case P_RS_DATA_REQUEST: e->w.cb = w_e_end_rsdata_req; fault_type = DRBD_FAULT_RS_RD; - /* Eventually this should become asynchronously. Currently it - * blocks the whole receiver just to delay the reading of a - * resync data block. - * the drbd_work_queue mechanism is made for this... - */ - if (!drbd_rs_begin_io(mdev, sector)) { - /* we have been interrupted, - * probably connection lost! */ - D_ASSERT(signal_pending(current)); - goto out_free_e; - } break; case P_OV_REPLY: @@ -2108,13 +2099,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) } else if (h->command == P_OV_REPLY) { e->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); - break; - } - - if (!drbd_rs_begin_io(mdev, sector)) { - /* we have been interrupted, probably connection lost! */ - D_ASSERT(signal_pending(current)); - goto out_free_e; + /* drbd_rs_begin_io done when we sent this request */ + goto submit; } break; @@ -2133,31 +2119,23 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) } e->w.cb = w_e_end_ov_req; fault_type = DRBD_FAULT_RS_RD; - /* Eventually this should become asynchronous. Currently it - * blocks the whole receiver just to delay the reading of a - * resync data block. - * the drbd_work_queue mechanism is made for this... - */ - if (!drbd_rs_begin_io(mdev, sector)) { - /* we have been interrupted, - * probably connection lost! */ - D_ASSERT(signal_pending(current)); - goto out_free_e; - } break; - default: dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", cmdname(h->command)); fault_type = DRBD_FAULT_MAX; + goto out_free_e; } - spin_lock_irq(&mdev->req_lock); - list_add(&e->w.list, &mdev->read_ee); - spin_unlock_irq(&mdev->req_lock); + if (drbd_rs_begin_io(mdev, e->sector)) + goto out_free_e; +submit: inc_unacked(mdev); + spin_lock_irq(&mdev->req_lock); + list_add_tail(&e->w.list, &mdev->read_ee); + spin_unlock_irq(&mdev->req_lock); if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) return TRUE; -- cgit v1.2.3 From 0f0601f4ea2f53cfd8bcae060fb03d9bbde070ec Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 11 Aug 2010 23:40:24 +0200 Subject: drbd: new configuration parameter c-min-rate We now track the data rate of locally submitted resync related requests, and can thus detect non-resync activity on the lower level device. If the current sync rate is above c-min-rate, and the lower level device appears to be busy, we throttle the resyncer. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 88 +++++++++++++++++++++++++++++++++++--- 1 file changed, 83 insertions(+), 5 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 346aed98027f..0d9967fef528 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1561,6 +1561,7 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si list_add(&e->w.list, &mdev->sync_ee); spin_unlock_irq(&mdev->req_lock); + atomic_add(data_size >> 9, &mdev->rs_sect_ev); if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) return TRUE; @@ -2017,17 +2018,66 @@ out_interrupted: return FALSE; } +/* We may throttle resync, if the lower device seems to be busy, + * and current sync rate is above c_min_rate. + * + * To decide whether or not the lower device is busy, we use a scheme similar + * to MD RAID is_mddev_idle(): if the partition stats reveal "significant" + * (more than 64 sectors) of activity we cannot account for with our own resync + * activity, it obviously is "busy". + * + * The current sync rate used here uses only the most recent two step marks, + * to have a short time average so we can react faster. + */ +int drbd_rs_should_slow_down(struct drbd_conf *mdev) +{ + struct gendisk *disk = mdev->ldev->backing_bdev->bd_contains->bd_disk; + unsigned long db, dt, dbdt; + int curr_events; + int throttle = 0; + + /* feature disabled? */ + if (mdev->sync_conf.c_min_rate == 0) + return 0; + + curr_events = (int)part_stat_read(&disk->part0, sectors[0]) + + (int)part_stat_read(&disk->part0, sectors[1]) - + atomic_read(&mdev->rs_sect_ev); + if (!mdev->rs_last_events || curr_events - mdev->rs_last_events > 64) { + unsigned long rs_left; + int i; + + mdev->rs_last_events = curr_events; + + /* sync speed average over the last 2*DRBD_SYNC_MARK_STEP, + * approx. */ + i = (mdev->rs_last_mark + DRBD_SYNC_MARKS-2) % DRBD_SYNC_MARKS; + rs_left = drbd_bm_total_weight(mdev) - mdev->rs_failed; + + dt = ((long)jiffies - (long)mdev->rs_mark_time[i]) / HZ; + if (!dt) + dt++; + db = mdev->rs_mark_left[i] - rs_left; + dbdt = Bit2KB(db/dt); + + if (dbdt > mdev->sync_conf.c_min_rate) + throttle = 1; + } + return throttle; +} + + static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); struct drbd_epoch_entry *e; struct digest_info *di = NULL; + struct p_block_req *p = (struct p_block_req *)h; + const int brps = sizeof(*p)-sizeof(*h); int size, digest_size; unsigned int fault_type; - struct p_block_req *p = - (struct p_block_req *)h; - const int brps = sizeof(*p)-sizeof(*h); + if (drbd_recv(mdev, h->payload, brps) != brps) return FALSE; @@ -2099,8 +2149,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) } else if (h->command == P_OV_REPLY) { e->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); - /* drbd_rs_begin_io done when we sent this request */ - goto submit; + /* drbd_rs_begin_io done when we sent this request, + * but accounting still needs to be done. */ + goto submit_for_resync; } break; @@ -2128,9 +2179,36 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) goto out_free_e; } + /* Throttle, drbd_rs_begin_io and submit should become asynchronous + * wrt the receiver, but it is not as straightforward as it may seem. + * Various places in the resync start and stop logic assume resync + * requests are processed in order, requeuing this on the worker thread + * introduces a bunch of new code for synchronization between threads. + * + * Unlimited throttling before drbd_rs_begin_io may stall the resync + * "forever", throttling after drbd_rs_begin_io will lock that extent + * for application writes for the same time. For now, just throttle + * here, where the rest of the code expects the receiver to sleep for + * a while, anyways. + */ + + /* Throttle before drbd_rs_begin_io, as that locks out application IO; + * this defers syncer requests for some time, before letting at least + * on request through. The resync controller on the receiving side + * will adapt to the incoming rate accordingly. + * + * We cannot throttle here if remote is Primary/SyncTarget: + * we would also throttle its application reads. + * In that case, throttling is done on the SyncTarget only. + */ + if (mdev->state.peer != R_PRIMARY && drbd_rs_should_slow_down(mdev)) + msleep(100); if (drbd_rs_begin_io(mdev, e->sector)) goto out_free_e; +submit_for_resync: + atomic_add(size >> 9, &mdev->rs_sect_ev); + submit: inc_unacked(mdev); spin_lock_irq(&mdev->req_lock); -- cgit v1.2.3 From 48acf8689847c061bd82c808c379f1bd79dfe407 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 23 Aug 2010 15:51:56 +0200 Subject: drbd: Microfix: Assigning sector once is sufficient Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0d9967fef528..591a171291d9 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -365,7 +365,6 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, e->size = data_size; e->flags = 0; e->sector = sector; - e->sector = sector; e->block_id = id; return e; -- cgit v1.2.3 From 0b70a13dac014ec9274640b9e945bde493ba365e Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 20 Aug 2010 13:36:10 +0200 Subject: drbd: Sending of big packets, for payloads from 64KByte to 4GByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 94 +++++++++++++++++++------------------- 1 file changed, 47 insertions(+), 47 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 591a171291d9..9b3321e2c3cd 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -720,14 +720,14 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct p_header *h = (struct p_header *) &mdev->data.sbuf.header; + struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -944,7 +944,7 @@ out_release_sockets: return -1; } -static int drbd_recv_header(struct drbd_conf *mdev, struct p_header *h) +static int drbd_recv_header(struct drbd_conf *mdev, struct p_header80 *h) { int r; @@ -1266,7 +1266,7 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea return 1; } -static int receive_Barrier(struct drbd_conf *mdev, struct p_header *h) +static int receive_Barrier(struct drbd_conf *mdev, struct p_header80 *h) { int rv, issue_flush; struct p_barrier *p = (struct p_barrier *)h; @@ -1570,7 +1570,7 @@ fail: return FALSE; } -static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataReply(struct drbd_conf *mdev, struct p_header80 *h) { struct drbd_request *req; sector_t sector; @@ -1610,7 +1610,7 @@ static int receive_DataReply(struct drbd_conf *mdev, struct p_header *h) return ok; } -static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header *h) +static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; unsigned int header_size, data_size; @@ -1767,7 +1767,7 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, struct p_header *h) +static int receive_Data(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; struct drbd_epoch_entry *e; @@ -2066,7 +2066,7 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) } -static int receive_DataRequest(struct drbd_conf *mdev, struct p_header *h) +static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); @@ -2756,7 +2756,7 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, struct p_header *h) +static int receive_protocol(struct drbd_conf *mdev, struct p_header80 *h) { struct p_protocol *p = (struct p_protocol *)h; int header_size, data_size; @@ -2862,7 +2862,7 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, struct p_header *h) +static int receive_SyncParam(struct drbd_conf *mdev, struct p_header80 *h) { int ok = TRUE; struct p_rs_param_95 *p = (struct p_rs_param_95 *)h; @@ -3032,7 +3032,7 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) +static int receive_sizes(struct drbd_conf *mdev, struct p_header80 *h) { struct p_sizes *p = (struct p_sizes *)h; enum determine_dev_size dd = unchanged; @@ -3148,7 +3148,7 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_uuids(struct drbd_conf *mdev, struct p_header *h) +static int receive_uuids(struct drbd_conf *mdev, struct p_header80 *h) { struct p_uuids *p = (struct p_uuids *)h; u64 *p_uuid; @@ -3241,7 +3241,7 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_req_state(struct drbd_conf *mdev, struct p_header80 *h) { struct p_req_state *p = (struct p_req_state *)h; union drbd_state mask, val; @@ -3271,7 +3271,7 @@ static int receive_req_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_state(struct drbd_conf *mdev, struct p_header *h) +static int receive_state(struct drbd_conf *mdev, struct p_header80 *h) { struct p_state *p = (struct p_state *)h; enum drbd_conns nconn, oconn; @@ -3395,7 +3395,7 @@ static int receive_state(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) +static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header80 *h) { struct p_rs_uuid *p = (struct p_rs_uuid *)h; @@ -3428,7 +3428,7 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header *h) enum receive_bitmap_ret { OK, DONE, FAILED }; static enum receive_bitmap_ret -receive_bitmap_plain(struct drbd_conf *mdev, struct p_header *h, +receive_bitmap_plain(struct drbd_conf *mdev, struct p_header80 *h, unsigned long *buffer, struct bm_xfer_ctx *c) { unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); @@ -3533,7 +3533,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, const char *direction, struct bm_xfer_ctx *c) { /* what would it take to transfer it "plaintext" */ - unsigned plain = sizeof(struct p_header) * + unsigned plain = sizeof(struct p_header80) * ((c->bm_words+BM_PACKET_WORDS-1)/BM_PACKET_WORDS+1) + c->bm_words * sizeof(long); unsigned total = c->bytes[0] + c->bytes[1]; @@ -3571,7 +3571,7 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. */ -static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) +static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) { struct bm_xfer_ctx c; void *buffer; @@ -3623,7 +3623,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) } c.packets[h->command == P_BITMAP]++; - c.bytes[h->command == P_BITMAP] += sizeof(struct p_header) + h->length; + c.bytes[h->command == P_BITMAP] += sizeof(struct p_header80) + h->length; if (ret != OK) break; @@ -3659,7 +3659,7 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header *h) return ok; } -static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) +static int receive_skip_(struct drbd_conf *mdev, struct p_header80 *h, int silent) { /* TODO zero copy sink :) */ static char sink[128]; @@ -3679,17 +3679,17 @@ static int receive_skip_(struct drbd_conf *mdev, struct p_header *h, int silent) return size == 0; } -static int receive_skip(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip(struct drbd_conf *mdev, struct p_header80 *h) { return receive_skip_(mdev, h, 0); } -static int receive_skip_silent(struct drbd_conf *mdev, struct p_header *h) +static int receive_skip_silent(struct drbd_conf *mdev, struct p_header80 *h) { return receive_skip_(mdev, h, 1); } -static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) +static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header80 *h) { if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); @@ -3701,7 +3701,7 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header *); +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header80 *); static drbd_cmd_handler_f drbd_default_handler[] = { [P_DATA] = receive_Data, @@ -3736,7 +3736,7 @@ static drbd_cmd_handler_f *drbd_opt_cmd_handler; static void drbdd(struct drbd_conf *mdev) { drbd_cmd_handler_f handler; - struct p_header *header = &mdev->data.rbuf.header; + struct p_header80 *header = &mdev->data.rbuf.header; while (get_t_state(&mdev->receiver) == Running) { drbd_thread_current_set_cpu(mdev); @@ -3964,7 +3964,7 @@ static int drbd_send_handshake(struct drbd_conf *mdev) p->protocol_min = cpu_to_be32(PRO_VERSION_MIN); p->protocol_max = cpu_to_be32(PRO_VERSION_MAX); ok = _drbd_send_cmd( mdev, mdev->data.socket, P_HAND_SHAKE, - (struct p_header *)p, sizeof(*p), 0 ); + (struct p_header80 *)p, sizeof(*p), 0 ); mutex_unlock(&mdev->data.mutex); return ok; } @@ -3981,7 +3981,7 @@ static int drbd_do_handshake(struct drbd_conf *mdev) /* ASSERT current == mdev->receiver ... */ struct p_handshake *p = &mdev->data.rbuf.handshake; const int expect = sizeof(struct p_handshake) - -sizeof(struct p_header); + -sizeof(struct p_header80); int rv; rv = drbd_send_handshake(mdev); @@ -4058,7 +4058,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - struct p_header p; + struct p_header80 p; unsigned int key_len = strlen(mdev->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; @@ -4231,7 +4231,7 @@ int drbdd_init(struct drbd_thread *thi) /* ********* acknowledge sender ******** */ -static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) +static int got_RqSReply(struct drbd_conf *mdev, struct p_header80 *h) { struct p_req_state_reply *p = (struct p_req_state_reply *)h; @@ -4249,13 +4249,13 @@ static int got_RqSReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_Ping(struct drbd_conf *mdev, struct p_header *h) +static int got_Ping(struct drbd_conf *mdev, struct p_header80 *h) { return drbd_send_ping_ack(mdev); } -static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) +static int got_PingAck(struct drbd_conf *mdev, struct p_header80 *h) { /* restore idle timeout */ mdev->meta.socket->sk->sk_rcvtimeo = mdev->net_conf->ping_int*HZ; @@ -4265,7 +4265,7 @@ static int got_PingAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_IsInSync(struct drbd_conf *mdev, struct p_header *h) +static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4336,7 +4336,7 @@ static int validate_req_change_req_state(struct drbd_conf *mdev, return TRUE; } -static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BlockAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4376,7 +4376,7 @@ static int got_BlockAck(struct drbd_conf *mdev, struct p_header *h) _ack_id_to_req, __func__ , what); } -static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) +static int got_NegAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4396,7 +4396,7 @@ static int got_NegAck(struct drbd_conf *mdev, struct p_header *h) _ack_id_to_req, __func__ , neg_acked); } -static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegDReply(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; sector_t sector = be64_to_cpu(p->sector); @@ -4409,7 +4409,7 @@ static int got_NegDReply(struct drbd_conf *mdev, struct p_header *h) _ar_id_to_req, __func__ , neg_acked); } -static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) +static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header80 *h) { sector_t sector; int size; @@ -4431,7 +4431,7 @@ static int got_NegRSDReply(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) +static int got_BarrierAck(struct drbd_conf *mdev, struct p_header80 *h) { struct p_barrier_ack *p = (struct p_barrier_ack *)h; @@ -4440,7 +4440,7 @@ static int got_BarrierAck(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) +static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) { struct p_block_ack *p = (struct p_block_ack *)h; struct drbd_work *w; @@ -4474,7 +4474,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header *h) return TRUE; } -static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) +static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header80 *h) { /* IGNORE */ return TRUE; @@ -4482,7 +4482,7 @@ static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header *h) struct asender_cmd { size_t pkt_size; - int (*process)(struct drbd_conf *mdev, struct p_header *h); + int (*process)(struct drbd_conf *mdev, struct p_header80 *h); }; static struct asender_cmd *get_asender_cmd(int cmd) @@ -4491,8 +4491,8 @@ static struct asender_cmd *get_asender_cmd(int cmd) /* anything missing from this table is in * the drbd_cmd_handler (drbd_default_handler) table, * see the beginning of drbdd() */ - [P_PING] = { sizeof(struct p_header), got_Ping }, - [P_PING_ACK] = { sizeof(struct p_header), got_PingAck }, + [P_PING] = { sizeof(struct p_header80), got_Ping }, + [P_PING_ACK] = { sizeof(struct p_header80), got_PingAck }, [P_RECV_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, [P_RS_WRITE_ACK] = { sizeof(struct p_block_ack), got_BlockAck }, @@ -4504,7 +4504,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe), got_something_to_ignore_m }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_something_to_ignore_m }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) @@ -4515,13 +4515,13 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header *h = &mdev->meta.rbuf.header; + struct p_header80 *h = &mdev->meta.rbuf.header; struct asender_cmd *cmd = NULL; int rv, len; void *buf = h; int received = 0; - int expect = sizeof(struct p_header); + int expect = sizeof(struct p_header80); int empty; sprintf(current->comm, "drbd%d_asender", mdev_to_minor(mdev)); @@ -4621,7 +4621,7 @@ int drbd_asender(struct drbd_thread *thi) goto disconnect; } expect = cmd->pkt_size; - ERR_IF(len != expect-sizeof(struct p_header)) + ERR_IF(len != expect-sizeof(struct p_header80)) goto reconnect; } if (received == expect) { @@ -4631,7 +4631,7 @@ int drbd_asender(struct drbd_thread *thi) buf = h; received = 0; - expect = sizeof(struct p_header); + expect = sizeof(struct p_header80); cmd = NULL; } } -- cgit v1.2.3 From 02918be2273a6b086292e0d85b740336eda46e36 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Fri, 20 Aug 2010 14:35:10 +0200 Subject: drbd: receiving of big packets, for payloads between 64kByte and 4GByte Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 389 ++++++++++++++++--------------------- 1 file changed, 168 insertions(+), 221 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9b3321e2c3cd..fe308644a63c 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -720,14 +720,14 @@ out: static int drbd_send_fp(struct drbd_conf *mdev, struct socket *sock, enum drbd_packets cmd) { - struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; + struct p_header80 *h = &mdev->data.sbuf.header.h80; return _drbd_send_cmd(mdev, sock, cmd, h, sizeof(*h), 0); } static enum drbd_packets drbd_recv_fp(struct drbd_conf *mdev, struct socket *sock) { - struct p_header80 *h = (struct p_header80 *) &mdev->data.sbuf.header; + struct p_header80 *h = &mdev->data.rbuf.header.h80; int rr; rr = drbd_recv_short(mdev, sock, h, sizeof(*h), 0); @@ -944,22 +944,27 @@ out_release_sockets: return -1; } -static int drbd_recv_header(struct drbd_conf *mdev, struct p_header80 *h) +static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsigned int *packet_size) { + union p_header *h = &mdev->data.rbuf.header; int r; r = drbd_recv(mdev, h, sizeof(*h)); - if (unlikely(r != sizeof(*h))) { dev_err(DEV, "short read expecting header on sock: r=%d\n", r); return FALSE; - }; - h->command = be16_to_cpu(h->command); - h->length = be16_to_cpu(h->length); - if (unlikely(h->magic != BE_DRBD_MAGIC)) { + } + + if (likely(h->h80.magic == BE_DRBD_MAGIC)) { + *cmd = be16_to_cpu(h->h80.command); + *packet_size = be16_to_cpu(h->h80.length); + } else if (h->h95.magic == BE_DRBD_MAGIC_BIG) { + *cmd = be16_to_cpu(h->h95.command); + *packet_size = be32_to_cpu(h->h95.length); + } else { dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->magic), - h->command, h->length); + (long)be32_to_cpu(h->h80.magic), + h->h80.command, h->h80.length); return FALSE; } mdev->last_received = jiffies; @@ -1266,17 +1271,12 @@ int w_e_reissue(struct drbd_conf *mdev, struct drbd_work *w, int cancel) __relea return 1; } -static int receive_Barrier(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_Barrier(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { int rv, issue_flush; - struct p_barrier *p = (struct p_barrier *)h; + struct p_barrier *p = &mdev->data.rbuf.barrier; struct drbd_epoch *epoch; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - - rv = drbd_recv(mdev, h->payload, h->length); - ERR_IF(rv != h->length) return FALSE; - inc_unacked(mdev); if (mdev->net_conf->wire_protocol != DRBD_PROT_C) @@ -1570,21 +1570,12 @@ fail: return FALSE; } -static int receive_DataReply(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_DataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { struct drbd_request *req; sector_t sector; - unsigned int header_size, data_size; int ok; - struct p_data *p = (struct p_data *)h; - - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - ERR_IF(data_size == 0) return FALSE; - - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + struct p_data *p = &mdev->data.rbuf.data; sector = be64_to_cpu(p->sector); @@ -1610,20 +1601,11 @@ static int receive_DataReply(struct drbd_conf *mdev, struct p_header80 *h) return ok; } -static int receive_RSDataReply(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { sector_t sector; - unsigned int header_size, data_size; int ok; - struct p_data *p = (struct p_data *)h; - - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - ERR_IF(data_size == 0) return FALSE; - - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; + struct p_data *p = &mdev->data.rbuf.data; sector = be64_to_cpu(p->sector); D_ASSERT(p->block_id == ID_SYNCER); @@ -1767,23 +1749,14 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) } /* mirrored write */ -static int receive_Data(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { sector_t sector; struct drbd_epoch_entry *e; - struct p_data *p = (struct p_data *)h; - int header_size, data_size; + struct p_data *p = &mdev->data.rbuf.data; int rw = WRITE; u32 dp_flags; - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - ERR_IF(data_size == 0) return FALSE; - - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; - if (!get_ldev(mdev)) { if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not write mirrored data block " @@ -2066,20 +2039,15 @@ int drbd_rs_should_slow_down(struct drbd_conf *mdev) } -static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int digest_size) { sector_t sector; const sector_t capacity = drbd_get_capacity(mdev->this_bdev); struct drbd_epoch_entry *e; struct digest_info *di = NULL; - struct p_block_req *p = (struct p_block_req *)h; - const int brps = sizeof(*p)-sizeof(*h); - int size, digest_size; + int size; unsigned int fault_type; - - - if (drbd_recv(mdev, h->payload, brps) != brps) - return FALSE; + struct p_block_req *p = &mdev->data.rbuf.block_req; sector = be64_to_cpu(p->sector); size = be32_to_cpu(p->blksize); @@ -2099,9 +2067,9 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) if (__ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not satisfy peer's read request, " "no local data.\n"); - drbd_send_ack_rp(mdev, h->command == P_DATA_REQUEST ? P_NEG_DREPLY : + drbd_send_ack_rp(mdev, cmd == P_DATA_REQUEST ? P_NEG_DREPLY : P_NEG_RS_DREPLY , p); - return drbd_drain_block(mdev, h->length - brps); + return TRUE; } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD @@ -2113,7 +2081,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) return FALSE; } - switch (h->command) { + switch (cmd) { case P_DATA_REQUEST: e->w.cb = w_e_end_data_req; fault_type = DRBD_FAULT_DT_RD; @@ -2128,7 +2096,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) case P_OV_REPLY: case P_CSUM_RS_REQUEST: fault_type = DRBD_FAULT_RS_RD; - digest_size = h->length - brps ; di = kmalloc(sizeof(*di) + digest_size, GFP_NOIO); if (!di) goto out_free_e; @@ -2142,10 +2109,10 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) if (drbd_recv(mdev, di->digest, digest_size) != digest_size) goto out_free_e; - if (h->command == P_CSUM_RS_REQUEST) { + if (cmd == P_CSUM_RS_REQUEST) { D_ASSERT(mdev->agreed_pro_version >= 89); e->w.cb = w_e_end_csum_rs_req; - } else if (h->command == P_OV_REPLY) { + } else if (cmd == P_OV_REPLY) { e->w.cb = w_e_end_ov_reply; dec_rs_pending(mdev); /* drbd_rs_begin_io done when we sent this request, @@ -2173,7 +2140,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, struct p_header80 *h) default: dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", - cmdname(h->command)); + cmdname(cmd)); fault_type = DRBD_FAULT_MAX; goto out_free_e; } @@ -2756,20 +2723,13 @@ static int cmp_after_sb(enum drbd_after_sb_p peer, enum drbd_after_sb_p self) return 1; } -static int receive_protocol(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_protocol(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_protocol *p = (struct p_protocol *)h; - int header_size, data_size; + struct p_protocol *p = &mdev->data.rbuf.protocol; int p_proto, p_after_sb_0p, p_after_sb_1p, p_after_sb_2p; int p_want_lose, p_two_primaries, cf; char p_integrity_alg[SHARED_SECRET_MAX] = ""; - header_size = sizeof(*p) - sizeof(*h); - data_size = h->length - header_size; - - if (drbd_recv(mdev, h->payload, header_size) != header_size) - return FALSE; - p_proto = be32_to_cpu(p->protocol); p_after_sb_0p = be32_to_cpu(p->after_sb_0p); p_after_sb_1p = be32_to_cpu(p->after_sb_1p); @@ -2862,10 +2822,10 @@ struct crypto_hash *drbd_crypto_alloc_digest_safe(const struct drbd_conf *mdev, return tfm; } -static int receive_SyncParam(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_SyncParam(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int packet_size) { int ok = TRUE; - struct p_rs_param_95 *p = (struct p_rs_param_95 *)h; + struct p_rs_param_95 *p = &mdev->data.rbuf.rs_param_95; unsigned int header_size, data_size, exp_max_sz; struct crypto_hash *verify_tfm = NULL; struct crypto_hash *csums_tfm = NULL; @@ -2879,29 +2839,29 @@ static int receive_SyncParam(struct drbd_conf *mdev, struct p_header80 *h) : apv <= 94 ? sizeof(struct p_rs_param_89) : /* apv >= 95 */ sizeof(struct p_rs_param_95); - if (h->length > exp_max_sz) { + if (packet_size > exp_max_sz) { dev_err(DEV, "SyncParam packet too long: received %u, expected <= %u bytes\n", - h->length, exp_max_sz); + packet_size, exp_max_sz); return FALSE; } if (apv <= 88) { - header_size = sizeof(struct p_rs_param) - sizeof(*h); - data_size = h->length - header_size; + header_size = sizeof(struct p_rs_param) - sizeof(struct p_header80); + data_size = packet_size - header_size; } else if (apv <= 94) { - header_size = sizeof(struct p_rs_param_89) - sizeof(*h); - data_size = h->length - header_size; + header_size = sizeof(struct p_rs_param_89) - sizeof(struct p_header80); + data_size = packet_size - header_size; D_ASSERT(data_size == 0); } else { - header_size = sizeof(struct p_rs_param_95) - sizeof(*h); - data_size = h->length - header_size; + header_size = sizeof(struct p_rs_param_95) - sizeof(struct p_header80); + data_size = packet_size - header_size; D_ASSERT(data_size == 0); } /* initialize verify_alg and csums_alg */ memset(p->verify_alg, 0, 2 * SHARED_SECRET_MAX); - if (drbd_recv(mdev, h->payload, header_size) != header_size) + if (drbd_recv(mdev, &p->head.payload, header_size) != header_size) return FALSE; mdev->sync_conf.rate = be32_to_cpu(p->rate); @@ -3032,19 +2992,15 @@ static void warn_if_differ_considerably(struct drbd_conf *mdev, (unsigned long long)a, (unsigned long long)b); } -static int receive_sizes(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_sizes *p = (struct p_sizes *)h; + struct p_sizes *p = &mdev->data.rbuf.sizes; enum determine_dev_size dd = unchanged; unsigned int max_seg_s; sector_t p_size, p_usize, my_usize; int ldsc = 0; /* local disk size changed */ enum dds_flags ddsf; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; - p_size = be64_to_cpu(p->d_size); p_usize = be64_to_cpu(p->u_size); @@ -3148,16 +3104,12 @@ static int receive_sizes(struct drbd_conf *mdev, struct p_header80 *h) return TRUE; } -static int receive_uuids(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_uuids(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_uuids *p = (struct p_uuids *)h; + struct p_uuids *p = &mdev->data.rbuf.uuids; u64 *p_uuid; int i; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; - p_uuid = kmalloc(sizeof(u64)*UI_EXTENDED_SIZE, GFP_NOIO); for (i = UI_CURRENT; i < UI_EXTENDED_SIZE; i++) @@ -3241,16 +3193,12 @@ static union drbd_state convert_state(union drbd_state ps) return ms; } -static int receive_req_state(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_req_state *p = (struct p_req_state *)h; + struct p_req_state *p = &mdev->data.rbuf.req_state; union drbd_state mask, val; int rv; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; - mask.i = be32_to_cpu(p->mask); val.i = be32_to_cpu(p->val); @@ -3271,21 +3219,15 @@ static int receive_req_state(struct drbd_conf *mdev, struct p_header80 *h) return TRUE; } -static int receive_state(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_state *p = (struct p_state *)h; + struct p_state *p = &mdev->data.rbuf.state; enum drbd_conns nconn, oconn; union drbd_state ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; int rv; - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) - return FALSE; - - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; - peer_state.i = be32_to_cpu(p->state); real_peer_disk = peer_state.disk; @@ -3395,9 +3337,9 @@ static int receive_state(struct drbd_conf *mdev, struct p_header80 *h) return TRUE; } -static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_sync_uuid(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { - struct p_rs_uuid *p = (struct p_rs_uuid *)h; + struct p_rs_uuid *p = &mdev->data.rbuf.rs_uuid; wait_event(mdev->misc_wait, mdev->state.conn == C_WF_SYNC_UUID || @@ -3406,10 +3348,6 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header80 *h) /* D_ASSERT( mdev->state.conn == C_WF_SYNC_UUID ); */ - ERR_IF(h->length != (sizeof(*p)-sizeof(*h))) return FALSE; - if (drbd_recv(mdev, h->payload, h->length) != h->length) - return FALSE; - /* Here the _drbd_uuid_ functions are right, current should _not_ be rotated into the history */ if (get_ldev_if_state(mdev, D_NEGOTIATING)) { @@ -3428,14 +3366,14 @@ static int receive_sync_uuid(struct drbd_conf *mdev, struct p_header80 *h) enum receive_bitmap_ret { OK, DONE, FAILED }; static enum receive_bitmap_ret -receive_bitmap_plain(struct drbd_conf *mdev, struct p_header80 *h, - unsigned long *buffer, struct bm_xfer_ctx *c) +receive_bitmap_plain(struct drbd_conf *mdev, unsigned int data_size, + unsigned long *buffer, struct bm_xfer_ctx *c) { unsigned num_words = min_t(size_t, BM_PACKET_WORDS, c->bm_words - c->word_offset); unsigned want = num_words * sizeof(long); - if (want != h->length) { - dev_err(DEV, "%s:want (%u) != h->length (%u)\n", __func__, want, h->length); + if (want != data_size) { + dev_err(DEV, "%s:want (%u) != data_size (%u)\n", __func__, want, data_size); return FAILED; } if (want == 0) @@ -3571,12 +3509,13 @@ void INFO_bm_xfer_stats(struct drbd_conf *mdev, in order to be agnostic to the 32 vs 64 bits issue. returns 0 on failure, 1 if we successfully received it. */ -static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { struct bm_xfer_ctx c; void *buffer; enum receive_bitmap_ret ret; int ok = FALSE; + struct p_header80 *h = &mdev->data.rbuf.header.h80; wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); @@ -3596,21 +3535,21 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) }; do { - if (h->command == P_BITMAP) { - ret = receive_bitmap_plain(mdev, h, buffer, &c); - } else if (h->command == P_COMPRESSED_BITMAP) { + if (cmd == P_BITMAP) { + ret = receive_bitmap_plain(mdev, data_size, buffer, &c); + } else if (cmd == P_COMPRESSED_BITMAP) { /* MAYBE: sanity check that we speak proto >= 90, * and the feature is enabled! */ struct p_compressed_bm *p; - if (h->length > BM_PACKET_PAYLOAD_BYTES) { + if (data_size > BM_PACKET_PAYLOAD_BYTES) { dev_err(DEV, "ReportCBitmap packet too large\n"); goto out; } /* use the page buff */ p = buffer; memcpy(p, h, sizeof(*h)); - if (drbd_recv(mdev, p->head.payload, h->length) != h->length) + if (drbd_recv(mdev, p->head.payload, data_size) != data_size) goto out; if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length); @@ -3618,17 +3557,17 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) } ret = decode_bitmap_c(mdev, p, &c); } else { - dev_warn(DEV, "receive_bitmap: h->command neither ReportBitMap nor ReportCBitMap (is 0x%x)", h->command); + dev_warn(DEV, "receive_bitmap: cmd neither ReportBitMap nor ReportCBitMap (is 0x%x)", cmd); goto out; } - c.packets[h->command == P_BITMAP]++; - c.bytes[h->command == P_BITMAP] += sizeof(struct p_header80) + h->length; + c.packets[cmd == P_BITMAP]++; + c.bytes[cmd == P_BITMAP] += sizeof(struct p_header80) + data_size; if (ret != OK) break; - if (!drbd_recv_header(mdev, h)) + if (!drbd_recv_header(mdev, &cmd, &data_size)) goto out; } while (ret == OK); if (ret == FAILED) @@ -3659,17 +3598,16 @@ static int receive_bitmap(struct drbd_conf *mdev, struct p_header80 *h) return ok; } -static int receive_skip_(struct drbd_conf *mdev, struct p_header80 *h, int silent) +static int receive_skip(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { /* TODO zero copy sink :) */ static char sink[128]; int size, want, r; - if (!silent) - dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", - h->command, h->length); + dev_warn(DEV, "skipping unknown optional packet type %d, l: %d!\n", + cmd, data_size); - size = h->length; + size = data_size; while (size > 0) { want = min_t(int, size, sizeof(sink)); r = drbd_recv(mdev, sink, want); @@ -3679,17 +3617,7 @@ static int receive_skip_(struct drbd_conf *mdev, struct p_header80 *h, int silen return size == 0; } -static int receive_skip(struct drbd_conf *mdev, struct p_header80 *h) -{ - return receive_skip_(mdev, h, 0); -} - -static int receive_skip_silent(struct drbd_conf *mdev, struct p_header80 *h) -{ - return receive_skip_(mdev, h, 1); -} - -static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header80 *h) +static int receive_UnplugRemote(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { if (mdev->state.disk >= D_INCONSISTENT) drbd_kick_lo(mdev); @@ -3701,73 +3629,91 @@ static int receive_UnplugRemote(struct drbd_conf *mdev, struct p_header80 *h) return TRUE; } -typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, struct p_header80 *); - -static drbd_cmd_handler_f drbd_default_handler[] = { - [P_DATA] = receive_Data, - [P_DATA_REPLY] = receive_DataReply, - [P_RS_DATA_REPLY] = receive_RSDataReply, - [P_BARRIER] = receive_Barrier, - [P_BITMAP] = receive_bitmap, - [P_COMPRESSED_BITMAP] = receive_bitmap, - [P_UNPLUG_REMOTE] = receive_UnplugRemote, - [P_DATA_REQUEST] = receive_DataRequest, - [P_RS_DATA_REQUEST] = receive_DataRequest, - [P_SYNC_PARAM] = receive_SyncParam, - [P_SYNC_PARAM89] = receive_SyncParam, - [P_PROTOCOL] = receive_protocol, - [P_UUIDS] = receive_uuids, - [P_SIZES] = receive_sizes, - [P_STATE] = receive_state, - [P_STATE_CHG_REQ] = receive_req_state, - [P_SYNC_UUID] = receive_sync_uuid, - [P_OV_REQUEST] = receive_DataRequest, - [P_OV_REPLY] = receive_DataRequest, - [P_CSUM_RS_REQUEST] = receive_DataRequest, - [P_DELAY_PROBE] = receive_skip_silent, +typedef int (*drbd_cmd_handler_f)(struct drbd_conf *, enum drbd_packets cmd, unsigned int to_receive); + +struct data_cmd { + int expect_payload; + size_t pkt_size; + drbd_cmd_handler_f function; +}; + +static struct data_cmd drbd_cmd_handler[] = { + [P_DATA] = { 1, sizeof(struct p_data), receive_Data }, + [P_DATA_REPLY] = { 1, sizeof(struct p_data), receive_DataReply }, + [P_RS_DATA_REPLY] = { 1, sizeof(struct p_data), receive_RSDataReply } , + [P_BARRIER] = { 0, sizeof(struct p_barrier), receive_Barrier } , + [P_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , + [P_COMPRESSED_BITMAP] = { 1, sizeof(struct p_header80), receive_bitmap } , + [P_UNPLUG_REMOTE] = { 0, sizeof(struct p_header80), receive_UnplugRemote }, + [P_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_RS_DATA_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_SYNC_PARAM] = { 1, sizeof(struct p_header80), receive_SyncParam }, + [P_SYNC_PARAM89] = { 1, sizeof(struct p_header80), receive_SyncParam }, + [P_PROTOCOL] = { 1, sizeof(struct p_protocol), receive_protocol }, + [P_UUIDS] = { 0, sizeof(struct p_uuids), receive_uuids }, + [P_SIZES] = { 0, sizeof(struct p_sizes), receive_sizes }, + [P_STATE] = { 0, sizeof(struct p_state), receive_state }, + [P_STATE_CHG_REQ] = { 0, sizeof(struct p_req_state), receive_req_state }, + [P_SYNC_UUID] = { 0, sizeof(struct p_rs_uuid), receive_sync_uuid }, + [P_OV_REQUEST] = { 0, sizeof(struct p_block_req), receive_DataRequest }, + [P_OV_REPLY] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_CSUM_RS_REQUEST] = { 1, sizeof(struct p_block_req), receive_DataRequest }, + [P_DELAY_PROBE] = { 0, sizeof(struct p_delay_probe93), receive_skip }, /* anything missing from this table is in * the asender_tbl, see get_asender_cmd */ - [P_MAX_CMD] = NULL, + [P_MAX_CMD] = { 0, 0, NULL }, }; -static drbd_cmd_handler_f *drbd_cmd_handler = drbd_default_handler; -static drbd_cmd_handler_f *drbd_opt_cmd_handler; +/* All handler functions that expect a sub-header get that sub-heder in + mdev->data.rbuf.header.head.payload. + + Usually in mdev->data.rbuf.header.head the callback can find the usual + p_header, but they may not rely on that. Since there is also p_header95 ! + */ static void drbdd(struct drbd_conf *mdev) { - drbd_cmd_handler_f handler; - struct p_header80 *header = &mdev->data.rbuf.header; + union p_header *header = &mdev->data.rbuf.header; + unsigned int packet_size; + enum drbd_packets cmd; + size_t shs; /* sub header size */ + int rv; while (get_t_state(&mdev->receiver) == Running) { drbd_thread_current_set_cpu(mdev); - if (!drbd_recv_header(mdev, header)) { - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - break; + if (!drbd_recv_header(mdev, &cmd, &packet_size)) + goto err_out; + + if (unlikely(cmd >= P_MAX_CMD || !drbd_cmd_handler[cmd].function)) { + dev_err(DEV, "unknown packet type %d, l: %d!\n", cmd, packet_size); + goto err_out; } - if (header->command < P_MAX_CMD) - handler = drbd_cmd_handler[header->command]; - else if (P_MAY_IGNORE < header->command - && header->command < P_MAX_OPT_CMD) - handler = drbd_opt_cmd_handler[header->command-P_MAY_IGNORE]; - else if (header->command > P_MAX_OPT_CMD) - handler = receive_skip; - else - handler = NULL; + shs = drbd_cmd_handler[cmd].pkt_size - sizeof(union p_header); + rv = drbd_recv(mdev, &header->h80.payload, shs); + if (unlikely(rv != shs)) { + dev_err(DEV, "short read while reading sub header: rv=%d\n", rv); + goto err_out; + } - if (unlikely(!handler)) { - dev_err(DEV, "unknown packet type %d, l: %d!\n", - header->command, header->length); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - break; + if (packet_size - shs > 0 && !drbd_cmd_handler[cmd].expect_payload) { + dev_err(DEV, "No payload expected %s l:%d\n", cmdname(cmd), packet_size); + goto err_out; } - if (unlikely(!handler(mdev, header))) { + + rv = drbd_cmd_handler[cmd].function(mdev, cmd, packet_size - shs); + + if (unlikely(!rv)) { dev_err(DEV, "error receiving %s, l: %d!\n", - cmdname(header->command), header->length); - drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); - break; + cmdname(cmd), packet_size); + goto err_out; } } + + if (0) { + err_out: + drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); + } } void drbd_flush_workqueue(struct drbd_conf *mdev) @@ -3980,27 +3926,28 @@ static int drbd_do_handshake(struct drbd_conf *mdev) { /* ASSERT current == mdev->receiver ... */ struct p_handshake *p = &mdev->data.rbuf.handshake; - const int expect = sizeof(struct p_handshake) - -sizeof(struct p_header80); + const int expect = sizeof(struct p_handshake) - sizeof(struct p_header80); + unsigned int length; + enum drbd_packets cmd; int rv; rv = drbd_send_handshake(mdev); if (!rv) return 0; - rv = drbd_recv_header(mdev, &p->head); + rv = drbd_recv_header(mdev, &cmd, &length); if (!rv) return 0; - if (p->head.command != P_HAND_SHAKE) { + if (cmd != P_HAND_SHAKE) { dev_err(DEV, "expected HandShake packet, received: %s (0x%04x)\n", - cmdname(p->head.command), p->head.command); + cmdname(cmd), cmd); return -1; } - if (p->head.length != expect) { + if (length != expect) { dev_err(DEV, "expected HandShake length: %u, received: %u\n", - expect, p->head.length); + expect, length); return -1; } @@ -4058,10 +4005,11 @@ static int drbd_do_auth(struct drbd_conf *mdev) char *response = NULL; char *right_response = NULL; char *peers_ch = NULL; - struct p_header80 p; unsigned int key_len = strlen(mdev->net_conf->shared_secret); unsigned int resp_size; struct hash_desc desc; + enum drbd_packets cmd; + unsigned int length; int rv; desc.tfm = mdev->cram_hmac_tfm; @@ -4081,33 +4029,33 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &p); + rv = drbd_recv_header(mdev, &cmd, &length); if (!rv) goto fail; - if (p.command != P_AUTH_CHALLENGE) { + if (cmd != P_AUTH_CHALLENGE) { dev_err(DEV, "expected AuthChallenge packet, received: %s (0x%04x)\n", - cmdname(p.command), p.command); + cmdname(cmd), cmd); rv = 0; goto fail; } - if (p.length > CHALLENGE_LEN*2) { + if (length > CHALLENGE_LEN * 2) { dev_err(DEV, "expected AuthChallenge payload too big.\n"); rv = -1; goto fail; } - peers_ch = kmalloc(p.length, GFP_NOIO); + peers_ch = kmalloc(length, GFP_NOIO); if (peers_ch == NULL) { dev_err(DEV, "kmalloc of peers_ch failed\n"); rv = -1; goto fail; } - rv = drbd_recv(mdev, peers_ch, p.length); + rv = drbd_recv(mdev, peers_ch, length); - if (rv != p.length) { + if (rv != length) { dev_err(DEV, "short read AuthChallenge: l=%u\n", rv); rv = 0; goto fail; @@ -4122,7 +4070,7 @@ static int drbd_do_auth(struct drbd_conf *mdev) } sg_init_table(&sg, 1); - sg_set_buf(&sg, peers_ch, p.length); + sg_set_buf(&sg, peers_ch, length); rv = crypto_hash_digest(&desc, &sg, sg.length, response); if (rv) { @@ -4135,18 +4083,18 @@ static int drbd_do_auth(struct drbd_conf *mdev) if (!rv) goto fail; - rv = drbd_recv_header(mdev, &p); + rv = drbd_recv_header(mdev, &cmd, &length); if (!rv) goto fail; - if (p.command != P_AUTH_RESPONSE) { + if (cmd != P_AUTH_RESPONSE) { dev_err(DEV, "expected AuthResponse packet, received: %s (0x%04x)\n", - cmdname(p.command), p.command); + cmdname(cmd), cmd); rv = 0; goto fail; } - if (p.length != resp_size) { + if (length != resp_size) { dev_err(DEV, "expected AuthResponse payload of wrong size\n"); rv = 0; goto fail; @@ -4474,9 +4422,8 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) return TRUE; } -static int got_something_to_ignore_m(struct drbd_conf *mdev, struct p_header80 *h) +static int got_skip(struct drbd_conf *mdev, struct p_header80 *h) { - /* IGNORE */ return TRUE; } @@ -4504,7 +4451,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) [P_BARRIER_ACK] = { sizeof(struct p_barrier_ack), got_BarrierAck }, [P_STATE_CHG_REPLY] = { sizeof(struct p_req_state_reply), got_RqSReply }, [P_RS_IS_IN_SYNC] = { sizeof(struct p_block_ack), got_IsInSync }, - [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_something_to_ignore_m }, + [P_DELAY_PROBE] = { sizeof(struct p_delay_probe93), got_skip }, [P_MAX_CMD] = { 0, NULL }, }; if (cmd > P_MAX_CMD || asender_tbl[cmd].process == NULL) @@ -4515,7 +4462,7 @@ static struct asender_cmd *get_asender_cmd(int cmd) int drbd_asender(struct drbd_thread *thi) { struct drbd_conf *mdev = thi->mdev; - struct p_header80 *h = &mdev->meta.rbuf.header; + struct p_header80 *h = &mdev->meta.rbuf.header.h80; struct asender_cmd *cmd = NULL; int rv, len; -- cgit v1.2.3 From d53733893dc43f4ebb5be510863c5debf0f8990b Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Mon, 23 Aug 2010 15:18:33 +0200 Subject: drbd: Actually allow BIOs up to 128k (was 32k). Now we have multiple BIOs per ee, packets with a 32 bit length field, it gets time to use these goodies. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index fe308644a63c..4249117f1f67 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -925,6 +925,11 @@ retry: drbd_thread_start(&mdev->asender); + if (mdev->agreed_pro_version < 95 && get_ldev(mdev)) { + drbd_setup_queue_param(mdev, DRBD_MAX_SIZE_H80_PACKET); + put_ldev(mdev); + } + if (!drbd_send_protocol(mdev)) return -1; drbd_send_sync_param(mdev, &mdev->sync_conf); -- cgit v1.2.3 From 63106d3c6c769b6219bd04edde513b12abae3f61 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 1 Sep 2010 15:47:15 +0200 Subject: drbd: Removed a race that could cause unexpected execution of w_make_resync_request() The actual race happened int the drbd_start_resync() function. Where drbd_resync_finished() -> __drbd_set_state() set STOP_SYNC_TIMER and armed the timer. If the timer fired before execution reaches the mod_timer statement at the end of drbd_start_resync() the latter would cause an unexpected call to w_make_resync_request(). Removed the STOP_SYNC_TIMER bit, and base it on the connection state. The STOP_SYNC_TIMER bit probably originates probably the time before the state engine. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4249117f1f67..885471ded2fb 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3803,7 +3803,6 @@ static void drbd_disconnect(struct drbd_conf *mdev) /* make sure syncer is stopped and w_resume_next_sg queued */ del_timer_sync(&mdev->resync_timer); - set_bit(STOP_SYNC_TIMER, &mdev->flags); resync_timer_fn((unsigned long)mdev); /* wait for all w_e_end_data_req, w_e_end_rsdata_req, w_send_barrier, -- cgit v1.2.3 From 76d2e7eca8e7675c6d7a6592f9e747b121cc8a87 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 25 Aug 2010 11:58:05 +0200 Subject: drbd: Adding support for BIO/Request flags: REQ_FUA, REQ_FLUSH and REQ_DISCARD Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 20 ++++++++++++++------ 1 file changed, 14 insertions(+), 6 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 885471ded2fb..e96fbb04ea4d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1753,6 +1753,18 @@ static int drbd_wait_peer_seq(struct drbd_conf *mdev, const u32 packet_seq) return ret; } +static unsigned long write_flags_to_bio(struct drbd_conf *mdev, u32 dpf) +{ + if (mdev->agreed_pro_version >= 95) + return (dpf & DP_RW_SYNC ? REQ_SYNC : 0) | + (dpf & DP_UNPLUG ? REQ_UNPLUG : 0) | + (dpf & DP_FUA ? REQ_FUA : 0) | + (dpf & DP_FLUSH ? REQ_FUA : 0) | + (dpf & DP_DISCARD ? REQ_DISCARD : 0); + else + return dpf & DP_RW_SYNC ? (REQ_SYNC | REQ_UNPLUG) : 0; +} + /* mirrored write */ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { @@ -1818,12 +1830,8 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_unlock(&mdev->epoch_lock); dp_flags = be32_to_cpu(p->dp_flags); - if (dp_flags & DP_HARDBARRIER) { - dev_err(DEV, "ASSERT FAILED would have submitted barrier request\n"); - /* rw |= REQ_HARDBARRIER; */ - } - if (dp_flags & DP_RW_SYNC) - rw |= REQ_SYNC | REQ_UNPLUG; + rw |= write_flags_to_bio(mdev, dp_flags); + if (dp_flags & DP_MAY_SET_IN_SYNC) e->flags |= EE_MAY_SET_IN_SYNC; -- cgit v1.2.3 From 435f07402b3165b90592073bc0f8c6f8fa160ff9 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 6 Sep 2010 12:30:25 +0200 Subject: drbd: don't count sendpage()d pages only referenced by tcp as in use Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 26 ++++++++++++++++---------- 1 file changed, 16 insertions(+), 10 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e96fbb04ea4d..2c3edf0ac5ca 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -241,7 +241,7 @@ static void drbd_kick_lo_and_reclaim_net(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_ee(mdev, e); + drbd_free_net_ee(mdev, e); } /** @@ -298,9 +298,11 @@ static struct page *drbd_pp_alloc(struct drbd_conf *mdev, unsigned number, bool * Is also used from inside an other spin_lock_irq(&mdev->req_lock); * Either links the page chain back to the global pool, * or returns all pages to the system. */ -static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) +static void drbd_pp_free(struct drbd_conf *mdev, struct page *page, int is_net) { + atomic_t *a = is_net ? &mdev->pp_in_use_by_net : &mdev->pp_in_use; int i; + if (drbd_pp_vacant > (DRBD_MAX_SEGMENT_SIZE/PAGE_SIZE)*minor_count) i = page_chain_free(page); else { @@ -311,10 +313,10 @@ static void drbd_pp_free(struct drbd_conf *mdev, struct page *page) drbd_pp_vacant += i; spin_unlock(&drbd_pp_lock); } - atomic_sub(i, &mdev->pp_in_use); - i = atomic_read(&mdev->pp_in_use); + i = atomic_sub_return(i, a); if (i < 0) - dev_warn(DEV, "ASSERTION FAILED: pp_in_use: %d < 0\n", i); + dev_warn(DEV, "ASSERTION FAILED: %s: %d < 0\n", + is_net ? "pp_in_use_by_net" : "pp_in_use", i); wake_up(&drbd_pp_wait); } @@ -374,11 +376,11 @@ struct drbd_epoch_entry *drbd_alloc_ee(struct drbd_conf *mdev, return NULL; } -void drbd_free_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e) +void drbd_free_some_ee(struct drbd_conf *mdev, struct drbd_epoch_entry *e, int is_net) { if (e->flags & EE_HAS_DIGEST) kfree(e->digest); - drbd_pp_free(mdev, e->pages); + drbd_pp_free(mdev, e->pages, is_net); D_ASSERT(atomic_read(&e->pending_bios) == 0); D_ASSERT(hlist_unhashed(&e->colision)); mempool_free(e, drbd_ee_mempool); @@ -389,13 +391,14 @@ int drbd_release_ee(struct drbd_conf *mdev, struct list_head *list) LIST_HEAD(work_list); struct drbd_epoch_entry *e, *t; int count = 0; + int is_net = list == &mdev->net_ee; spin_lock_irq(&mdev->req_lock); list_splice_init(list, &work_list); spin_unlock_irq(&mdev->req_lock); list_for_each_entry_safe(e, t, &work_list, w.list) { - drbd_free_ee(mdev, e); + drbd_free_some_ee(mdev, e, is_net); count++; } return count; @@ -424,7 +427,7 @@ static int drbd_process_done_ee(struct drbd_conf *mdev) spin_unlock_irq(&mdev->req_lock); list_for_each_entry_safe(e, t, &reclaimed, w.list) - drbd_free_ee(mdev, e); + drbd_free_net_ee(mdev, e); /* possible callbacks here: * e_end_block, and e_end_resync_block, e_send_discard_ack. @@ -1460,7 +1463,7 @@ static int drbd_drain_block(struct drbd_conf *mdev, int data_size) data_size -= rr; } kunmap(page); - drbd_pp_free(mdev, page); + drbd_pp_free(mdev, page, 0); return rv; } @@ -3879,6 +3882,9 @@ static void drbd_disconnect(struct drbd_conf *mdev) i = drbd_release_ee(mdev, &mdev->net_ee); if (i) dev_info(DEV, "net_ee not empty, killed %u entries\n", i); + i = atomic_read(&mdev->pp_in_use_by_net); + if (i) + dev_info(DEV, "pp_in_use_by_net = %d, expected 0\n", i); i = atomic_read(&mdev->pp_in_use); if (i) dev_info(DEV, "pp_in_use = %d, expected 0\n", i); -- cgit v1.2.3 From 1d53f09e170e477de67babd7a10e277479260d51 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Sun, 5 Sep 2010 01:13:24 +0200 Subject: drbd: fix potential kernel BUG (NULL deref) BUG trace would look like: lc_find drbd_rs_complete_io got_OVResult drbd_asender Could be triggered by explicit, or IO-error policy based, detach during online-verify. We may only dereference mdev->resync, if we first get_ldev(), as the disk may break any time, causing mdev->resync to disappear once all ldev references have been returned. Already in flight online-verify requests or replies may still come in, which we then need to ignore. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 15 +++++++++++---- 1 file changed, 11 insertions(+), 4 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 2c3edf0ac5ca..e4e4eddf04f2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4241,10 +4241,13 @@ static int got_IsInSync(struct drbd_conf *mdev, struct p_header80 *h) update_peer_seq(mdev, be32_to_cpu(p->seq_num)); - drbd_rs_complete_io(mdev, sector); - drbd_set_in_sync(mdev, sector, blksize); - /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ - mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); + if (get_ldev(mdev)) { + drbd_rs_complete_io(mdev, sector); + drbd_set_in_sync(mdev, sector, blksize); + /* rs_same_csums is supposed to count in units of BM_BLOCK_SIZE */ + mdev->rs_same_csum += (blksize >> BM_BLOCK_SHIFT); + put_ldev(mdev); + } dec_rs_pending(mdev); atomic_add(blksize >> 9, &mdev->rs_sect_in); @@ -4423,6 +4426,9 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) else ov_oos_print(mdev); + if (!get_ldev(mdev)) + return TRUE; + drbd_rs_complete_io(mdev, sector); dec_rs_pending(mdev); @@ -4437,6 +4443,7 @@ static int got_OVResult(struct drbd_conf *mdev, struct p_header80 *h) drbd_resync_finished(mdev); } } + put_ldev(mdev); return TRUE; } -- cgit v1.2.3 From a821cc4a9a8d9e67356f9a5cfc1206aa3cfd30f7 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Mon, 6 Sep 2010 12:31:37 +0200 Subject: drbd: fix spurious protocol error If we cannot satisfy a request (because our disk just broke), we still need to drain the payload. Or we'll get a protocol error when interpreting the payload as DRBD packet header. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index e4e4eddf04f2..983e49cbd233 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2085,7 +2085,8 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un "no local data.\n"); drbd_send_ack_rp(mdev, cmd == P_DATA_REQUEST ? P_NEG_DREPLY : P_NEG_RS_DREPLY , p); - return TRUE; + /* drain possibly payload */ + return drbd_drain_block(mdev, digest_size); } /* GFP_NOIO, because we must not cause arbitrary write-out: in a DRBD -- cgit v1.2.3 From fb22c402ffdf61dd121795b5809de587185d5240 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 8 Sep 2010 23:20:21 +0200 Subject: drbd: Track the reasons to suspend IO in dedicated state bits There are three ways to get IO suspended: * Loss of any access to data * Fence-peer-handler running * User requested to suspend IO Track those in different bits, so that one condition clearing its state bit does not interfere with the other two conditions. Only when the user resumes IO he overrules all three bits. The fact is hidden from the user, he sees only a single suspend bit. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 983e49cbd233..6b69b2f734dc 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3315,7 +3315,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD); - if (ns.pdsk == D_CONSISTENT && ns.susp && nconn == C_CONNECTED && oconn < C_CONNECTED && + if (ns.pdsk == D_CONSISTENT && is_susp(ns) && nconn == C_CONNECTED && oconn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this for temporal network outages! */ @@ -3829,7 +3829,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) kfree(mdev->p_uuid); mdev->p_uuid = NULL; - if (!mdev->state.susp) + if (!is_susp(mdev->state)) tl_clear(mdev); dev_info(DEV, "Connection closed\n"); @@ -3858,7 +3858,7 @@ static void drbd_disconnect(struct drbd_conf *mdev) if (os.conn == C_DISCONNECTING) { wait_event(mdev->net_cnt_wait, atomic_read(&mdev->net_cnt) == 0); - if (!mdev->state.susp) { + if (!is_susp(mdev->state)) { /* we must not free the tl_hash * while application io is still on the fly */ wait_event(mdev->misc_wait, !atomic_read(&mdev->ap_bio_cnt)); -- cgit v1.2.3 From 8979d9c9e0bc8e54cf5bd7a89abb2145f087b5e1 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Sep 2010 15:56:29 +0200 Subject: drbd: protocol compatibility for maximum packet sizes Two missing corner cases to the "maximum packet size" handshake. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 6b69b2f734dc..9da32ac62c22 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3088,6 +3088,8 @@ static int receive_sizes(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (mdev->agreed_pro_version < 94) max_seg_s = be32_to_cpu(p->max_segment_size); + else if (mdev->agreed_pro_version == 94) + max_seg_s = DRBD_MAX_SIZE_H80_PACKET; else /* drbd 8.3.8 onwards */ max_seg_s = DRBD_MAX_SEGMENT_SIZE; -- cgit v1.2.3 From 004352fa60345e499379af310de73a2df1a5762a Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 5 Oct 2010 20:13:58 +0200 Subject: drbd: Fix regression in recv_bm_rle_bits (compressed bitmap) We used to be16_to_cpu the length field in our received packet header. drbd commit 17c854fea474a5eb3cfa12e4fb019e46debbc4ec drbd: receiving of big packets, for payloads between 64kByte and 4GByte changed this, but forgot to adjust a few places where we relied on h->length being in native byte order. This broke the receiving side of the RLE compressed bitmap exchange. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 27 +++++++++++++++------------ 1 file changed, 15 insertions(+), 12 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 9da32ac62c22..b5d3fa6c7a8b 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -970,9 +970,10 @@ static int drbd_recv_header(struct drbd_conf *mdev, enum drbd_packets *cmd, unsi *cmd = be16_to_cpu(h->h95.command); *packet_size = be32_to_cpu(h->h95.length); } else { - dev_err(DEV, "magic?? on data m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->h80.magic), - h->h80.command, h->h80.length); + dev_err(DEV, "magic?? on data m: 0x%08x c: %d l: %d\n", + be32_to_cpu(h->h80.magic), + be16_to_cpu(h->h80.command), + be16_to_cpu(h->h80.length)); return FALSE; } mdev->last_received = jiffies; @@ -3421,7 +3422,7 @@ recv_bm_rle_bits(struct drbd_conf *mdev, u64 tmp; unsigned long s = c->bit_offset; unsigned long e; - int len = p->head.length - (sizeof(*p) - sizeof(p->head)); + int len = be16_to_cpu(p->head.length) - (sizeof(*p) - sizeof(p->head)); int toggle = DCBP_get_start(p); int have; int bits; @@ -3570,8 +3571,8 @@ static int receive_bitmap(struct drbd_conf *mdev, enum drbd_packets cmd, unsigne memcpy(p, h, sizeof(*h)); if (drbd_recv(mdev, p->head.payload, data_size) != data_size) goto out; - if (p->head.length <= (sizeof(*p) - sizeof(p->head))) { - dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", p->head.length); + if (data_size <= (sizeof(*p) - sizeof(p->head))) { + dev_err(DEV, "ReportCBitmap packet too small (l:%u)\n", data_size); return FAILED; } ret = decode_bitmap_c(mdev, p, &c); @@ -4582,17 +4583,19 @@ int drbd_asender(struct drbd_thread *thi) if (received == expect && cmd == NULL) { if (unlikely(h->magic != BE_DRBD_MAGIC)) { - dev_err(DEV, "magic?? on meta m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->magic), - h->command, h->length); + dev_err(DEV, "magic?? on meta m: 0x%08x c: %d l: %d\n", + be32_to_cpu(h->magic), + be16_to_cpu(h->command), + be16_to_cpu(h->length)); goto reconnect; } cmd = get_asender_cmd(be16_to_cpu(h->command)); len = be16_to_cpu(h->length); if (unlikely(cmd == NULL)) { - dev_err(DEV, "unknown command?? on meta m: 0x%lx c: %d l: %d\n", - (long)be32_to_cpu(h->magic), - h->command, h->length); + dev_err(DEV, "unknown command?? on meta m: 0x%08x c: %d l: %d\n", + be32_to_cpu(h->magic), + be16_to_cpu(h->command), + be16_to_cpu(h->length)); goto disconnect; } expect = cmd->pkt_size; -- cgit v1.2.3 From 2b2bf2148fd46874ee72a877c951e5c6675d1caa Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 6 Oct 2010 11:46:55 +0200 Subject: drbd: drbd_send_ack_dp must not rely on header information drbd commit 17c854fea474a5eb3cfa12e4fb019e46debbc4ec drbd: receiving of big packets, for payloads between 64kByte and 4GByte introduced a new on-the-wire packet header format. We must no longer assume either format, but use the result of whatever drbd_recv_header has decoded. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index b5d3fa6c7a8b..45a2d610ca1d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1630,7 +1630,7 @@ static int receive_RSDataReply(struct drbd_conf *mdev, enum drbd_packets cmd, un ok = drbd_drain_block(mdev, data_size); - drbd_send_ack_dp(mdev, P_NEG_ACK, p); + drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); } atomic_add(data_size >> 9, &mdev->rs_sect_in); @@ -1787,7 +1787,7 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned mdev->peer_seq++; spin_unlock(&mdev->peer_seq_lock); - drbd_send_ack_dp(mdev, P_NEG_ACK, p); + drbd_send_ack_dp(mdev, P_NEG_ACK, p, data_size); atomic_inc(&mdev->current_epoch->epoch_size); return drbd_drain_block(mdev, data_size); } -- cgit v1.2.3 From 4ac4aadacb5badc45679cd94cd362132daafe8c4 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 22 Jul 2010 17:39:26 +0200 Subject: drbd: preparation commit, using full state in receive_state() no functional change, just using full state instead of just the .conn part of it for comparisons. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 39 ++++++++++++++++++-------------------- 1 file changed, 18 insertions(+), 21 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 45a2d610ca1d..585049dfb711 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3242,8 +3242,7 @@ static int receive_req_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsi static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned int data_size) { struct p_state *p = &mdev->data.rbuf.state; - enum drbd_conns nconn, oconn; - union drbd_state ns, peer_state; + union drbd_state os, ns, peer_state; enum drbd_disk_state real_peer_disk; enum chg_state_flags cs_flags; int rv; @@ -3258,38 +3257,38 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned spin_lock_irq(&mdev->req_lock); retry: - oconn = nconn = mdev->state.conn; + os = ns = mdev->state; spin_unlock_irq(&mdev->req_lock); - if (nconn == C_WF_REPORT_PARAMS) - nconn = C_CONNECTED; + if (ns.conn == C_WF_REPORT_PARAMS) + ns.conn = C_CONNECTED; if (mdev->p_uuid && peer_state.disk >= D_NEGOTIATING && get_ldev_if_state(mdev, D_NEGOTIATING)) { int cr; /* consider resync */ /* if we established a new connection */ - cr = (oconn < C_CONNECTED); + cr = (os.conn < C_CONNECTED); /* if we had an established connection * and one of the nodes newly attaches a disk */ - cr |= (oconn == C_CONNECTED && + cr |= (os.conn == C_CONNECTED && (peer_state.disk == D_NEGOTIATING || - mdev->state.disk == D_NEGOTIATING)); + os.disk == D_NEGOTIATING)); /* if we have both been inconsistent, and the peer has been * forced to be UpToDate with --overwrite-data */ cr |= test_bit(CONSIDER_RESYNC, &mdev->flags); /* if we had been plain connected, and the admin requested to * start a sync by "invalidate" or "invalidate-remote" */ - cr |= (oconn == C_CONNECTED && + cr |= (os.conn == C_CONNECTED && (peer_state.conn >= C_STARTING_SYNC_S && peer_state.conn <= C_WF_BITMAP_T)); if (cr) - nconn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); + ns.conn = drbd_sync_handshake(mdev, peer_state.role, real_peer_disk); put_ldev(mdev); - if (nconn == C_MASK) { - nconn = C_CONNECTED; + if (ns.conn == C_MASK) { + ns.conn = C_CONNECTED; if (mdev->state.disk == D_NEGOTIATING) { drbd_force_state(mdev, NS(disk, D_DISKLESS)); } else if (peer_state.disk == D_NEGOTIATING) { @@ -3299,7 +3298,7 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } else { if (test_and_clear_bit(CONN_DRY_RUN, &mdev->flags)) return FALSE; - D_ASSERT(oconn == C_WF_REPORT_PARAMS); + D_ASSERT(os.conn == C_WF_REPORT_PARAMS); drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); return FALSE; } @@ -3307,18 +3306,16 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned } spin_lock_irq(&mdev->req_lock); - if (mdev->state.conn != oconn) + if (mdev->state.i != os.i) goto retry; clear_bit(CONSIDER_RESYNC, &mdev->flags); - ns.i = mdev->state.i; - ns.conn = nconn; ns.peer = peer_state.role; ns.pdsk = real_peer_disk; ns.peer_isp = (peer_state.aftr_isp | peer_state.user_isp); - if ((nconn == C_CONNECTED || nconn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) + if ((ns.conn == C_CONNECTED || ns.conn == C_WF_BITMAP_S) && ns.disk == D_NEGOTIATING) ns.disk = mdev->new_state_tmp.disk; - cs_flags = CS_VERBOSE + (oconn < C_CONNECTED && nconn >= C_CONNECTED ? 0 : CS_HARD); - if (ns.pdsk == D_CONSISTENT && is_susp(ns) && nconn == C_CONNECTED && oconn < C_CONNECTED && + cs_flags = CS_VERBOSE + (os.conn < C_CONNECTED && ns.conn >= C_CONNECTED ? 0 : CS_HARD); + if (ns.pdsk == D_CONSISTENT && is_susp(ns) && ns.conn == C_CONNECTED && os.conn < C_CONNECTED && test_bit(NEW_CUR_UUID, &mdev->flags)) { /* Do not allow tl_restart(resend) for a rebooted peer. We can only allow this for temporal network outages! */ @@ -3339,8 +3336,8 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned return FALSE; } - if (oconn > C_WF_REPORT_PARAMS) { - if (nconn > C_CONNECTED && peer_state.conn <= C_CONNECTED && + if (os.conn > C_WF_REPORT_PARAMS) { + if (ns.conn > C_CONNECTED && peer_state.conn <= C_CONNECTED && peer_state.disk != D_NEGOTIATING ) { /* we want resync, peer has not yet decided to sync... */ /* Nowadays only used when forcing a node into primary role and -- cgit v1.2.3 From e9ef7bb6f9696471ddddf0065afac8b435e5d051 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 7 Oct 2010 15:55:39 +0200 Subject: drbd: allow for explicit resync-finished notifications Preparation patch so more drbd_send_state() usage on the peer will not confuse drbd in receive_state(). Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 34 ++++++++++++++++++++++++++++++++++ 1 file changed, 34 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 585049dfb711..990fe01afa50 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3260,6 +3260,40 @@ static int receive_state(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned os = ns = mdev->state; spin_unlock_irq(&mdev->req_lock); + /* peer says his disk is uptodate, while we think it is inconsistent, + * and this happens while we think we have a sync going on. */ + if (os.pdsk == D_INCONSISTENT && real_peer_disk == D_UP_TO_DATE && + os.conn > C_CONNECTED && os.disk == D_UP_TO_DATE) { + /* If we are (becoming) SyncSource, but peer is still in sync + * preparation, ignore its uptodate-ness to avoid flapping, it + * will change to inconsistent once the peer reaches active + * syncing states. + * It may have changed syncer-paused flags, however, so we + * cannot ignore this completely. */ + if (peer_state.conn > C_CONNECTED && + peer_state.conn < C_SYNC_SOURCE) + real_peer_disk = D_INCONSISTENT; + + /* if peer_state changes to connected at the same time, + * it explicitly notifies us that it finished resync. + * Maybe we should finish it up, too? */ + else if (os.conn >= C_SYNC_SOURCE && + peer_state.conn == C_CONNECTED) { + if (drbd_bm_total_weight(mdev) <= mdev->rs_failed) + drbd_resync_finished(mdev); + return TRUE; + } + } + + /* peer says his disk is inconsistent, while we think it is uptodate, + * and this happens while the peer still thinks we have a sync going on, + * but we think we are already done with the sync. + * We ignore this to avoid flapping pdsk. + * This should not happen, if the peer is a recent version of drbd. */ + if (os.pdsk == D_UP_TO_DATE && real_peer_disk == D_INCONSISTENT && + os.conn == C_CONNECTED && peer_state.conn > C_SYNC_SOURCE) + real_peer_disk = D_UP_TO_DATE; + if (ns.conn == C_WF_REPORT_PARAMS) ns.conn = C_CONNECTED; -- cgit v1.2.3 From 22cc37a943832c948808884604ec6f5ff2594c1d Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Tue, 14 Sep 2010 20:40:41 +0200 Subject: drbd: fix unlikely access after free and list corruption Various cleanup paths have been incomplete, for the very unlikely case that we cannot allocate enough bios from process context when submitting on behalf of the peer or resync process. Never observed. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 990fe01afa50..71775a9de21d 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -1573,6 +1573,13 @@ static int recv_resync_read(struct drbd_conf *mdev, sector_t sector, int data_si if (drbd_submit_ee(mdev, e, WRITE, DRBD_FAULT_RS_WR) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + drbd_free_ee(mdev, e); fail: put_ldev(mdev); @@ -1998,6 +2005,16 @@ static int receive_Data(struct drbd_conf *mdev, enum drbd_packets cmd, unsigned if (drbd_submit_ee(mdev, e, rw, DRBD_FAULT_DT_WR) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + hlist_del_init(&e->colision); + spin_unlock_irq(&mdev->req_lock); + if (e->flags & EE_CALL_AL_COMPLETE_IO) + drbd_al_complete_io(mdev, e->sector); + out_interrupted: /* yes, the epoch_size now is imbalanced. * but we drop the connection anyways, so we don't have a chance to @@ -2202,6 +2219,14 @@ submit: if (drbd_submit_ee(mdev, e, READ, fault_type) == 0) return TRUE; + /* drbd_submit_ee currently fails for one reason only: + * not being able to allocate enough bios. + * Is dropping the connection going to help? */ + spin_lock_irq(&mdev->req_lock); + list_del(&e->w.list); + spin_unlock_irq(&mdev->req_lock); + /* no drbd_rs_complete_io(), we are dropping the connection anyways */ + out_free_e: put_ldev(mdev); drbd_free_ee(mdev, e); -- cgit v1.2.3 From b18b37befb37810ce50e1a9b0a6206dfe363d827 Mon Sep 17 00:00:00 2001 From: Philipp Reisner Date: Wed, 13 Oct 2010 15:32:44 +0200 Subject: drbd: Do not log an ASSERT for P_OV_REQUEST packets while C_CONNECTED This might happen if on the VERIFY_S node the disk gets dropped. Although this is an cluster wide state transition, the VERIFY_T node, updates it connection state first. Then the ack packet for the cluster wide state transition travels back, and the VERIFY_S node stops to produce the P_OV_REQUEST packets. There is absolutely nothing wrong with that. Further, do not log "Can not satisfy peer's..." on the VERIFY_S node in this case, but pretend that they had equal checksum. [Bugz 327] Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 30 ++++++++++++++++++++++-------- 1 file changed, 22 insertions(+), 8 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 71775a9de21d..4dc6a8870526 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -2079,7 +2079,7 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un const sector_t capacity = drbd_get_capacity(mdev->this_bdev); struct drbd_epoch_entry *e; struct digest_info *di = NULL; - int size; + int size, verb; unsigned int fault_type; struct p_block_req *p = &mdev->data.rbuf.block_req; @@ -2098,11 +2098,29 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un } if (!get_ldev_if_state(mdev, D_UP_TO_DATE)) { - if (__ratelimit(&drbd_ratelimit_state)) + verb = 1; + switch (cmd) { + case P_DATA_REQUEST: + drbd_send_ack_rp(mdev, P_NEG_DREPLY, p); + break; + case P_RS_DATA_REQUEST: + case P_CSUM_RS_REQUEST: + case P_OV_REQUEST: + drbd_send_ack_rp(mdev, P_NEG_RS_DREPLY , p); + break; + case P_OV_REPLY: + verb = 0; + dec_rs_pending(mdev); + drbd_send_ack_ex(mdev, P_OV_RESULT, sector, size, ID_IN_SYNC); + break; + default: + dev_err(DEV, "unexpected command (%s) in receive_DataRequest\n", + cmdname(cmd)); + } + if (verb && __ratelimit(&drbd_ratelimit_state)) dev_err(DEV, "Can not satisfy peer's read request, " "no local data.\n"); - drbd_send_ack_rp(mdev, cmd == P_DATA_REQUEST ? P_NEG_DREPLY : - P_NEG_RS_DREPLY , p); + /* drain possibly payload */ return drbd_drain_block(mdev, digest_size); } @@ -2157,10 +2175,6 @@ static int receive_DataRequest(struct drbd_conf *mdev, enum drbd_packets cmd, un break; case P_OV_REQUEST: - if (mdev->state.conn >= C_CONNECTED && - mdev->state.conn != C_VERIFY_T) - dev_warn(DEV, "ASSERT FAILED: got P_OV_REQUEST while being %s\n", - drbd_conn_str(mdev->state.conn)); if (mdev->ov_start_sector == ~(sector_t)0 && mdev->agreed_pro_version >= 90) { mdev->ov_start_sector = sector; -- cgit v1.2.3 From 0f8488e1608b6e30e705460f8110888c645f7f9f Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Wed, 13 Oct 2010 18:19:23 +0200 Subject: drbd: cleanup useless leftover warn/error printk's Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 4dc6a8870526..5a4b6dcd48f2 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -4591,10 +4591,8 @@ int drbd_asender(struct drbd_thread *thi) while (1) { clear_bit(SIGNAL_ASENDER, &mdev->flags); flush_signals(current); - if (!drbd_process_done_ee(mdev)) { - dev_err(DEV, "process_done_ee() = NOT_OK\n"); + if (!drbd_process_done_ee(mdev)) goto reconnect; - } /* to avoid race with newly queued ACKs */ set_bit(SIGNAL_ASENDER, &mdev->flags); spin_lock_irq(&mdev->req_lock); -- cgit v1.2.3 From 856c50c7b616d50e1a3ccd4ce35f7814650fa594 Mon Sep 17 00:00:00 2001 From: Lars Ellenberg Date: Thu, 14 Oct 2010 13:37:40 +0200 Subject: drbd: add some more explicit drbd_md_sync It sometimes may take a while for the after state change work to be scheduled, which does drbd_md_sync. At convenient places, we should do explicit drbd_md_sync to have the new state information on disk as soon as possible. Signed-off-by: Philipp Reisner Signed-off-by: Lars Ellenberg --- drivers/block/drbd/drbd_receiver.c | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'drivers/block/drbd/drbd_receiver.c') diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 5a4b6dcd48f2..6ec922c623a1 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -3804,6 +3804,9 @@ static void drbdd(struct drbd_conf *mdev) err_out: drbd_force_state(mdev, NS(conn, C_PROTOCOL_ERROR)); } + /* If we leave here, we probably want to update at least the + * "Connected" indicator on stable storage. Do so explicitly here. */ + drbd_md_sync(mdev); } void drbd_flush_workqueue(struct drbd_conf *mdev) @@ -4685,10 +4688,12 @@ int drbd_asender(struct drbd_thread *thi) if (0) { reconnect: drbd_force_state(mdev, NS(conn, C_NETWORK_FAILURE)); + drbd_md_sync(mdev); } if (0) { disconnect: drbd_force_state(mdev, NS(conn, C_DISCONNECTING)); + drbd_md_sync(mdev); } clear_bit(SIGNAL_ASENDER, &mdev->flags); -- cgit v1.2.3