From 9114d79569a3fb858a7ecb1f21cb1dec93dc2f21 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 19 Mar 2013 18:16:42 +0100
Subject: drbd: cleanup bogus assert message

This fixes ASSERT( mdev->state.disk == D_FAILED ) in drivers/block/drbd/drbd_main.c

When we detach from local disk, we let the local refcount hit zero twice.

First, we transition to D_FAILED, so we won't give out new references
to incoming requests; we still may give out *internal* references, though.
Once the refcount hits zero [1] while in D_FAILED, we queue a transition
to D_DISKLESS to our worker.  We need to queue it, because we may be in
atomic context when putting the reference.
Once the transition to D_DISKLESS actually happened [2] from worker context,
we don't give out new internal references either.

Between hitting zero the first time [1] and actually transition to
D_DISKLESS [2], there may be a few very short lived internal get/put,
so we may hit zero more than once while being in D_FAILED, or even see a
race where a an internal get_ldev() happened while D_FAILED, but the
corresponding put_ldev() happens just after the transition to D_DISKLESS.

That's why we have the additional test_and_set_bit(GO_DISKLESS,);
and that's why the assert was placed wrong.
Since there was exactly one code path left to drbd_go_diskless(),
and that checks already for D_FAILED, drop that assert,
and fold in the drbd_queue_work().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 7 -------
 1 file changed, 7 deletions(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e98da675f0c1..731a28eedc56 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3252,13 +3252,6 @@ static int w_go_diskless(struct drbd_work *w, int unused)
 	return 0;
 }
 
-void drbd_go_diskless(struct drbd_conf *mdev)
-{
-	D_ASSERT(mdev->state.disk == D_FAILED);
-	if (!test_and_set_bit(GO_DISKLESS, &mdev->flags))
-		drbd_queue_work(&mdev->tconn->sender_work, &mdev->go_diskless);
-}
-
 /**
  * drbd_queue_bitmap_io() - Queues an IO operation on the whole bitmap
  * @mdev:	DRBD device.
-- 
cgit v1.2.3


From ae8bf312e97d554b6aa32e7b2ceb993812ad0835 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 19 Mar 2013 18:16:43 +0100
Subject: drbd: cleanup ondisk meta data layout calculations and defines

Add a comment about our meta data layout variants,
and rename a few defines (e.g. MD_RESERVED_SECT -> MD_128MB_SECT)
to make it clear that they are short hand for fixed constants,
and not arbitrarily to be redefined as one may see fit.

Properly pad struct meta_data_on_disk to 4kB,
and initialize to zero not only the first 512 Byte,
but all of it in drbd_md_sync().

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 11 ++++++++---
 1 file changed, 8 insertions(+), 3 deletions(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 731a28eedc56..76faeab40c8f 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2834,6 +2834,7 @@ void conn_md_sync(struct drbd_tconn *tconn)
 	rcu_read_unlock();
 }
 
+/* aligned 4kByte */
 struct meta_data_on_disk {
 	u64 la_size;           /* last agreed size. */
 	u64 uuid[UI_SIZE];   /* UUIDs. */
@@ -2843,13 +2844,13 @@ struct meta_data_on_disk {
 	u32 magic;
 	u32 md_size_sect;
 	u32 al_offset;         /* offset to this block */
-	u32 al_nr_extents;     /* important for restoring the AL */
+	u32 al_nr_extents;     /* important for restoring the AL (userspace) */
 	      /* `-- act_log->nr_elements <-- ldev->dc.al_extents */
 	u32 bm_offset;         /* offset to the bitmap, from here */
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
 	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
-	u32 reserved_u32[3];
 
+	u8 reserved_u8[4096 - (7*8 + 8*4)];
 } __packed;
 
 /**
@@ -2862,6 +2863,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	sector_t sector;
 	int i;
 
+	/* Don't accidentally change the DRBD meta data layout. */
+	BUILD_BUG_ON(UI_SIZE != 4);
+	BUILD_BUG_ON(sizeof(struct meta_data_on_disk) != 4096);
+
 	del_timer(&mdev->md_sync_timer);
 	/* timer may be rearmed by drbd_md_mark_dirty() now. */
 	if (!test_and_clear_bit(MD_DIRTY, &mdev->flags))
@@ -2876,7 +2881,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	if (!buffer)
 		goto out;
 
-	memset(buffer, 0, 512);
+	memset(buffer, 0, sizeof(*buffer));
 
 	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
-- 
cgit v1.2.3


From 3a4d4eb3cb03fbc66696fc8cd472701d56f3aee7 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 19 Mar 2013 18:16:44 +0100
Subject: drbd: prepare for new striped layout of activity log

Introduce two new on-disk meta data fields: al_stripes and al_stripe_size_4k
The intended use case is activity log on RAID 0 or similar.
Logically consecutive transactions will advance their on-disk position
by al_stripe_size_4k 4kB (transaction sized) blocks.

Right now, these are still asserted to be the backward compatible
values al_stripes = 1, al_stripe_size_4k = 8 (which amounts to 32kB).

Also introduce a caching member for meta_dev_idx in the in-core
structure: even though it is initially passed in in the rcu-protected
disk_conf structure, it cannot change without a detach/attach cycle.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 77 +++++++++++++++++++++++++++++++++++++-----
 1 file changed, 69 insertions(+), 8 deletions(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 76faeab40c8f..7a2e07b45ecf 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2850,7 +2850,11 @@ struct meta_data_on_disk {
 	u32 bm_bytes_per_bit;  /* BM_BLOCK_SIZE */
 	u32 la_peer_max_bio_size;   /* last peer max_bio_size */
 
-	u8 reserved_u8[4096 - (7*8 + 8*4)];
+	/* see al_tr_number_to_on_disk_sector() */
+	u32 al_stripes;
+	u32 al_stripe_size_4k;
+
+	u8 reserved_u8[4096 - (7*8 + 10*4)];
 } __packed;
 
 /**
@@ -2898,7 +2902,10 @@ void drbd_md_sync(struct drbd_conf *mdev)
 	buffer->bm_offset = cpu_to_be32(mdev->ldev->md.bm_offset);
 	buffer->la_peer_max_bio_size = cpu_to_be32(mdev->peer_max_bio_size);
 
-	D_ASSERT(drbd_md_ss__(mdev, mdev->ldev) == mdev->ldev->md.md_offset);
+	buffer->al_stripes = cpu_to_be32(mdev->ldev->md.al_stripes);
+	buffer->al_stripe_size_4k = cpu_to_be32(mdev->ldev->md.al_stripe_size_4k);
+
+	D_ASSERT(drbd_md_ss(mdev->ldev) == mdev->ldev->md.md_offset);
 	sector = mdev->ldev->md.md_offset;
 
 	if (drbd_md_sync_page_io(mdev, mdev->ldev, sector, WRITE)) {
@@ -2916,13 +2923,60 @@ out:
 	put_ldev(mdev);
 }
 
+static int check_activity_log_stripe_size(struct drbd_conf *mdev,
+		struct meta_data_on_disk *on_disk,
+		struct drbd_md *in_core)
+{
+	u32 al_stripes = be32_to_cpu(on_disk->al_stripes);
+	u32 al_stripe_size_4k = be32_to_cpu(on_disk->al_stripe_size_4k);
+	u64 al_size_4k;
+
+	/* both not set: default to old fixed size activity log */
+	if (al_stripes == 0 && al_stripe_size_4k == 0) {
+		al_stripes = 1;
+		al_stripe_size_4k = MD_32kB_SECT/8;
+	}
+
+	/* some paranoia plausibility checks */
+
+	/* we need both values to be set */
+	if (al_stripes == 0 || al_stripe_size_4k == 0)
+		goto err;
+
+	al_size_4k = (u64)al_stripes * al_stripe_size_4k;
+
+	/* Upper limit of activity log area, to avoid potential overflow
+	 * problems in al_tr_number_to_on_disk_sector(). As right now, more
+	 * than 72 * 4k blocks total only increases the amount of history,
+	 * limiting this arbitrarily to 16 GB is not a real limitation ;-)  */
+	if (al_size_4k > (16 * 1024 * 1024/4))
+		goto err;
+
+	/* Lower limit: we need at least 8 transaction slots (32kB)
+	 * to not break existing setups */
+	if (al_size_4k < MD_32kB_SECT/8)
+		goto err;
+
+	in_core->al_stripe_size_4k = al_stripe_size_4k;
+	in_core->al_stripes = al_stripes;
+	in_core->al_size_4k = al_size_4k;
+
+	return 0;
+err:
+	dev_err(DEV, "invalid activity log striping: al_stripes=%u, al_stripe_size_4k=%u\n",
+			al_stripes, al_stripe_size_4k);
+	return -EINVAL;
+}
+
 /**
  * drbd_md_read() - Reads in the meta data super block
  * @mdev:	DRBD device.
  * @bdev:	Device from which the meta data should be read in.
  *
- * Return 0 (NO_ERROR) on success, and an enum drbd_ret_code in case
+ * Return NO_ERROR on success, and an enum drbd_ret_code in case
  * something goes wrong.
+ *
+ * Called exactly once during drbd_adm_attach()
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
@@ -2937,6 +2991,10 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	if (!buffer)
 		goto out;
 
+	/* First, figure out where our meta data superblock is located. */
+	bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
+	bdev->md.md_offset = drbd_md_ss(bdev);
+
 	if (drbd_md_sync_page_io(mdev, bdev, bdev->md.md_offset, READ)) {
 		/* NOTE: can't do normal error processing here as this is
 		   called BEFORE disk is attached */
@@ -2954,40 +3012,43 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		rv = ERR_MD_UNCLEAN;
 		goto err;
 	}
+
+	rv = ERR_MD_INVALID;
 	if (magic != DRBD_MD_MAGIC_08) {
 		if (magic == DRBD_MD_MAGIC_07)
 			dev_err(DEV, "Found old (0.7) meta data magic. Did you \"drbdadm create-md\"?\n");
 		else
 			dev_err(DEV, "Meta data magic not found. Did you \"drbdadm create-md\"?\n");
-		rv = ERR_MD_INVALID;
 		goto err;
 	}
+
+	if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
+		goto err;
+
 	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
 		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
 		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
-		rv = ERR_MD_INVALID;
 		goto err;
 	}
 	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
 		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
 		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
-		rv = ERR_MD_INVALID;
 		goto err;
 	}
 	if (be32_to_cpu(buffer->md_size_sect) != bdev->md.md_size_sect) {
 		dev_err(DEV, "unexpected md_size: %u (expected %u)\n",
 		    be32_to_cpu(buffer->md_size_sect), bdev->md.md_size_sect);
-		rv = ERR_MD_INVALID;
 		goto err;
 	}
 
 	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
 		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
 		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
-		rv = ERR_MD_INVALID;
 		goto err;
 	}
 
+	rv = NO_ERROR;
+
 	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
-- 
cgit v1.2.3


From cccac9857d624dab74b23bafe0482fcdd91df7d8 Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 19 Mar 2013 18:16:46 +0100
Subject: drbd: mechanically rename la_size to la_size_sect

Make it obvious that this value is in units of 512 Byte sectors.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 7a2e07b45ecf..6b956fc04dc8 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2836,7 +2836,7 @@ void conn_md_sync(struct drbd_tconn *tconn)
 
 /* aligned 4kByte */
 struct meta_data_on_disk {
-	u64 la_size;           /* last agreed size. */
+	u64 la_size_sect;      /* last agreed size. */
 	u64 uuid[UI_SIZE];   /* UUIDs. */
 	u64 device_uuid;
 	u64 reserved_u64_1;
@@ -2887,7 +2887,7 @@ void drbd_md_sync(struct drbd_conf *mdev)
 
 	memset(buffer, 0, sizeof(*buffer));
 
-	buffer->la_size = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
+	buffer->la_size_sect = cpu_to_be64(drbd_get_capacity(mdev->this_bdev));
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		buffer->uuid[i] = cpu_to_be64(mdev->ldev->md.uuid[i]);
 	buffer->flags = cpu_to_be32(mdev->ldev->md.flags);
@@ -3049,7 +3049,7 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
 	rv = NO_ERROR;
 
-	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size);
+	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
 	for (i = UI_CURRENT; i < UI_SIZE; i++)
 		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
 	bdev->md.flags = be32_to_cpu(buffer->flags);
-- 
cgit v1.2.3


From c04ccaa669e147ffb66e4e74d82c7dbfc100ec5e Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Tue, 19 Mar 2013 18:16:47 +0100
Subject: drbd: read meta data early, base on-disk offsets on super block

We used to calculate all on-disk meta data offsets, and then compare
the stored offsets, basically treating them as magic numbers.

Now with the activity log striping, the activity log size is no longer
fixed.  We need to first read the super block, then base the activity
log and bitmap offsets on the stored offsets/al stripe settings.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 131 +++++++++++++++++++++++++++++++++--------
 1 file changed, 107 insertions(+), 24 deletions(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 6b956fc04dc8..e55271d6e7f6 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2968,6 +2968,86 @@ err:
 	return -EINVAL;
 }
 
+static int check_offsets_and_sizes(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
+{
+	sector_t capacity = drbd_get_capacity(bdev->md_bdev);
+	struct drbd_md *in_core = &bdev->md;
+	s32 on_disk_al_sect;
+	s32 on_disk_bm_sect;
+
+	/* The on-disk size of the activity log, calculated from offsets, and
+	 * the size of the activity log calculated from the stripe settings,
+	 * should match.
+	 * Though we could relax this a bit: it is ok, if the striped activity log
+	 * fits in the available on-disk activity log size.
+	 * Right now, that would break how resize is implemented.
+	 * TODO: make drbd_determine_dev_size() (and the drbdmeta tool) aware
+	 * of possible unused padding space in the on disk layout. */
+	if (in_core->al_offset < 0) {
+		if (in_core->bm_offset > in_core->al_offset)
+			goto err;
+		on_disk_al_sect = -in_core->al_offset;
+		on_disk_bm_sect = in_core->al_offset - in_core->bm_offset;
+	} else {
+		if (in_core->al_offset != MD_4kB_SECT)
+			goto err;
+		if (in_core->bm_offset < in_core->al_offset + in_core->al_size_4k * MD_4kB_SECT)
+			goto err;
+
+		on_disk_al_sect = in_core->bm_offset - MD_4kB_SECT;
+		on_disk_bm_sect = in_core->md_size_sect - in_core->bm_offset;
+	}
+
+	/* old fixed size meta data is exactly that: fixed. */
+	if (in_core->meta_dev_idx >= 0) {
+		if (in_core->md_size_sect != MD_128MB_SECT
+		||  in_core->al_offset != MD_4kB_SECT
+		||  in_core->bm_offset != MD_4kB_SECT + MD_32kB_SECT
+		||  in_core->al_stripes != 1
+		||  in_core->al_stripe_size_4k != MD_32kB_SECT/8)
+			goto err;
+	}
+
+	if (capacity < in_core->md_size_sect)
+		goto err;
+	if (capacity - in_core->md_size_sect < drbd_md_first_sector(bdev))
+		goto err;
+
+	/* should be aligned, and at least 32k */
+	if ((on_disk_al_sect & 7) || (on_disk_al_sect < MD_32kB_SECT))
+		goto err;
+
+	/* should fit (for now: exactly) into the available on-disk space;
+	 * overflow prevention is in check_activity_log_stripe_size() above. */
+	if (on_disk_al_sect != in_core->al_size_4k * MD_4kB_SECT)
+		goto err;
+
+	/* again, should be aligned */
+	if (in_core->bm_offset & 7)
+		goto err;
+
+	/* FIXME check for device grow with flex external meta data? */
+
+	/* can the available bitmap space cover the last agreed device size? */
+	if (on_disk_bm_sect < (in_core->la_size_sect+7)/MD_4kB_SECT/8/512)
+		goto err;
+
+	return 0;
+
+err:
+	dev_err(DEV, "meta data offsets don't make sense: idx=%d "
+			"al_s=%u, al_sz4k=%u, al_offset=%d, bm_offset=%d, "
+			"md_size_sect=%u, la_size=%llu, md_capacity=%llu\n",
+			in_core->meta_dev_idx,
+			in_core->al_stripes, in_core->al_stripe_size_4k,
+			in_core->al_offset, in_core->bm_offset, in_core->md_size_sect,
+			(unsigned long long)in_core->la_size_sect,
+			(unsigned long long)capacity);
+
+	return -EINVAL;
+}
+
+
 /**
  * drbd_md_read() - Reads in the meta data super block
  * @mdev:	DRBD device.
@@ -2976,7 +3056,8 @@ err:
  * Return NO_ERROR on success, and an enum drbd_ret_code in case
  * something goes wrong.
  *
- * Called exactly once during drbd_adm_attach()
+ * Called exactly once during drbd_adm_attach(), while still being D_DISKLESS,
+ * even before @bdev is assigned to @mdev->ldev.
  */
 int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 {
@@ -2984,14 +3065,15 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 	u32 magic, flags;
 	int i, rv = NO_ERROR;
 
-	if (!get_ldev_if_state(mdev, D_ATTACHING))
-		return ERR_IO_MD_DISK;
+	if (mdev->state.disk != D_DISKLESS)
+		return ERR_DISK_CONFIGURED;
 
 	buffer = drbd_md_get_buffer(mdev);
 	if (!buffer)
-		goto out;
+		return ERR_NOMEM;
 
-	/* First, figure out where our meta data superblock is located. */
+	/* First, figure out where our meta data superblock is located,
+	 * and read it. */
 	bdev->md.meta_dev_idx = bdev->disk_conf->meta_dev_idx;
 	bdev->md.md_offset = drbd_md_ss(bdev);
 
@@ -3022,14 +3104,29 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		goto err;
 	}
 
-	if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
+	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
+		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
+		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
 		goto err;
+	}
 
-	if (be32_to_cpu(buffer->al_offset) != bdev->md.al_offset) {
-		dev_err(DEV, "unexpected al_offset: %d (expected %d)\n",
-		    be32_to_cpu(buffer->al_offset), bdev->md.al_offset);
+
+	/* convert to in_core endian */
+	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
+	for (i = UI_CURRENT; i < UI_SIZE; i++)
+		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
+	bdev->md.flags = be32_to_cpu(buffer->flags);
+	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
+
+	bdev->md.md_size_sect = be32_to_cpu(buffer->md_size_sect);
+	bdev->md.al_offset = be32_to_cpu(buffer->al_offset);
+	bdev->md.bm_offset = be32_to_cpu(buffer->bm_offset);
+
+	if (check_activity_log_stripe_size(mdev, buffer, &bdev->md))
 		goto err;
-	}
+	if (check_offsets_and_sizes(mdev, bdev))
+		goto err;
+
 	if (be32_to_cpu(buffer->bm_offset) != bdev->md.bm_offset) {
 		dev_err(DEV, "unexpected bm_offset: %d (expected %d)\n",
 		    be32_to_cpu(buffer->bm_offset), bdev->md.bm_offset);
@@ -3041,20 +3138,8 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 		goto err;
 	}
 
-	if (be32_to_cpu(buffer->bm_bytes_per_bit) != BM_BLOCK_SIZE) {
-		dev_err(DEV, "unexpected bm_bytes_per_bit: %u (expected %u)\n",
-		    be32_to_cpu(buffer->bm_bytes_per_bit), BM_BLOCK_SIZE);
-		goto err;
-	}
-
 	rv = NO_ERROR;
 
-	bdev->md.la_size_sect = be64_to_cpu(buffer->la_size_sect);
-	for (i = UI_CURRENT; i < UI_SIZE; i++)
-		bdev->md.uuid[i] = be64_to_cpu(buffer->uuid[i]);
-	bdev->md.flags = be32_to_cpu(buffer->flags);
-	bdev->md.device_uuid = be64_to_cpu(buffer->device_uuid);
-
 	spin_lock_irq(&mdev->tconn->req_lock);
 	if (mdev->state.conn < C_CONNECTED) {
 		unsigned int peer;
@@ -3066,8 +3151,6 @@ int drbd_md_read(struct drbd_conf *mdev, struct drbd_backing_dev *bdev)
 
  err:
 	drbd_md_put_buffer(mdev);
- out:
-	put_ldev(mdev);
 
 	return rv;
 }
-- 
cgit v1.2.3


From 113fef9e20e0d614b3f5940b67c96e719c559eea Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Fri, 22 Mar 2013 18:14:40 -0600
Subject: drbd: prepare to queue write requests on a submit worker

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 26 +++++++++++++++++++++++++-
 1 file changed, 25 insertions(+), 1 deletion(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index e55271d6e7f6..a150b59897a0 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -45,7 +45,7 @@
 #include <linux/reboot.h>
 #include <linux/notifier.h>
 #include <linux/kthread.h>
-
+#include <linux/workqueue.h>
 #define __KERNEL_SYSCALLS__
 #include <linux/unistd.h>
 #include <linux/vmalloc.h>
@@ -2300,6 +2300,7 @@ static void drbd_cleanup(void)
 	idr_for_each_entry(&minors, mdev, i) {
 		idr_remove(&minors, mdev_to_minor(mdev));
 		idr_remove(&mdev->tconn->volumes, mdev->vnr);
+		destroy_workqueue(mdev->submit.wq);
 		del_gendisk(mdev->vdisk);
 		/* synchronize_rcu(); No other threads running at this point */
 		kref_put(&mdev->kref, &drbd_minor_destroy);
@@ -2589,6 +2590,21 @@ void conn_destroy(struct kref *kref)
 	kfree(tconn);
 }
 
+int init_submitter(struct drbd_conf *mdev)
+{
+	/* opencoded create_singlethread_workqueue(),
+	 * to be able to say "drbd%d", ..., minor */
+	mdev->submit.wq = alloc_workqueue("drbd%u_submit",
+			WQ_UNBOUND | WQ_MEM_RECLAIM, 1, mdev->minor);
+	if (!mdev->submit.wq)
+		return -ENOMEM;
+
+	INIT_WORK(&mdev->submit.worker, do_submit);
+	spin_lock_init(&mdev->submit.lock);
+	INIT_LIST_HEAD(&mdev->submit.writes);
+	return 0;
+}
+
 enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor, int vnr)
 {
 	struct drbd_conf *mdev;
@@ -2678,6 +2694,12 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 		goto out_idr_remove_minor;
 	}
 
+	if (init_submitter(mdev)) {
+		err = ERR_NOMEM;
+		drbd_msg_put_info("unable to create submit workqueue");
+		goto out_idr_remove_vol;
+	}
+
 	add_disk(disk);
 	kref_init(&mdev->kref); /* one ref for both idrs and the the add_disk */
 
@@ -2688,6 +2710,8 @@ enum drbd_ret_code conn_new_minor(struct drbd_tconn *tconn, unsigned int minor,
 
 	return NO_ERROR;
 
+out_idr_remove_vol:
+	idr_remove(&tconn->volumes, vnr_got);
 out_idr_remove_minor:
 	idr_remove(&minors, minor_got);
 	synchronize_rcu();
-- 
cgit v1.2.3


From bb45185de2e90af63a7bc48855de6f870cc216fc Mon Sep 17 00:00:00 2001
From: Philipp Reisner <philipp.reisner@linbit.com>
Date: Wed, 27 Mar 2013 14:08:39 +0100
Subject: drbd: fix spurious warning about bitmap being locked from detach

Introduced in drbd: always write bitmap on detach,
the bitmap bulk writeout on detach was indicating
it expected exclusive bitmap access.

Where I meant to say: expect no more modifications,
but testing/counting is still allowed.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 6 +++++-
 1 file changed, 5 insertions(+), 1 deletion(-)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index a150b59897a0..67d2bb3bb533 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -3412,8 +3412,12 @@ static int w_go_diskless(struct drbd_work *w, int unused)
 	 * end up here after a failed attach, before ldev was even assigned.
 	 */
 	if (mdev->bitmap && mdev->ldev) {
+		/* An interrupted resync or similar is allowed to recounts bits
+		 * while we detach.
+		 * Any modifications would not be expected anymore, though.
+		 */
 		if (drbd_bitmap_io_from_worker(mdev, drbd_bm_write,
-					"detach", BM_LOCKED_MASK)) {
+					"detach", BM_LOCKED_TEST_ALLOWED)) {
 			if (test_bit(WAS_READ_ERROR, &mdev->flags)) {
 				drbd_md_set_flag(mdev, MDF_FULL_SYNC);
 				drbd_md_sync(mdev);
-- 
cgit v1.2.3


From 94ad0a101415978be04945b2787be1e8e8a874db Mon Sep 17 00:00:00 2001
From: Lars Ellenberg <lars.ellenberg@linbit.com>
Date: Wed, 27 Mar 2013 14:08:42 +0100
Subject: drbd: fix memory leak

We forgot to free the disk_conf,
so for each attach/detach cycle we leaked 336 bytes.

Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
Signed-off-by: Jens Axboe <axboe@kernel.dk>
---
 drivers/block/drbd/drbd_main.c | 1 +
 1 file changed, 1 insertion(+)

(limited to 'drivers/block/drbd/drbd_main.c')

diff --git a/drivers/block/drbd/drbd_main.c b/drivers/block/drbd/drbd_main.c
index 67d2bb3bb533..1b93a7262ef7 100644
--- a/drivers/block/drbd/drbd_main.c
+++ b/drivers/block/drbd/drbd_main.c
@@ -2819,6 +2819,7 @@ void drbd_free_bc(struct drbd_backing_dev *ldev)
 	blkdev_put(ldev->backing_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 	blkdev_put(ldev->md_bdev, FMODE_READ | FMODE_WRITE | FMODE_EXCL);
 
+	kfree(ldev->disk_conf);
 	kfree(ldev);
 }
 
-- 
cgit v1.2.3