From 1196f8b814f32cd04df334abf47648c2a9fd8324 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2011 20:54:45 +0200 Subject: block: rescan partitions on invalidated devices on -ENOMEDIA too __blkdev_get() doesn't rescan partitions if disk->fops->open() fails, which leads to ghost partition devices lingering after medimum removal is known to both the kernel and userland. The behavior also creates a subtle inconsistency where O_NONBLOCK open, which doesn't fail even if there's no medium, clears the ghots partitions, which is exploited to work around the problem from userland. Fix it by updating __blkdev_get() to issue partition rescan after -ENOMEDIA too. This was reported in the following bz. https://bugzilla.kernel.org/show_bug.cgi?id=13029 Note for stable: 2.6.38 and later only Cc: stable@kernel.org Signed-off-by: Tejun Heo Reported-by: David Zeuthen Reported-by: Martin Pitt Reported-by: Kay Sievers Tested-by: Kay Sievers Cc: Alan Cox Signed-off-by: Jens Axboe --- fs/block_dev.c | 27 ++++++++++++++++++--------- 1 file changed, 18 insertions(+), 9 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 5147bdd3b8e1..257b00e98428 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1102,6 +1102,7 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) if (!bdev->bd_part) goto out_clear; + ret = 0; if (disk->fops->open) { ret = disk->fops->open(bdev, mode); if (ret == -ERESTARTSYS) { @@ -1118,9 +1119,18 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) put_disk(disk); goto restart; } - if (ret) - goto out_clear; } + /* + * If the device is invalidated, rescan partition + * if open succeeded or failed with -ENOMEDIUM. + * The latter is necessary to prevent ghost + * partitions on a removed medium. + */ + if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) + rescan_partitions(disk, bdev); + if (ret) + goto out_clear; + if (!bdev->bd_openers) { bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); bdi = blk_get_backing_dev_info(bdev); @@ -1128,8 +1138,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) bdi = &default_backing_dev_info; bdev_inode_switch_bdi(bdev->bd_inode, bdi); } - if (bdev->bd_invalidated) - rescan_partitions(disk, bdev); } else { struct block_device *whole; whole = bdget_disk(disk, 0); @@ -1153,13 +1161,14 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) } } else { if (bdev->bd_contains == bdev) { - if (bdev->bd_disk->fops->open) { + ret = 0; + if (bdev->bd_disk->fops->open) ret = bdev->bd_disk->fops->open(bdev, mode); - if (ret) - goto out_unlock_bdev; - } - if (bdev->bd_invalidated) + /* the same as first opener case, read comment there */ + if (bdev->bd_invalidated && (!ret || ret == -ENOMEDIUM)) rescan_partitions(bdev->bd_disk, bdev); + if (ret) + goto out_unlock_bdev; } /* only one opener holds refs to the module and disk */ module_put(disk->fops->owner); -- cgit v1.2.3 From d4dc210f69bcb0b4bef5a83b1c323817be89bad1 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2011 20:54:46 +0200 Subject: block: don't block events on excl write for non-optical devices Disk event code automatically blocks events on excl write. This is primarily to avoid issuing polling commands while burning is in progress. This behavior doesn't fit other types of devices with removeable media where polling commands don't have adverse side effects and door locking usually doesn't exist. This patch introduces new genhd flag which controls the auto-blocking behavior and uses it to enable auto-blocking only on optical devices. Note for stable: 2.6.38 and later only Cc: stable@kernel.org Signed-off-by: Tejun Heo Reported-by: Kay Sievers Signed-off-by: Jens Axboe --- fs/block_dev.c | 17 ++++++++++------- 1 file changed, 10 insertions(+), 7 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index 257b00e98428..d7c2e0fddc6f 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1237,6 +1237,8 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) res = __blkdev_get(bdev, mode, 0); if (whole) { + struct gendisk *disk = whole->bd_disk; + /* finish claiming */ mutex_lock(&bdev->bd_mutex); spin_lock(&bdev_lock); @@ -1263,15 +1265,16 @@ int blkdev_get(struct block_device *bdev, fmode_t mode, void *holder) spin_unlock(&bdev_lock); /* - * Block event polling for write claims. Any write - * holder makes the write_holder state stick until all - * are released. This is good enough and tracking - * individual writeable reference is too fragile given - * the way @mode is used in blkdev_get/put(). + * Block event polling for write claims if requested. Any + * write holder makes the write_holder state stick until + * all are released. This is good enough and tracking + * individual writeable reference is too fragile given the + * way @mode is used in blkdev_get/put(). */ - if (!res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { + if ((disk->flags & GENHD_FL_BLOCK_EVENTS_ON_EXCL_WRITE) && + !res && (mode & FMODE_WRITE) && !bdev->bd_write_holder) { bdev->bd_write_holder = true; - disk_block_events(bdev->bd_disk); + disk_block_events(disk); } mutex_unlock(&bdev->bd_mutex); -- cgit v1.2.3 From 23ceb5b7719e9276d4fa72a3ecf94dd396755276 Mon Sep 17 00:00:00 2001 From: Tao Ma Date: Fri, 6 May 2011 19:30:02 -0600 Subject: block: Remove extra discard_alignment from hd_struct. Currently, hd_struct.discard_alignment is only used when we show /sys/block/sdx/sdx/discard_alignment. So remove it and calculate when it is asked to show. Signed-off-by: Tao Ma Signed-off-by: Jens Axboe --- fs/partitions/check.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'fs') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index d545e97d99c3..b7e16bccd5e5 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -255,7 +255,12 @@ ssize_t part_discard_alignment_show(struct device *dev, struct device_attribute *attr, char *buf) { struct hd_struct *p = dev_to_part(dev); - return sprintf(buf, "%u\n", p->discard_alignment); + struct gendisk *disk = dev_to_disk(dev); + + return sprintf(buf, "%u\n", + (unsigned long long)queue_limit_discard_alignment( + &disk->queue->limits, + p->start_sect)); } ssize_t part_stat_show(struct device *dev, @@ -449,8 +454,6 @@ struct hd_struct *add_partition(struct gendisk *disk, int partno, p->start_sect = start; p->alignment_offset = queue_limit_alignment_offset(&disk->queue->limits, start); - p->discard_alignment = - queue_limit_discard_alignment(&disk->queue->limits, start); p->nr_sects = len; p->partno = partno; p->policy = get_disk_ro(disk); -- cgit v1.2.3 From bbdd304cf66fbf2b4b2d28418dc619d443635e83 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Mon, 9 May 2011 08:28:13 +0200 Subject: fs: fixup warning part_discard_alignment_show() Stephen reports: ----- After merging the block tree, today's linux-next build (x86_64 allmodconfig) produced this warning: fs/partitions/check.c: In function 'part_discard_alignment_show': fs/partitions/check.c:263: warning: format '%u' expects type 'unsigned int', but argument 3 has type 'long long unsigned int' Introduced by commit ("block: Remove extra discard_alignment from hd_struct") ----- Fix it up by just removing the cast, we return an int already. Reported-by: Stephen Rothwell Signed-off-by: Jens Axboe --- fs/partitions/check.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'fs') diff --git a/fs/partitions/check.c b/fs/partitions/check.c index b7e16bccd5e5..8ed4d3433199 100644 --- a/fs/partitions/check.c +++ b/fs/partitions/check.c @@ -258,8 +258,7 @@ ssize_t part_discard_alignment_show(struct device *dev, struct gendisk *disk = dev_to_disk(dev); return sprintf(buf, "%u\n", - (unsigned long long)queue_limit_discard_alignment( - &disk->queue->limits, + queue_limit_discard_alignment(&disk->queue->limits, p->start_sect)); } -- cgit v1.2.3 From 7e69723fef8771a9d57bd27d36281d756130b4b5 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Mon, 23 May 2011 13:26:07 +0200 Subject: block: move bd_set_size() above rescan_partitions() in __blkdev_get() 02e352287a4 (block: rescan partitions on invalidated devices on -ENOMEDIA too) relocated partition rescan above explicit bd_set_size() to simplify condition check. As rescan_partitions() does its own bdev size setting, this doesn't break anything; however, rescan_partitions() prints out the following messages when adjusting bdev size, which can be confusing. sda: detected capacity change from 0 to 146815737856 sdb: detected capacity change from 0 to 146815737856 This patch restores the original order and remove the warning messages. stable: Please apply together with 02e352287a4 (block: rescan partitions on invalidated devices on -ENOMEDIA too). Signed-off-by: Tejun Heo Reported-by: Tony Luck Tested-by: Tony Luck Cc: stable@kernel.org Stable note: 2.6.39 only. Signed-off-by: Jens Axboe --- fs/block_dev.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'fs') diff --git a/fs/block_dev.c b/fs/block_dev.c index d7c2e0fddc6f..1f2b19978333 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -1120,6 +1120,15 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) goto restart; } } + + if (!ret && !bdev->bd_openers) { + bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); + bdi = blk_get_backing_dev_info(bdev); + if (bdi == NULL) + bdi = &default_backing_dev_info; + bdev_inode_switch_bdi(bdev->bd_inode, bdi); + } + /* * If the device is invalidated, rescan partition * if open succeeded or failed with -ENOMEDIUM. @@ -1130,14 +1139,6 @@ static int __blkdev_get(struct block_device *bdev, fmode_t mode, int for_part) rescan_partitions(disk, bdev); if (ret) goto out_clear; - - if (!bdev->bd_openers) { - bd_set_size(bdev,(loff_t)get_capacity(disk)<<9); - bdi = blk_get_backing_dev_info(bdev); - if (bdi == NULL) - bdi = &default_backing_dev_info; - bdev_inode_switch_bdi(bdev->bd_inode, bdi); - } } else { struct block_device *whole; whole = bdget_disk(disk, 0); -- cgit v1.2.3