From dfaa2ef68e80c378e610e3c8c536f1c239e8d3ef Mon Sep 17 00:00:00 2001 From: Lukas Czerner Date: Fri, 19 Aug 2011 14:50:46 +0200 Subject: loop: add discard support for loop devices This commit adds discard support for loop devices. Discard is usually supported by SSD and thinly provisioned devices as a method for reclaiming unused space. This is no different than trying to reclaim back space which is not used by the file system on the image, but it still occupies space on the host file system. We can do the reclamation on file system which does support hole punching. So when discard request gets to the loop driver we can translate that to punch a hole to the underlying file, hence reclaim the free space. This is very useful for trimming down the size of the image to only what is really used by the file system on that image. Fstrim may be used for that purpose. It has been tested on ext4, xfs and btrfs with the image file systems ext4, ext3, xfs and btrfs. ext4, or ext6 image on ext4 file system has some problems but it seems that ext4 punch hole implementation is somewhat flawed and it is unrelated to this commit. Also this is a very good method of validating file systems punch hole implementation. Note that when encryption is used, discard support is disabled, because using it might leak some information useful for possible attacker. Signed-off-by: Lukas Czerner Reviewed-by: Jeff Moyer Signed-off-by: Jens Axboe --- drivers/block/loop.c | 54 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 54 insertions(+) (limited to 'drivers/block/loop.c') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 76c8da78212b..936cac3c3126 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -75,6 +75,7 @@ #include #include #include +#include #include @@ -484,6 +485,29 @@ static int do_bio_filebacked(struct loop_device *lo, struct bio *bio) } } + /* + * We use punch hole to reclaim the free space used by the + * image a.k.a. discard. However we do support discard if + * encryption is enabled, because it may give an attacker + * useful information. + */ + if (bio->bi_rw & REQ_DISCARD) { + struct file *file = lo->lo_backing_file; + int mode = FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE; + + if ((!file->f_op->fallocate) || + lo->lo_encrypt_key_size) { + ret = -EOPNOTSUPP; + goto out; + } + ret = file->f_op->fallocate(file, mode, pos, + bio->bi_size); + if (unlikely(ret && ret != -EINVAL && + ret != -EOPNOTSUPP)) + ret = -EIO; + goto out; + } + ret = lo_send(lo, bio, pos); if ((bio->bi_rw & REQ_FUA) && !ret) { @@ -814,6 +838,35 @@ static void loop_sysfs_exit(struct loop_device *lo) &loop_attribute_group); } +static void loop_config_discard(struct loop_device *lo) +{ + struct file *file = lo->lo_backing_file; + struct inode *inode = file->f_mapping->host; + struct request_queue *q = lo->lo_queue; + + /* + * We use punch hole to reclaim the free space used by the + * image a.k.a. discard. However we do support discard if + * encryption is enabled, because it may give an attacker + * useful information. + */ + if ((!file->f_op->fallocate) || + lo->lo_encrypt_key_size) { + q->limits.discard_granularity = 0; + q->limits.discard_alignment = 0; + q->limits.max_discard_sectors = 0; + q->limits.discard_zeroes_data = 0; + queue_flag_clear_unlocked(QUEUE_FLAG_DISCARD, q); + return; + } + + q->limits.discard_granularity = inode->i_sb->s_blocksize; + q->limits.discard_alignment = inode->i_sb->s_blocksize; + q->limits.max_discard_sectors = UINT_MAX >> 9; + q->limits.discard_zeroes_data = 1; + queue_flag_set_unlocked(QUEUE_FLAG_DISCARD, q); +} + static int loop_set_fd(struct loop_device *lo, fmode_t mode, struct block_device *bdev, unsigned int arg) { @@ -1090,6 +1143,7 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) if (figure_loop_size(lo)) return -EFBIG; } + loop_config_discard(lo); memcpy(lo->lo_file_name, info->lo_file_name, LO_NAME_SIZE); memcpy(lo->lo_crypt_name, info->lo_crypt_name, LO_NAME_SIZE); -- cgit v1.2.3 From e03c8dd14915fabc101aa495828d58598dc5af98 Mon Sep 17 00:00:00 2001 From: Kay Sievers Date: Tue, 23 Aug 2011 20:12:04 +0200 Subject: loop: always allow userspace partitions and optionally support automatic scanning Automatic partition scanning can be requested individually per loop device during its setup by setting LO_FLAGS_PARTSCAN. By default, no partition tables are scanned. Userspace can now always add and remove partitions from all loop devices, regardless if the in-kernel partition scanner is enabled or not. The needed partition minor numbers are allocated from the extended minors space, the main loop device numbers will continue to match the loop minors, regardless of the number of partitions used. # grep . /sys/class/block/loop1/loop/* /sys/block/loop1/loop/autoclear:0 /sys/block/loop1/loop/backing_file:/home/kay/data/stuff/part.img /sys/block/loop1/loop/offset:0 /sys/block/loop1/loop/partscan:1 /sys/block/loop1/loop/sizelimit:0 # ls -l /dev/loop* brw-rw---- 1 root disk 7, 0 Aug 14 20:22 /dev/loop0 brw-rw---- 1 root disk 7, 1 Aug 14 20:23 /dev/loop1 brw-rw---- 1 root disk 259, 0 Aug 14 20:23 /dev/loop1p1 brw-rw---- 1 root disk 259, 1 Aug 14 20:23 /dev/loop1p2 brw-rw---- 1 root disk 7, 99 Aug 14 20:23 /dev/loop99 brw-rw---- 1 root disk 259, 2 Aug 14 20:23 /dev/loop99p1 brw-rw---- 1 root disk 259, 3 Aug 14 20:23 /dev/loop99p2 crw------T 1 root root 10, 237 Aug 14 20:22 /dev/loop-control Cc: Karel Zak Cc: Davidlohr Bueso Acked-By: Tejun Heo Signed-off-by: Kay Sievers Signed-off-by: Jens Axboe --- drivers/block/loop.c | 49 +++++++++++++++++++++++++++++++++++++++++++++---- 1 file changed, 45 insertions(+), 4 deletions(-) (limited to 'drivers/block/loop.c') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index 936cac3c3126..b336433f8157 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -724,7 +724,7 @@ static int loop_change_fd(struct loop_device *lo, struct block_device *bdev, goto out_putf; fput(old_file); - if (max_part > 0) + if (lo->lo_flags & LO_FLAGS_PARTSCAN) ioctl_by_bdev(bdev, BLKRRPART, 0); return 0; @@ -808,16 +808,25 @@ static ssize_t loop_attr_autoclear_show(struct loop_device *lo, char *buf) return sprintf(buf, "%s\n", autoclear ? "1" : "0"); } +static ssize_t loop_attr_partscan_show(struct loop_device *lo, char *buf) +{ + int partscan = (lo->lo_flags & LO_FLAGS_PARTSCAN); + + return sprintf(buf, "%s\n", partscan ? "1" : "0"); +} + LOOP_ATTR_RO(backing_file); LOOP_ATTR_RO(offset); LOOP_ATTR_RO(sizelimit); LOOP_ATTR_RO(autoclear); +LOOP_ATTR_RO(partscan); static struct attribute *loop_attrs[] = { &loop_attr_backing_file.attr, &loop_attr_offset.attr, &loop_attr_sizelimit.attr, &loop_attr_autoclear.attr, + &loop_attr_partscan.attr, NULL, }; @@ -979,7 +988,9 @@ static int loop_set_fd(struct loop_device *lo, fmode_t mode, } lo->lo_state = Lo_bound; wake_up_process(lo->lo_thread); - if (max_part > 0) + if (part_shift) + lo->lo_flags |= LO_FLAGS_PARTSCAN; + if (lo->lo_flags & LO_FLAGS_PARTSCAN) ioctl_by_bdev(bdev, BLKRRPART, 0); return 0; @@ -1070,7 +1081,6 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) lo->lo_offset = 0; lo->lo_sizelimit = 0; lo->lo_encrypt_key_size = 0; - lo->lo_flags = 0; lo->lo_thread = NULL; memset(lo->lo_encrypt_key, 0, LO_KEY_SIZE); memset(lo->lo_crypt_name, 0, LO_NAME_SIZE); @@ -1088,8 +1098,11 @@ static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) lo->lo_state = Lo_unbound; /* This is safe: open() is still holding a reference. */ module_put(THIS_MODULE); - if (max_part > 0 && bdev) + if (lo->lo_flags & LO_FLAGS_PARTSCAN && bdev) ioctl_by_bdev(bdev, BLKRRPART, 0); + lo->lo_flags = 0; + if (!part_shift) + lo->lo_disk->flags |= GENHD_FL_NO_PART_SCAN; mutex_unlock(&lo->lo_ctl_mutex); /* * Need not hold lo_ctl_mutex to fput backing file. @@ -1159,6 +1172,13 @@ loop_set_status(struct loop_device *lo, const struct loop_info64 *info) (info->lo_flags & LO_FLAGS_AUTOCLEAR)) lo->lo_flags ^= LO_FLAGS_AUTOCLEAR; + if ((info->lo_flags & LO_FLAGS_PARTSCAN) && + !(lo->lo_flags & LO_FLAGS_PARTSCAN)) { + lo->lo_flags |= LO_FLAGS_PARTSCAN; + lo->lo_disk->flags &= ~GENHD_FL_NO_PART_SCAN; + ioctl_by_bdev(lo->lo_device, BLKRRPART, 0); + } + lo->lo_encrypt_key_size = info->lo_encrypt_key_size; lo->lo_init[0] = info->lo_init[0]; lo->lo_init[1] = info->lo_init[1]; @@ -1654,6 +1674,27 @@ static struct loop_device *loop_alloc(int i) if (!disk) goto out_free_queue; + /* + * Disable partition scanning by default. The in-kernel partition + * scanning can be requested individually per-device during its + * setup. Userspace can always add and remove partitions from all + * devices. The needed partition minors are allocated from the + * extended minor space, the main loop device numbers will continue + * to match the loop minors, regardless of the number of partitions + * used. + * + * If max_part is given, partition scanning is globally enabled for + * all loop devices. The minors for the main loop devices will be + * multiples of max_part. + * + * Note: Global-for-all-devices, set-only-at-init, read-only module + * parameteters like 'max_loop' and 'max_part' make things needlessly + * complicated, are too static, inflexible and may surprise + * userspace tools. Parameters like this in general should be avoided. + */ + if (!part_shift) + disk->flags |= GENHD_FL_NO_PART_SCAN; + disk->flags |= GENHD_FL_EXT_DEVT; mutex_init(&lo->lo_ctl_mutex); lo->lo_number = i; lo->lo_thread = NULL; -- cgit v1.2.3 From 8a9c594422ecad912d6470888acdee9a1236ad68 Mon Sep 17 00:00:00 2001 From: Phillip Susi Date: Wed, 21 Sep 2011 10:02:13 +0200 Subject: drivers/block/loop.c: emit uevent on auto release The loopback driver failed to emit the change uevent when auto releasing the device. Fixed lo_release() to pass the bdev to loop_clr_fd() so it can emit the event. Signed-off-by: Phillip Susi Cc: Jens Axboe Cc: Ayan George Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/loop.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'drivers/block/loop.c') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index b336433f8157..c2ce03cf3a58 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1583,7 +1583,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode) * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - err = loop_clr_fd(lo, NULL); + err = loop_clr_fd(lo, lo->lo_device); if (!err) goto out_unlocked; } else { -- cgit v1.2.3 From 4c823cc3d568277aa6340d8df6981e34f4c4dee5 Mon Sep 17 00:00:00 2001 From: Ayan George Date: Wed, 21 Sep 2011 10:02:13 +0200 Subject: drivers/block/loop.c: remove unnecessary bdev argument from loop_clr_fd() If the loop device is associated (lo->lo_state == Lo_bound), it will have a valid bdev pointed to by lo->lo_device. There is no reason to ever pass an additional block_device pointer. Signed-off-by: Ayan George Cc: Phillip Susi Cc: Jens Axboe Signed-off-by: Andrew Morton Signed-off-by: Jens Axboe --- drivers/block/loop.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'drivers/block/loop.c') diff --git a/drivers/block/loop.c b/drivers/block/loop.c index c2ce03cf3a58..9b2f5d3c19ab 100644 --- a/drivers/block/loop.c +++ b/drivers/block/loop.c @@ -1051,10 +1051,11 @@ loop_init_xfer(struct loop_device *lo, struct loop_func_table *xfer, return err; } -static int loop_clr_fd(struct loop_device *lo, struct block_device *bdev) +static int loop_clr_fd(struct loop_device *lo) { struct file *filp = lo->lo_backing_file; gfp_t gfp = lo->old_gfp_mask; + struct block_device *bdev = lo->lo_device; if (lo->lo_state != Lo_bound) return -ENXIO; @@ -1372,7 +1373,7 @@ static int lo_ioctl(struct block_device *bdev, fmode_t mode, break; case LOOP_CLR_FD: /* loop_clr_fd would have unlocked lo_ctl_mutex on success */ - err = loop_clr_fd(lo, bdev); + err = loop_clr_fd(lo); if (!err) goto out_unlocked; break; @@ -1583,7 +1584,7 @@ static int lo_release(struct gendisk *disk, fmode_t mode) * In autoclear mode, stop the loop thread * and remove configuration after last close. */ - err = loop_clr_fd(lo, lo->lo_device); + err = loop_clr_fd(lo); if (!err) goto out_unlocked; } else { -- cgit v1.2.3