Diffstat (limited to 'block')
-rw-r--r-- block/bdev.c          | 70
-rw-r--r-- block/bio-integrity.c | 17
-rw-r--r-- block/blk-cgroup.c    |  2
-rw-r--r-- block/blk-settings.c  |  8
-rw-r--r-- block/blk-sysfs.c     |  2
-rw-r--r-- block/blk-throttle.h  |  1
-rw-r--r-- block/blk-zoned.c     |  5
-rw-r--r-- block/blk.h           |  3
-rw-r--r-- block/fops.c          | 18
-rw-r--r-- block/ioctl.c         |  6
10 files changed, 99 insertions(+), 33 deletions(-)
diff --git a/block/bdev.c b/block/bdev.c
index 4844d1e27b6f..889ec6e002d7 100644
--- a/block/bdev.c
+++ b/block/bdev.c
@@ -152,27 +152,65 @@ static void set_init_blocksize(struct block_device *bdev)
get_order(bsize));
}
-int set_blocksize(struct file *file, int size)
+/**
+ * bdev_validate_blocksize - check that this block size is acceptable
+ * @bdev: blockdevice to check
+ * @block_size: block size to check
+ *
+ * For block device users that do not use buffer heads or the block device
+ * page cache, make sure that this block size can be used with the device.
+ *
+ * Return: On success zero is returned, negative error code on failure.
+ */
+int bdev_validate_blocksize(struct block_device *bdev, int block_size)
{
- struct inode *inode = file->f_mapping->host;
- struct block_device *bdev = I_BDEV(inode);
-
- if (blk_validate_block_size(size))
+ if (blk_validate_block_size(block_size))
return -EINVAL;
/* Size cannot be smaller than the size supported by the device */
- if (size < bdev_logical_block_size(bdev))
+ if (block_size < bdev_logical_block_size(bdev))
return -EINVAL;
+ return 0;
+}
+EXPORT_SYMBOL_GPL(bdev_validate_blocksize);
+
+int set_blocksize(struct file *file, int size)
+{
+ struct inode *inode = file->f_mapping->host;
+ struct block_device *bdev = I_BDEV(inode);
+ int ret;
+
+ ret = bdev_validate_blocksize(bdev, size);
+ if (ret)
+ return ret;
+
if (!file->private_data)
return -EINVAL;
/* Don't change the size if it is same as current */
if (inode->i_blkbits != blksize_bits(size)) {
+ /*
+ * Flush and truncate the pagecache before we reconfigure the
+ * mapping geometry because folio sizes are variable now. If a
+ * reader has already allocated a folio whose size is smaller
+ * than the new min_order but invokes readahead after the new
+ * min_order becomes visible, readahead will think there are
+ * "zero" blocks per folio and crash. Take the inode and
+ * invalidation locks to avoid racing with
+ * read/write/fallocate.
+ */
+ inode_lock(inode);
+ filemap_invalidate_lock(inode->i_mapping);
+
sync_blockdev(bdev);
+ kill_bdev(bdev);
+
inode->i_blkbits = blksize_bits(size);
mapping_set_folio_min_order(inode->i_mapping, get_order(size));
kill_bdev(bdev);
+ filemap_invalidate_unlock(inode->i_mapping);
+ inode_unlock(inode);
}
return 0;
}
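
[The new helper gives block device users that bypass buffer heads and the block
device page cache a way to reject unsupported block sizes up front, and
set_blocksize() becomes a thin wrapper around it. A minimal sketch of an
external caller, with an illustrative function name that is not part of this
patch:

/*
 * Hypothetical caller: validate a block size before using it for
 * driver-private I/O, without going through set_blocksize().
 */
static int example_setup(struct block_device *bdev, int block_size)
{
	/*
	 * Returns -EINVAL if the size fails the generic validity check
	 * or is smaller than the device's logical block size.
	 */
	int ret = bdev_validate_blocksize(bdev, block_size);

	if (ret)
		return ret;
	/* ... use block_size for this caller's own bookkeeping ... */
	return 0;
}
]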
@@ -777,13 +815,13 @@ static void blkdev_put_part(struct block_device *part)
blkdev_put_whole(whole);
}
-struct block_device *blkdev_get_no_open(dev_t dev)
+struct block_device *blkdev_get_no_open(dev_t dev, bool autoload)
{
struct block_device *bdev;
struct inode *inode;
inode = ilookup(blockdev_superblock, dev);
- if (!inode && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
+ if (!inode && autoload && IS_ENABLED(CONFIG_BLOCK_LEGACY_AUTOLOAD)) {
blk_request_module(dev);
inode = ilookup(blockdev_superblock, dev);
if (inode)
@@ -1005,7 +1043,7 @@ struct file *bdev_file_open_by_dev(dev_t dev, blk_mode_t mode, void *holder,
if (ret)
return ERR_PTR(ret);
- bdev = blkdev_get_no_open(dev);
+ bdev = blkdev_get_no_open(dev, true);
if (!bdev)
return ERR_PTR(-ENXIO);
@@ -1272,21 +1310,17 @@ void sync_bdevs(bool wait)
/*
* Handle STATX_{DIOALIGN, WRITE_ATOMIC} for block devices.
*/
-void bdev_statx(struct path *path, struct kstat *stat,
- u32 request_mask)
+void bdev_statx(const struct path *path, struct kstat *stat, u32 request_mask)
{
- struct inode *backing_inode;
struct block_device *bdev;
- backing_inode = d_backing_inode(path->dentry);
-
/*
- * Note that backing_inode is the inode of a block device node file,
- * not the block device's internal inode. Therefore it is *not* valid
- * to use I_BDEV() here; the block device has to be looked up by i_rdev
+ * Note that d_backing_inode() returns the block device node inode, not
+ * the block device's internal inode. Therefore it is *not* valid to
+ * use I_BDEV() here; the block device has to be looked up by i_rdev
* instead.
*/
- bdev = blkdev_get_no_open(backing_inode->i_rdev);
+ bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
if (!bdev)
return;
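
[The autoload flag splits blkdev_get_no_open() callers into real open paths,
which may still trigger CONFIG_BLOCK_LEGACY_AUTOLOAD module loading, and pure
lookups such as bdev_statx(), which must not load a driver as a side effect.
The two styles, as used in this series:

	/* Open paths (blkdev_open, bdev_file_open_by_dev): allow autoload. */
	bdev = blkdev_get_no_open(inode->i_rdev, true);

	/* stat() and cgroup configuration: look up only, never load modules. */
	bdev = blkdev_get_no_open(d_backing_inode(path->dentry)->i_rdev, false);
]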
diff --git a/block/bio-integrity.c b/block/bio-integrity.c
index 608594a154a5..43ef6bd06c85 100644
--- a/block/bio-integrity.c
+++ b/block/bio-integrity.c
@@ -66,16 +66,12 @@ struct bio_integrity_payload *bio_integrity_alloc(struct bio *bio,
}
EXPORT_SYMBOL(bio_integrity_alloc);
-static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs,
- bool dirty)
+static void bio_integrity_unpin_bvec(struct bio_vec *bv, int nr_vecs)
{
int i;
- for (i = 0; i < nr_vecs; i++) {
- if (dirty && !PageCompound(bv[i].bv_page))
- set_page_dirty_lock(bv[i].bv_page);
+ for (i = 0; i < nr_vecs; i++)
unpin_user_page(bv[i].bv_page);
- }
}
static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
@@ -91,7 +87,7 @@ static void bio_integrity_uncopy_user(struct bio_integrity_payload *bip)
ret = copy_to_iter(bvec_virt(bounce_bvec), bytes, &orig_iter);
WARN_ON_ONCE(ret != bytes);
- bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs, true);
+ bio_integrity_unpin_bvec(orig_bvecs, orig_nr_vecs);
}
/**
@@ -111,8 +107,7 @@ void bio_integrity_unmap_user(struct bio *bio)
return;
}
- bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt,
- bio_data_dir(bio) == READ);
+ bio_integrity_unpin_bvec(bip->bip_vec, bip->bip_max_vcnt);
}
/**
@@ -198,7 +193,7 @@ static int bio_integrity_copy_user(struct bio *bio, struct bio_vec *bvec,
}
if (write)
- bio_integrity_unpin_bvec(bvec, nr_vecs, false);
+ bio_integrity_unpin_bvec(bvec, nr_vecs);
else
memcpy(&bip->bip_vec[1], bvec, nr_vecs * sizeof(*bvec));
@@ -319,7 +314,7 @@ int bio_integrity_map_user(struct bio *bio, struct iov_iter *iter)
return 0;
release_pages:
- bio_integrity_unpin_bvec(bvec, nr_bvecs, false);
+ bio_integrity_unpin_bvec(bvec, nr_bvecs);
free_bvec:
if (bvec != stack_vec)
kfree(bvec);
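
[With the dirty argument dropped, bio_integrity_unpin_bvec() is a plain unpin
loop and no longer marks user pages dirty on reads. A minimal sketch of the
pin/unpin pairing it participates in, with an illustrative caller; only
pin_user_pages_fast() and unpin_user_page() are real APIs here:

static int example_pin_cycle(unsigned long uaddr, struct page **pages,
			     int nr_pages)
{
	int i, pinned;

	/* Pin the user buffer for the duration of the I/O. */
	pinned = pin_user_pages_fast(uaddr, nr_pages, FOLL_WRITE, pages);
	if (pinned < 0)
		return pinned;

	/* ... I/O runs against the pinned pages ... */

	/* Drop one pin per page once the I/O has completed. */
	for (i = 0; i < pinned; i++)
		unpin_user_page(pages[i]);
	return 0;
}
]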
diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c
index 5905f277057b..ce93706555c5 100644
--- a/block/blk-cgroup.c
+++ b/block/blk-cgroup.c
@@ -797,7 +797,7 @@ int blkg_conf_open_bdev(struct blkg_conf_ctx *ctx)
return -EINVAL;
input = skip_spaces(input);
- bdev = blkdev_get_no_open(MKDEV(major, minor));
+ bdev = blkdev_get_no_open(MKDEV(major, minor), false);
if (!bdev)
return -ENODEV;
if (bdev_is_partition(bdev)) {
diff --git a/block/blk-settings.c b/block/blk-settings.c
index 6b2dbe645d23..4817e7ca03f8 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -61,8 +61,14 @@ void blk_apply_bdi_limits(struct backing_dev_info *bdi,
/*
* For read-ahead of large files to be effective, we need to read ahead
* at least twice the optimal I/O size.
+ *
+ * There is no hardware limitation for the read-ahead size and the user
+ * might have increased the read-ahead size through sysfs, so don't ever
+ * decrease it.
*/
- bdi->ra_pages = max(lim->io_opt * 2 / PAGE_SIZE, VM_READAHEAD_PAGES);
+ bdi->ra_pages = max3(bdi->ra_pages,
+ lim->io_opt * 2 / PAGE_SIZE,
+ VM_READAHEAD_PAGES);
bdi->io_pages = lim->max_sectors >> PAGE_SECTORS_SHIFT;
}
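
[The max3() preserves a read-ahead size the administrator raised via sysfs
instead of clamping it back down on every limits update. A worked example,
assuming 4 KiB pages (so VM_READAHEAD_PAGES is 32):

	/*
	 * io_opt = 512 KiB  =>  io_opt * 2 / PAGE_SIZE = 256 pages.
	 * An earlier "echo 4096 > .../read_ahead_kb" set ra_pages = 1024;
	 * max3() now keeps 1024 rather than overwriting it with 256.
	 */
	bdi->ra_pages = max3(bdi->ra_pages,			/* 1024 */
			     lim->io_opt * 2 / PAGE_SIZE,	/* 256  */
			     VM_READAHEAD_PAGES);		/* 32   */
]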
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index a2882751f0d2..1f9b45b0b9ee 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -909,6 +909,8 @@ out_unregister_ia_ranges:
out_debugfs_remove:
blk_debugfs_remove(disk);
mutex_unlock(&q->sysfs_lock);
+ if (queue_is_mq(q))
+ blk_mq_sysfs_unregister(disk);
out_put_queue_kobj:
kobject_put(&disk->queue_kobj);
return ret;
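
[The added blk_mq_sysfs_unregister() makes blk_register_queue()'s error path
tear down in the reverse of registration order, so a later failure no longer
leaks the mq sysfs entries before the final kobject_put(). The general shape of
the pattern, with illustrative names:

	ret = register_a();
	if (ret)
		goto out;
	ret = register_b();
	if (ret)
		goto out_unregister_a;
	return 0;

out_unregister_a:
	unregister_a();		/* undo in reverse order of setup */
out:
	return ret;
]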
diff --git a/block/blk-throttle.h b/block/blk-throttle.h
index 7964cc041e06..f9f8666891ab 100644
--- a/block/blk-throttle.h
+++ b/block/blk-throttle.h
@@ -1,3 +1,4 @@
+/* SPDX-License-Identifier: GPL-2.0 */
#ifndef BLK_THROTTLE_H
#define BLK_THROTTLE_H
diff --git a/block/blk-zoned.c b/block/blk-zoned.c
index 0c77244a35c9..8f15d1aa6eb8 100644
--- a/block/blk-zoned.c
+++ b/block/blk-zoned.c
@@ -343,6 +343,7 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
op = REQ_OP_ZONE_RESET;
/* Invalidate the page cache, including dirty pages. */
+ inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
ret = blkdev_truncate_zone_range(bdev, mode, &zrange);
if (ret)
@@ -364,8 +365,10 @@ int blkdev_zone_mgmt_ioctl(struct block_device *bdev, blk_mode_t mode,
ret = blkdev_zone_mgmt(bdev, op, zrange.sector, zrange.nr_sectors);
fail:
- if (cmd == BLKRESETZONE)
+ if (cmd == BLKRESETZONE) {
filemap_invalidate_unlock(bdev->bd_mapping);
+ inode_unlock(bdev->bd_mapping->host);
+ }
return ret;
}
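
[BLKRESETZONE's cache invalidation now nests the bdev inode lock outside the
invalidation lock, the same order set_blocksize() takes them in, so zone resets
and block size changes serialize cleanly. The ordering, in sketch form:

	inode_lock(bdev->bd_mapping->host);		/* i_rwsem first */
	filemap_invalidate_lock(bdev->bd_mapping);	/* then invalidate_lock */

	/* ... truncate the zone range, issue the zone management op ... */

	filemap_invalidate_unlock(bdev->bd_mapping);
	inode_unlock(bdev->bd_mapping->host);
]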
diff --git a/block/blk.h b/block/blk.h
index 006e3be433d2..328075787814 100644
--- a/block/blk.h
+++ b/block/blk.h
@@ -94,6 +94,9 @@ static inline void blk_wait_io(struct completion *done)
wait_for_completion_io(done);
}
+struct block_device *blkdev_get_no_open(dev_t dev, bool autoload);
+void blkdev_put_no_open(struct block_device *bdev);
+
#define BIO_INLINE_VECS 4
struct bio_vec *bvec_alloc(mempool_t *pool, unsigned short *nr_vecs,
gfp_t gfp_mask);
diff --git a/block/fops.c b/block/fops.c
index be9f1dbea9ce..82b672d15ea4 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -642,7 +642,7 @@ static int blkdev_open(struct inode *inode, struct file *filp)
if (ret)
return ret;
- bdev = blkdev_get_no_open(inode->i_rdev);
+ bdev = blkdev_get_no_open(inode->i_rdev, true);
if (!bdev)
return -ENXIO;
@@ -746,7 +746,14 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
ret = direct_write_fallback(iocb, from, ret,
blkdev_buffered_write(iocb, from));
} else {
+ /*
+ * Take i_rwsem and invalidate_lock to avoid racing with
+ * set_blocksize changing i_blkbits/folio order and punching
+ * out the pagecache.
+ */
+ inode_lock_shared(bd_inode);
ret = blkdev_buffered_write(iocb, from);
+ inode_unlock_shared(bd_inode);
}
if (ret > 0)
@@ -757,6 +764,7 @@ static ssize_t blkdev_write_iter(struct kiocb *iocb, struct iov_iter *from)
static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
{
+ struct inode *bd_inode = bdev_file_inode(iocb->ki_filp);
struct block_device *bdev = I_BDEV(iocb->ki_filp->f_mapping->host);
loff_t size = bdev_nr_bytes(bdev);
loff_t pos = iocb->ki_pos;
@@ -793,7 +801,13 @@ static ssize_t blkdev_read_iter(struct kiocb *iocb, struct iov_iter *to)
goto reexpand;
}
+ /*
+ * Take i_rwsem and invalidate_lock to avoid racing with set_blocksize
+ * changing i_blkbits/folio order and punching out the pagecache.
+ */
+ inode_lock_shared(bd_inode);
ret = filemap_read(iocb, to, ret);
+ inode_unlock_shared(bd_inode);
reexpand:
if (unlikely(shorted))
@@ -836,6 +850,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
if ((start | len) & (bdev_logical_block_size(bdev) - 1))
return -EINVAL;
+ inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);
/*
@@ -868,6 +883,7 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
fail:
filemap_invalidate_unlock(inode->i_mapping);
+ inode_unlock(inode);
return error;
}
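
[Buffered reads and writes take i_rwsem shared, so they keep running
concurrently with each other while an exclusive set_blocksize() excludes them
all for the i_blkbits/folio-order switch. Side by side:

	/* read/write path: shared, so page cache readers and writers
	 * still proceed in parallel. */
	inode_lock_shared(bd_inode);
	ret = filemap_read(iocb, to, ret);
	inode_unlock_shared(bd_inode);

	/* set_blocksize(): exclusive, waits out every shared holder
	 * before reconfiguring the mapping geometry. */
	inode_lock(inode);
	filemap_invalidate_lock(inode->i_mapping);
]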
diff --git a/block/ioctl.c b/block/ioctl.c
index faa40f383e27..e472cc1030c6 100644
--- a/block/ioctl.c
+++ b/block/ioctl.c
@@ -142,6 +142,7 @@ static int blk_ioctl_discard(struct block_device *bdev, blk_mode_t mode,
if (err)
return err;
+ inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, start + len - 1);
if (err)
@@ -174,6 +175,7 @@ out_unplug:
blk_finish_plug(&plug);
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
+ inode_unlock(bdev->bd_mapping->host);
return err;
}
@@ -199,12 +201,14 @@ static int blk_ioctl_secure_erase(struct block_device *bdev, blk_mode_t mode,
end > bdev_nr_bytes(bdev))
return -EINVAL;
+ inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, end - 1);
if (!err)
err = blkdev_issue_secure_erase(bdev, start >> 9, len >> 9,
GFP_KERNEL);
filemap_invalidate_unlock(bdev->bd_mapping);
+ inode_unlock(bdev->bd_mapping->host);
return err;
}
@@ -236,6 +240,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
return -EINVAL;
/* Invalidate the page cache, including dirty pages */
+ inode_lock(bdev->bd_mapping->host);
filemap_invalidate_lock(bdev->bd_mapping);
err = truncate_bdev_range(bdev, mode, start, end);
if (err)
@@ -246,6 +251,7 @@ static int blk_ioctl_zeroout(struct block_device *bdev, blk_mode_t mode,
fail:
filemap_invalidate_unlock(bdev->bd_mapping);
+ inode_unlock(bdev->bd_mapping->host);
return err;
}