summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/ABI/stable/sysfs-block33
-rw-r--r--block/blk-settings.c20
-rw-r--r--block/blk-sysfs.c26
-rw-r--r--block/fops.c44
-rw-r--r--drivers/md/dm-table.c4
-rw-r--r--drivers/nvme/host/core.c20
-rw-r--r--drivers/nvme/target/io-cmd-bdev.c4
-rw-r--r--drivers/scsi/sd.c5
-rw-r--r--fs/ext4/extents.c66
-rw-r--r--fs/open.c1
-rw-r--r--include/linux/blkdev.h10
-rw-r--r--include/linux/falloc.h3
-rw-r--r--include/trace/events/ext4.h3
-rw-r--r--include/uapi/linux/falloc.h17
14 files changed, 212 insertions, 44 deletions
diff --git a/Documentation/ABI/stable/sysfs-block b/Documentation/ABI/stable/sysfs-block
index 4ba771b56b3b..803f578dc023 100644
--- a/Documentation/ABI/stable/sysfs-block
+++ b/Documentation/ABI/stable/sysfs-block
@@ -778,6 +778,39 @@ Description:
0, write zeroes is not supported by the device.
+What: /sys/block/<disk>/queue/write_zeroes_unmap_max_hw_bytes
+Date: January 2025
+Contact: Zhang Yi <yi.zhang@huawei.com>
+Description:
+ [RO] This file indicates whether a device supports zeroing data
+ in a specified block range without incurring the cost of
+ physically writing zeroes to the media for each individual
+ block. If this parameter is set to write_zeroes_max_bytes, the
+ device implements a zeroing operation which opportunistically
+ avoids writing zeroes to media while still guaranteeing that
+ subsequent reads from the specified block range will return
+ zeroed data. This operation is a best-effort optimization, a
+ device may fall back to physically writing zeroes to the media
+ due to other factors such as misalignment or being asked to
+ clear a block range smaller than the device's internal
+ allocation unit. If this parameter is set to 0, the device may
+ have to write each logical block media during a zeroing
+ operation.
+
+
+What: /sys/block/<disk>/queue/write_zeroes_unmap_max_bytes
+Date: January 2025
+Contact: Zhang Yi <yi.zhang@huawei.com>
+Description:
+ [RW] While write_zeroes_unmap_max_hw_bytes is the hardware limit
+ for the device, this setting is the software limit. Since the
+ unmap write zeroes operation is a best-effort optimization, some
+ devices may still physically writing zeroes to media. So the
+ speed of this operation is not guaranteed. Writing a value of
+ '0' to this file disables this operation. Otherwise, this
+ parameter should be equal to write_zeroes_unmap_max_hw_bytes.
+
+
What: /sys/block/<disk>/queue/zone_append_max_bytes
Date: May 2020
Contact: linux-block@vger.kernel.org
diff --git a/block/blk-settings.c b/block/blk-settings.c
index a000daafbfb4..b5c75f0ac3e9 100644
--- a/block/blk-settings.c
+++ b/block/blk-settings.c
@@ -50,6 +50,8 @@ void blk_set_stacking_limits(struct queue_limits *lim)
lim->max_sectors = UINT_MAX;
lim->max_dev_sectors = UINT_MAX;
lim->max_write_zeroes_sectors = UINT_MAX;
+ lim->max_hw_wzeroes_unmap_sectors = UINT_MAX;
+ lim->max_user_wzeroes_unmap_sectors = UINT_MAX;
lim->max_hw_zone_append_sectors = UINT_MAX;
lim->max_user_discard_sectors = UINT_MAX;
}
@@ -333,6 +335,12 @@ int blk_validate_limits(struct queue_limits *lim)
if (!lim->max_segments)
lim->max_segments = BLK_MAX_SEGMENTS;
+ if (lim->max_hw_wzeroes_unmap_sectors &&
+ lim->max_hw_wzeroes_unmap_sectors != lim->max_write_zeroes_sectors)
+ return -EINVAL;
+ lim->max_wzeroes_unmap_sectors = min(lim->max_hw_wzeroes_unmap_sectors,
+ lim->max_user_wzeroes_unmap_sectors);
+
lim->max_discard_sectors =
min(lim->max_hw_discard_sectors, lim->max_user_discard_sectors);
@@ -418,10 +426,11 @@ int blk_set_default_limits(struct queue_limits *lim)
{
/*
* Most defaults are set by capping the bounds in blk_validate_limits,
- * but max_user_discard_sectors is special and needs an explicit
- * initialization to the max value here.
+ * but these limits are special and need an explicit initialization to
+ * the max value here.
*/
lim->max_user_discard_sectors = UINT_MAX;
+ lim->max_user_wzeroes_unmap_sectors = UINT_MAX;
return blk_validate_limits(lim);
}
@@ -708,6 +717,13 @@ int blk_stack_limits(struct queue_limits *t, struct queue_limits *b,
t->max_dev_sectors = min_not_zero(t->max_dev_sectors, b->max_dev_sectors);
t->max_write_zeroes_sectors = min(t->max_write_zeroes_sectors,
b->max_write_zeroes_sectors);
+ t->max_user_wzeroes_unmap_sectors =
+ min(t->max_user_wzeroes_unmap_sectors,
+ b->max_user_wzeroes_unmap_sectors);
+ t->max_hw_wzeroes_unmap_sectors =
+ min(t->max_hw_wzeroes_unmap_sectors,
+ b->max_hw_wzeroes_unmap_sectors);
+
t->max_hw_zone_append_sectors = min(t->max_hw_zone_append_sectors,
b->max_hw_zone_append_sectors);
diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c
index b2b9b89d6967..48c7ecbb531f 100644
--- a/block/blk-sysfs.c
+++ b/block/blk-sysfs.c
@@ -161,6 +161,8 @@ static ssize_t queue_##_field##_show(struct gendisk *disk, char *page) \
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_discard_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_discard_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_write_zeroes_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_hw_wzeroes_unmap_sectors)
+QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_wzeroes_unmap_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_max_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(atomic_write_boundary_sectors)
QUEUE_SYSFS_LIMIT_SHOW_SECTORS_TO_BYTES(max_zone_append_sectors)
@@ -205,6 +207,24 @@ static int queue_max_discard_sectors_store(struct gendisk *disk,
return 0;
}
+static int queue_max_wzeroes_unmap_sectors_store(struct gendisk *disk,
+ const char *page, size_t count, struct queue_limits *lim)
+{
+ unsigned long max_zeroes_bytes, max_hw_zeroes_bytes;
+ ssize_t ret;
+
+ ret = queue_var_store(&max_zeroes_bytes, page, count);
+ if (ret < 0)
+ return ret;
+
+ max_hw_zeroes_bytes = lim->max_hw_wzeroes_unmap_sectors << SECTOR_SHIFT;
+ if (max_zeroes_bytes != 0 && max_zeroes_bytes != max_hw_zeroes_bytes)
+ return -EINVAL;
+
+ lim->max_user_wzeroes_unmap_sectors = max_zeroes_bytes >> SECTOR_SHIFT;
+ return 0;
+}
+
static int
queue_max_sectors_store(struct gendisk *disk, const char *page, size_t count,
struct queue_limits *lim)
@@ -514,6 +534,10 @@ QUEUE_LIM_RO_ENTRY(queue_atomic_write_unit_min, "atomic_write_unit_min_bytes");
QUEUE_RO_ENTRY(queue_write_same_max, "write_same_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_write_zeroes_sectors, "write_zeroes_max_bytes");
+QUEUE_LIM_RO_ENTRY(queue_max_hw_wzeroes_unmap_sectors,
+ "write_zeroes_unmap_max_hw_bytes");
+QUEUE_LIM_RW_ENTRY(queue_max_wzeroes_unmap_sectors,
+ "write_zeroes_unmap_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_max_zone_append_sectors, "zone_append_max_bytes");
QUEUE_LIM_RO_ENTRY(queue_zone_write_granularity, "zone_write_granularity");
@@ -662,6 +686,8 @@ static struct attribute *queue_attrs[] = {
&queue_atomic_write_unit_min_entry.attr,
&queue_atomic_write_unit_max_entry.attr,
&queue_max_write_zeroes_sectors_entry.attr,
+ &queue_max_hw_wzeroes_unmap_sectors_entry.attr,
+ &queue_max_wzeroes_unmap_sectors_entry.attr,
&queue_max_zone_append_sectors_entry.attr,
&queue_zone_write_granularity_entry.attr,
&queue_rotational_entry.attr,
diff --git a/block/fops.c b/block/fops.c
index 1309861d4c2c..474656a0de70 100644
--- a/block/fops.c
+++ b/block/fops.c
@@ -841,7 +841,7 @@ reexpand:
#define BLKDEV_FALLOC_FL_SUPPORTED \
(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE | \
- FALLOC_FL_ZERO_RANGE)
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_WRITE_ZEROES)
static long blkdev_fallocate(struct file *file, int mode, loff_t start,
loff_t len)
@@ -850,11 +850,19 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
struct block_device *bdev = I_BDEV(inode);
loff_t end = start + len - 1;
loff_t isize;
+ unsigned int flags;
int error;
/* Fail if we don't recognize the flags. */
if (mode & ~BLKDEV_FALLOC_FL_SUPPORTED)
return -EOPNOTSUPP;
+ /*
+ * Don't allow writing zeroes if the device does not enable the
+ * unmap write zeroes operation.
+ */
+ if ((mode & FALLOC_FL_WRITE_ZEROES) &&
+ !bdev_write_zeroes_unmap_sectors(bdev))
+ return -EOPNOTSUPP;
/* Don't go off the end of the device. */
isize = bdev_nr_bytes(bdev);
@@ -877,34 +885,32 @@ static long blkdev_fallocate(struct file *file, int mode, loff_t start,
inode_lock(inode);
filemap_invalidate_lock(inode->i_mapping);
- /*
- * Invalidate the page cache, including dirty pages, for valid
- * de-allocate mode calls to fallocate().
- */
switch (mode) {
case FALLOC_FL_ZERO_RANGE:
case FALLOC_FL_ZERO_RANGE | FALLOC_FL_KEEP_SIZE:
- error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
- if (error)
- goto fail;
-
- error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
- len >> SECTOR_SHIFT, GFP_KERNEL,
- BLKDEV_ZERO_NOUNMAP);
+ flags = BLKDEV_ZERO_NOUNMAP;
break;
case FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE:
- error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
- if (error)
- goto fail;
-
- error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
- len >> SECTOR_SHIFT, GFP_KERNEL,
- BLKDEV_ZERO_NOFALLBACK);
+ flags = BLKDEV_ZERO_NOFALLBACK;
+ break;
+ case FALLOC_FL_WRITE_ZEROES:
+ flags = 0;
break;
default:
error = -EOPNOTSUPP;
+ goto fail;
}
+ /*
+ * Invalidate the page cache, including dirty pages, for valid
+ * de-allocate mode calls to fallocate().
+ */
+ error = truncate_bdev_range(bdev, file_to_blk_mode(file), start, end);
+ if (error)
+ goto fail;
+
+ error = blkdev_issue_zeroout(bdev, start >> SECTOR_SHIFT,
+ len >> SECTOR_SHIFT, GFP_KERNEL, flags);
fail:
filemap_invalidate_unlock(inode->i_mapping);
inode_unlock(inode);
diff --git a/drivers/md/dm-table.c b/drivers/md/dm-table.c
index 24a857ff6d0b..d9d5e6aa5707 100644
--- a/drivers/md/dm-table.c
+++ b/drivers/md/dm-table.c
@@ -2065,8 +2065,10 @@ int dm_table_set_restrictions(struct dm_table *t, struct request_queue *q,
limits->discard_alignment = 0;
}
- if (!dm_table_supports_write_zeroes(t))
+ if (!dm_table_supports_write_zeroes(t)) {
limits->max_write_zeroes_sectors = 0;
+ limits->max_hw_wzeroes_unmap_sectors = 0;
+ }
if (!dm_table_supports_secure_erase(t))
limits->max_secure_erase_sectors = 0;
diff --git a/drivers/nvme/host/core.c b/drivers/nvme/host/core.c
index 92697f98c601..90af4a15b696 100644
--- a/drivers/nvme/host/core.c
+++ b/drivers/nvme/host/core.c
@@ -2420,22 +2420,24 @@ static int nvme_update_ns_info_block(struct nvme_ns *ns,
else
lim.write_stream_granularity = 0;
- ret = queue_limits_commit_update(ns->disk->queue, &lim);
- if (ret) {
- blk_mq_unfreeze_queue(ns->disk->queue, memflags);
- goto out;
- }
-
- set_capacity_and_notify(ns->disk, capacity);
-
/*
* Only set the DEAC bit if the device guarantees that reads from
* deallocated data return zeroes. While the DEAC bit does not
* require that, it must be a no-op if reads from deallocated data
* do not return zeroes.
*/
- if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3)))
+ if ((id->dlfeat & 0x7) == 0x1 && (id->dlfeat & (1 << 3))) {
ns->head->features |= NVME_NS_DEAC;
+ lim.max_hw_wzeroes_unmap_sectors = lim.max_write_zeroes_sectors;
+ }
+
+ ret = queue_limits_commit_update(ns->disk->queue, &lim);
+ if (ret) {
+ blk_mq_unfreeze_queue(ns->disk->queue, memflags);
+ goto out;
+ }
+
+ set_capacity_and_notify(ns->disk, capacity);
set_disk_ro(ns->disk, nvme_ns_is_readonly(ns, info));
set_bit(NVME_NS_READY, &ns->flags);
blk_mq_unfreeze_queue(ns->disk->queue, memflags);
diff --git a/drivers/nvme/target/io-cmd-bdev.c b/drivers/nvme/target/io-cmd-bdev.c
index eba42df2f821..31d6c4a63fa5 100644
--- a/drivers/nvme/target/io-cmd-bdev.c
+++ b/drivers/nvme/target/io-cmd-bdev.c
@@ -46,6 +46,10 @@ void nvmet_bdev_set_limits(struct block_device *bdev, struct nvme_id_ns *id)
id->npda = id->npdg;
/* NOWS = Namespace Optimal Write Size */
id->nows = to0based(bdev_io_opt(bdev) / bdev_logical_block_size(bdev));
+
+ /* Set WZDS and DRB if device supports unmapped write zeroes */
+ if (bdev_write_zeroes_unmap_sectors(bdev))
+ id->dlfeat = (1 << 3) | 0x1;
}
void nvmet_bdev_ns_disable(struct nvmet_ns *ns)
diff --git a/drivers/scsi/sd.c b/drivers/scsi/sd.c
index 3f6e87705b62..877dc34e8f37 100644
--- a/drivers/scsi/sd.c
+++ b/drivers/scsi/sd.c
@@ -1141,6 +1141,11 @@ static void sd_config_write_same(struct scsi_disk *sdkp,
out:
lim->max_write_zeroes_sectors =
sdkp->max_ws_blocks * (logical_block_size >> SECTOR_SHIFT);
+
+ if (sdkp->zeroing_mode == SD_ZERO_WS16_UNMAP ||
+ sdkp->zeroing_mode == SD_ZERO_WS10_UNMAP)
+ lim->max_hw_wzeroes_unmap_sectors =
+ lim->max_write_zeroes_sectors;
}
static blk_status_t sd_setup_flush_cmnd(struct scsi_cmnd *cmd)
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index b543a46fc809..b43aa82c1b39 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -4501,6 +4501,8 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
struct ext4_map_blocks map;
unsigned int credits;
loff_t epos, old_size = i_size_read(inode);
+ unsigned int blkbits = inode->i_blkbits;
+ bool alloc_zero = false;
BUG_ON(!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS));
map.m_lblk = offset;
@@ -4514,6 +4516,17 @@ static int ext4_alloc_file_blocks(struct file *file, ext4_lblk_t offset,
flags |= EXT4_GET_BLOCKS_NO_NORMALIZE;
/*
+ * Do the actual write zero during a running journal transaction
+ * costs a lot. First allocate an unwritten extent and then
+ * convert it to written after zeroing it out.
+ */
+ if (flags & EXT4_GET_BLOCKS_ZERO) {
+ flags &= ~EXT4_GET_BLOCKS_ZERO;
+ flags |= EXT4_GET_BLOCKS_UNWRIT_EXT;
+ alloc_zero = true;
+ }
+
+ /*
* credits to insert 1 extent into extent tree
*/
credits = ext4_chunk_trans_blocks(inode, len);
@@ -4549,9 +4562,7 @@ retry:
* allow a full retry cycle for any remaining allocations
*/
retries = 0;
- map.m_lblk += ret;
- map.m_len = len = len - ret;
- epos = (loff_t)map.m_lblk << inode->i_blkbits;
+ epos = (loff_t)(map.m_lblk + ret) << blkbits;
inode_set_ctime_current(inode);
if (new_size) {
if (epos > new_size)
@@ -4571,6 +4582,21 @@ retry:
ret2 = ret3 ? ret3 : ret2;
if (unlikely(ret2))
break;
+
+ if (alloc_zero &&
+ (map.m_flags & (EXT4_MAP_MAPPED | EXT4_MAP_UNWRITTEN))) {
+ ret2 = ext4_issue_zeroout(inode, map.m_lblk, map.m_pblk,
+ map.m_len);
+ if (likely(!ret2))
+ ret2 = ext4_convert_unwritten_extents(NULL,
+ inode, (loff_t)map.m_lblk << blkbits,
+ (loff_t)map.m_len << blkbits);
+ if (ret2)
+ break;
+ }
+
+ map.m_lblk += ret;
+ map.m_len = len = len - ret;
}
if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
goto retry;
@@ -4636,7 +4662,11 @@ static long ext4_zero_range(struct file *file, loff_t offset,
if (end_lblk > start_lblk) {
ext4_lblk_t zero_blks = end_lblk - start_lblk;
- flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN | EXT4_EX_NOCACHE);
+ if (mode & FALLOC_FL_WRITE_ZEROES)
+ flags = EXT4_GET_BLOCKS_CREATE_ZERO | EXT4_EX_NOCACHE;
+ else
+ flags |= (EXT4_GET_BLOCKS_CONVERT_UNWRITTEN |
+ EXT4_EX_NOCACHE);
ret = ext4_alloc_file_blocks(file, start_lblk, zero_blks,
new_size, flags);
if (ret)
@@ -4745,11 +4775,18 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (IS_ENCRYPTED(inode) &&
(mode & (FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_INSERT_RANGE)))
return -EOPNOTSUPP;
+ /*
+ * Don't allow writing zeroes if the underlying device does not
+ * enable the unmap write zeroes operation.
+ */
+ if ((mode & FALLOC_FL_WRITE_ZEROES) &&
+ !bdev_write_zeroes_unmap_sectors(inode->i_sb->s_bdev))
+ return -EOPNOTSUPP;
/* Return error if mode is not supported */
if (mode & ~(FALLOC_FL_KEEP_SIZE | FALLOC_FL_PUNCH_HOLE |
- FALLOC_FL_COLLAPSE_RANGE | FALLOC_FL_ZERO_RANGE |
- FALLOC_FL_INSERT_RANGE))
+ FALLOC_FL_ZERO_RANGE | FALLOC_FL_COLLAPSE_RANGE |
+ FALLOC_FL_INSERT_RANGE | FALLOC_FL_WRITE_ZEROES))
return -EOPNOTSUPP;
inode_lock(inode);
@@ -4780,16 +4817,23 @@ long ext4_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
if (ret)
goto out_invalidate_lock;
- if (mode & FALLOC_FL_PUNCH_HOLE)
+ switch (mode & FALLOC_FL_MODE_MASK) {
+ case FALLOC_FL_PUNCH_HOLE:
ret = ext4_punch_hole(file, offset, len);
- else if (mode & FALLOC_FL_COLLAPSE_RANGE)
+ break;
+ case FALLOC_FL_COLLAPSE_RANGE:
ret = ext4_collapse_range(file, offset, len);
- else if (mode & FALLOC_FL_INSERT_RANGE)
+ break;
+ case FALLOC_FL_INSERT_RANGE:
ret = ext4_insert_range(file, offset, len);
- else if (mode & FALLOC_FL_ZERO_RANGE)
+ break;
+ case FALLOC_FL_ZERO_RANGE:
+ case FALLOC_FL_WRITE_ZEROES:
ret = ext4_zero_range(file, offset, len, mode);
- else
+ break;
+ default:
ret = -EOPNOTSUPP;
+ }
out_invalidate_lock:
filemap_invalidate_unlock(mapping);
diff --git a/fs/open.c b/fs/open.c
index 7828234a7caa..b777e11e5522 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -281,6 +281,7 @@ int vfs_fallocate(struct file *file, int mode, loff_t offset, loff_t len)
break;
case FALLOC_FL_COLLAPSE_RANGE:
case FALLOC_FL_INSERT_RANGE:
+ case FALLOC_FL_WRITE_ZEROES:
if (mode & FALLOC_FL_KEEP_SIZE)
return -EOPNOTSUPP;
break;
diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h
index a59880c809c7..1a5725c1f93d 100644
--- a/include/linux/blkdev.h
+++ b/include/linux/blkdev.h
@@ -383,6 +383,9 @@ struct queue_limits {
unsigned int max_user_discard_sectors;
unsigned int max_secure_erase_sectors;
unsigned int max_write_zeroes_sectors;
+ unsigned int max_wzeroes_unmap_sectors;
+ unsigned int max_hw_wzeroes_unmap_sectors;
+ unsigned int max_user_wzeroes_unmap_sectors;
unsigned int max_hw_zone_append_sectors;
unsigned int max_zone_append_sectors;
unsigned int discard_granularity;
@@ -1042,6 +1045,7 @@ static inline void blk_queue_disable_secure_erase(struct request_queue *q)
static inline void blk_queue_disable_write_zeroes(struct request_queue *q)
{
q->limits.max_write_zeroes_sectors = 0;
+ q->limits.max_wzeroes_unmap_sectors = 0;
}
/*
@@ -1378,6 +1382,12 @@ static inline unsigned int bdev_write_zeroes_sectors(struct block_device *bdev)
return bdev_limits(bdev)->max_write_zeroes_sectors;
}
+static inline unsigned int
+bdev_write_zeroes_unmap_sectors(struct block_device *bdev)
+{
+ return bdev_limits(bdev)->max_wzeroes_unmap_sectors;
+}
+
static inline bool bdev_nonrot(struct block_device *bdev)
{
return blk_queue_nonrot(bdev_get_queue(bdev));
diff --git a/include/linux/falloc.h b/include/linux/falloc.h
index 3f49f3df6af5..7c38c6b76b60 100644
--- a/include/linux/falloc.h
+++ b/include/linux/falloc.h
@@ -36,7 +36,8 @@ struct space_resv {
FALLOC_FL_COLLAPSE_RANGE | \
FALLOC_FL_ZERO_RANGE | \
FALLOC_FL_INSERT_RANGE | \
- FALLOC_FL_UNSHARE_RANGE)
+ FALLOC_FL_UNSHARE_RANGE | \
+ FALLOC_FL_WRITE_ZEROES)
/* on ia32 l_start is on a 32-bit boundary */
#if defined(CONFIG_X86_64)
diff --git a/include/trace/events/ext4.h b/include/trace/events/ext4.h
index 156908641e68..6f9cf2811733 100644
--- a/include/trace/events/ext4.h
+++ b/include/trace/events/ext4.h
@@ -92,7 +92,8 @@ TRACE_DEFINE_ENUM(ES_REFERENCED_B);
{ FALLOC_FL_KEEP_SIZE, "KEEP_SIZE"}, \
{ FALLOC_FL_PUNCH_HOLE, "PUNCH_HOLE"}, \
{ FALLOC_FL_COLLAPSE_RANGE, "COLLAPSE_RANGE"}, \
- { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"})
+ { FALLOC_FL_ZERO_RANGE, "ZERO_RANGE"}, \
+ { FALLOC_FL_WRITE_ZEROES, "WRITE_ZEROES"})
TRACE_DEFINE_ENUM(EXT4_FC_REASON_XATTR);
TRACE_DEFINE_ENUM(EXT4_FC_REASON_CROSS_RENAME);
diff --git a/include/uapi/linux/falloc.h b/include/uapi/linux/falloc.h
index 5810371ed72b..1f9ca757d02d 100644
--- a/include/uapi/linux/falloc.h
+++ b/include/uapi/linux/falloc.h
@@ -78,4 +78,21 @@
*/
#define FALLOC_FL_UNSHARE_RANGE 0x40
+/*
+ * FALLOC_FL_WRITE_ZEROES zeroes a specified file range in such a way that
+ * subsequent writes to that range do not require further changes to the file
+ * mapping metadata. This flag is beneficial for subsequent pure overwriting
+ * within this range, as it can save on block allocation and, consequently,
+ * significant metadata changes. Therefore, filesystems that always require
+ * out-of-place writes should not support this flag.
+ *
+ * Different filesystems may implement different limitations on the
+ * granularity of the zeroing operation. Most will preferably be accelerated
+ * by submitting write zeroes command if the backing storage supports, which
+ * may not physically write zeros to the media.
+ *
+ * This flag cannot be specified in conjunction with the FALLOC_FL_KEEP_SIZE.
+ */
+#define FALLOC_FL_WRITE_ZEROES 0x80
+
#endif /* _UAPI_FALLOC_H_ */