summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorKonstantin Komarov <almaz.alexandrovich@paragon-software.com>2026-02-16 17:10:26 +0100
committerKonstantin Komarov <almaz.alexandrovich@paragon-software.com>2026-02-16 17:23:51 +0100
commit10d7c95af043b45a85dc738c3271bf760ff3577e (patch)
tree2c10058d39457c5a01ef3c1c2532212ade3eed9b
parentc5226b96c08a010ebef5fdf4c90572bcd89e4299 (diff)
fs/ntfs3: add delayed-allocation (delalloc) support
This patch implements delayed allocation (delalloc) in ntfs3 driver. It introduces an in-memory delayed-runlist (run_da) and the helpers to track, reserve and later convert those delayed reservations into real clusters at writeback time. The change keeps on-disk formats untouched and focuses on pagecache integration, correctness and safe interaction with fallocate, truncate, and dio/iomap paths. Key points: - add run_da (delay-allocated run tree) and bookkeeping for delayed clusters. - mark ranges as delalloc (DELALLOC_LCN) instead of immediately allocating. Actual allocation performed later (writeback / attr_set_size_ex / explicit flush paths). - direct i/o / iomap paths updated to avoid dio collisions with delalloc: dio falls back or forces allocation of delayed blocks before proceeding. - punch/collapse/truncate/fallocate check and cancel delay-alloc reservations. Sparse/compressed files handled specially. - free-space checks updated (ntfs_check_free_space) to account for reserved delalloc clusters and MFT record budgeting. - delayed allocations are committed on last writer (file release) and on explicit allocation flush paths. Tested-by: syzbot@syzkaller.appspotmail.com Reported-by: syzbot+2bd8e813c7f767aa9bb1@syzkaller.appspotmail.com Signed-off-by: Konstantin Komarov <almaz.alexandrovich@paragon-software.com>
-rw-r--r--fs/ntfs3/attrib.c333
-rw-r--r--fs/ntfs3/attrlist.c8
-rw-r--r--fs/ntfs3/file.c306
-rw-r--r--fs/ntfs3/frecord.c72
-rw-r--r--fs/ntfs3/fsntfs.c53
-rw-r--r--fs/ntfs3/index.c23
-rw-r--r--fs/ntfs3/inode.c161
-rw-r--r--fs/ntfs3/ntfs.h3
-rw-r--r--fs/ntfs3/ntfs_fs.h91
-rw-r--r--fs/ntfs3/run.c150
-rw-r--r--fs/ntfs3/super.c28
-rw-r--r--fs/ntfs3/xattr.c2
12 files changed, 886 insertions, 344 deletions
diff --git a/fs/ntfs3/attrib.c b/fs/ntfs3/attrib.c
index aa745fb226f5..6cb9bc5d605c 100644
--- a/fs/ntfs3/attrib.c
+++ b/fs/ntfs3/attrib.c
@@ -91,7 +91,8 @@ static int attr_load_runs(struct ATTRIB *attr, struct ntfs_inode *ni,
* run_deallocate_ex - Deallocate clusters.
*/
static int run_deallocate_ex(struct ntfs_sb_info *sbi, struct runs_tree *run,
- CLST vcn, CLST len, CLST *done, bool trim)
+ CLST vcn, CLST len, CLST *done, bool trim,
+ struct runs_tree *run_da)
{
int err = 0;
CLST vcn_next, vcn0 = vcn, lcn, clen, dn = 0;
@@ -120,6 +121,16 @@ failed:
if (sbi) {
/* mark bitmap range [lcn + clen) as free and trim clusters. */
mark_as_free_ex(sbi, lcn, clen, trim);
+
+ if (run_da) {
+ CLST da_len;
+ if (!run_remove_range(run_da, vcn, clen,
+ &da_len)) {
+ err = -ENOMEM;
+ goto failed;
+ }
+ ntfs_sub_da(sbi, da_len);
+ }
}
dn += clen;
}
@@ -147,9 +158,10 @@ out:
* attr_allocate_clusters - Find free space, mark it as used and store in @run.
*/
int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
- CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
- enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
- CLST *new_lcn, CLST *new_len)
+ struct runs_tree *run_da, CLST vcn, CLST lcn,
+ CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt,
+ CLST *alen, const size_t fr, CLST *new_lcn,
+ CLST *new_len)
{
int err;
CLST flen, vcn0 = vcn, pre = pre_alloc ? *pre_alloc : 0;
@@ -185,12 +197,21 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
/* Add new fragment into run storage. */
if (!run_add_entry(run, vcn, lcn, flen, opt & ALLOCATE_MFT)) {
+undo_alloc:
/* Undo last 'ntfs_look_for_free_space' */
mark_as_free_ex(sbi, lcn, len, false);
err = -ENOMEM;
goto out;
}
+ if (run_da) {
+ CLST da_len;
+ if (!run_remove_range(run_da, vcn, flen, &da_len)) {
+ goto undo_alloc;
+ }
+ ntfs_sub_da(sbi, da_len);
+ }
+
if (opt & ALLOCATE_ZERO) {
u8 shift = sbi->cluster_bits - SECTOR_SHIFT;
@@ -205,7 +226,7 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
vcn += flen;
if (flen >= len || (opt & ALLOCATE_MFT) ||
- (fr && run->count - cnt >= fr)) {
+ (opt & ALLOCATE_ONE_FR) || (fr && run->count - cnt >= fr)) {
*alen = vcn - vcn0;
return 0;
}
@@ -216,7 +237,8 @@ int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
out:
/* Undo 'ntfs_look_for_free_space' */
if (vcn - vcn0) {
- run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false);
+ run_deallocate_ex(sbi, run, vcn0, vcn - vcn0, NULL, false,
+ run_da);
run_truncate(run, vcn0);
}
@@ -281,7 +303,7 @@ int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
} else {
const char *data = resident_data(attr);
- err = attr_allocate_clusters(sbi, run, 0, 0, len, NULL,
+ err = attr_allocate_clusters(sbi, run, NULL, 0, 0, len, NULL,
ALLOCATE_DEF, &alen, 0, NULL,
NULL);
if (err)
@@ -397,7 +419,7 @@ static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr,
}
/*
- * attr_set_size - Change the size of attribute.
+ * attr_set_size_ex - Change the size of attribute.
*
* Extend:
* - Sparse/compressed: No allocated clusters.
@@ -405,24 +427,28 @@ static int attr_set_size_res(struct ntfs_inode *ni, struct ATTRIB *attr,
* Shrink:
* - No deallocate if @keep_prealloc is set.
*/
-int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
- const __le16 *name, u8 name_len, struct runs_tree *run,
- u64 new_size, const u64 *new_valid, bool keep_prealloc,
- struct ATTRIB **ret)
+int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, struct runs_tree *run,
+ u64 new_size, const u64 *new_valid, bool keep_prealloc,
+ struct ATTRIB **ret, bool no_da)
{
int err = 0;
struct ntfs_sb_info *sbi = ni->mi.sbi;
u8 cluster_bits = sbi->cluster_bits;
bool is_mft = ni->mi.rno == MFT_REC_MFT && type == ATTR_DATA &&
!name_len;
- u64 old_valid, old_size, old_alloc, new_alloc, new_alloc_tmp;
+ u64 old_valid, old_size, old_alloc, new_alloc_tmp;
+ u64 new_alloc = 0;
struct ATTRIB *attr = NULL, *attr_b;
struct ATTR_LIST_ENTRY *le, *le_b;
struct mft_inode *mi, *mi_b;
CLST alen, vcn, lcn, new_alen, old_alen, svcn, evcn;
CLST next_svcn, pre_alloc = -1, done = 0;
- bool is_ext, is_bad = false;
+ bool is_ext = false, is_bad = false;
bool dirty = false;
+ struct runs_tree *run_da = run == &ni->file.run ? &ni->file.run_da :
+ NULL;
+ bool da = !is_mft && sbi->options->delalloc && run_da && !no_da;
u32 align;
struct MFT_REC *rec;
@@ -457,6 +483,7 @@ again:
if (is_ext) {
align <<= attr_b->nres.c_unit;
keep_prealloc = false;
+ da = false;
}
old_valid = le64_to_cpu(attr_b->nres.valid_size);
@@ -475,6 +502,37 @@ again_1:
goto ok;
}
+ if (da &&
+ (vcn = old_alen + run_len(&ni->file.run_da), new_alen > vcn)) {
+ /* Resize up normal file. Delay new clusters allocation. */
+ alen = new_alen - vcn;
+
+ if (ntfs_check_free_space(sbi, alen, 0, true)) {
+ if (!run_add_entry(&ni->file.run_da, vcn, SPARSE_LCN,
+ alen, false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ntfs_add_da(sbi, alen);
+ goto ok1;
+ }
+ }
+
+ if (!keep_prealloc && run_da && run_da->count &&
+ (vcn = run_get_max_vcn(run_da), new_alen < vcn)) {
+ /* Shrink delayed clusters. */
+
+ /* Try to remove fragment from delay allocated run. */
+ if (!run_remove_range(run_da, new_alen, vcn - new_alen,
+ &alen)) {
+ err = -ENOMEM;
+ goto out;
+ }
+
+ ntfs_sub_da(sbi, alen);
+ }
+
vcn = old_alen - 1;
svcn = le64_to_cpu(attr_b->nres.svcn);
@@ -580,7 +638,8 @@ add_alloc_in_same_attr_seg:
} else {
/* ~3 bytes per fragment. */
err = attr_allocate_clusters(
- sbi, run, vcn, lcn, to_allocate, &pre_alloc,
+ sbi, run, run_da, vcn, lcn, to_allocate,
+ &pre_alloc,
is_mft ? ALLOCATE_MFT : ALLOCATE_DEF, &alen,
is_mft ? 0 :
(sbi->record_size -
@@ -759,14 +818,14 @@ pack_runs:
mi_b->dirty = dirty = true;
err = run_deallocate_ex(sbi, run, vcn, evcn - vcn + 1, &dlen,
- true);
+ true, run_da);
if (err)
goto out;
if (is_ext) {
/* dlen - really deallocated clusters. */
le64_sub_cpu(&attr_b->nres.total_size,
- ((u64)dlen << cluster_bits));
+ (u64)dlen << cluster_bits);
}
run_truncate(run, vcn);
@@ -821,14 +880,14 @@ ok1:
if (((type == ATTR_DATA && !name_len) ||
(type == ATTR_ALLOC && name == I30_NAME))) {
/* Update inode_set_bytes. */
- if (attr_b->non_res) {
- new_alloc = le64_to_cpu(attr_b->nres.alloc_size);
- if (inode_get_bytes(&ni->vfs_inode) != new_alloc) {
- inode_set_bytes(&ni->vfs_inode, new_alloc);
- dirty = true;
- }
+ if (attr_b->non_res &&
+ inode_get_bytes(&ni->vfs_inode) != new_alloc) {
+ inode_set_bytes(&ni->vfs_inode, new_alloc);
+ dirty = true;
}
+ i_size_write(&ni->vfs_inode, new_size);
+
/* Don't forget to update duplicate information in parent. */
if (dirty) {
ni->ni_flags |= NI_FLAG_UPDATE_PARENT;
@@ -869,7 +928,7 @@ restore_run:
is_bad = true;
undo_1:
- run_deallocate_ex(sbi, run, vcn, alen, NULL, false);
+ run_deallocate_ex(sbi, run, vcn, alen, NULL, false, run_da);
run_truncate(run, vcn);
out:
@@ -892,20 +951,9 @@ bad_inode:
* - new allocated clusters are zeroed via blkdev_issue_zeroout.
*/
int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
- CLST *len, bool *new, bool zero, void **res)
+ CLST *len, bool *new, bool zero, void **res, bool no_da)
{
- int err = 0;
- struct runs_tree *run = &ni->file.run;
- struct ntfs_sb_info *sbi;
- u8 cluster_bits;
- struct ATTRIB *attr, *attr_b;
- struct ATTR_LIST_ENTRY *le, *le_b;
- struct mft_inode *mi, *mi_b;
- CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0, alen;
- CLST alloc, evcn;
- unsigned fr;
- u64 total_size, total_size0;
- int step = 0;
+ int err;
if (new)
*new = false;
@@ -914,23 +962,63 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
/* Try to find in cache. */
down_read(&ni->file.run_lock);
- if (!run_lookup_entry(run, vcn, lcn, len, NULL))
+ if (!no_da && run_lookup_entry(&ni->file.run_da, vcn, lcn, len, NULL)) {
+ /* The requested vcn is delay allocated. */
+ *lcn = DELALLOC_LCN;
+ } else if (run_lookup_entry(&ni->file.run, vcn, lcn, len, NULL)) {
+ /* The requested vcn is known in current run. */
+ } else {
*len = 0;
+ }
up_read(&ni->file.run_lock);
if (*len && (*lcn != SPARSE_LCN || !new))
return 0; /* Fast normal way without allocation. */
/* No cluster in cache or we need to allocate cluster in hole. */
- sbi = ni->mi.sbi;
- cluster_bits = sbi->cluster_bits;
-
ni_lock(ni);
down_write(&ni->file.run_lock);
- /* Repeat the code above (under write lock). */
- if (!run_lookup_entry(run, vcn, lcn, len, NULL))
+ err = attr_data_get_block_locked(ni, vcn, clen, lcn, len, new, zero,
+ res, no_da);
+
+ up_write(&ni->file.run_lock);
+ ni_unlock(ni);
+
+ return err;
+}
+
+/*
+ * attr_data_get_block_locked - Helper for attr_data_get_block.
+ */
+int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen,
+ CLST *lcn, CLST *len, bool *new, bool zero,
+ void **res, bool no_da)
+{
+ int err = 0;
+ struct ntfs_sb_info *sbi = ni->mi.sbi;
+ struct runs_tree *run = &ni->file.run;
+ struct runs_tree *run_da = &ni->file.run_da;
+ bool da = sbi->options->delalloc && !no_da;
+ u8 cluster_bits;
+ struct ATTRIB *attr, *attr_b;
+ struct ATTR_LIST_ENTRY *le, *le_b;
+ struct mft_inode *mi, *mi_b;
+ CLST hint, svcn, to_alloc, evcn1, next_svcn, asize, end, vcn0;
+ CLST alloc, evcn;
+ unsigned fr;
+ u64 total_size, total_size0;
+ int step;
+
+again:
+ if (da && run_lookup_entry(run_da, vcn, lcn, len, NULL)) {
+ /* The requested vcn is delay allocated. */
+ *lcn = DELALLOC_LCN;
+ } else if (run_lookup_entry(run, vcn, lcn, len, NULL)) {
+ /* The requested vcn is known in current run. */
+ } else {
*len = 0;
+ }
if (*len) {
if (*lcn != SPARSE_LCN || !new)
@@ -939,6 +1027,9 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
clen = *len;
}
+ cluster_bits = sbi->cluster_bits;
+ step = 0;
+
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL, &mi_b);
if (!attr_b) {
@@ -1061,11 +1152,38 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
if (err)
goto out;
}
+ da = false; /* no delalloc for compressed file. */
}
if (vcn + to_alloc > asize)
to_alloc = asize - vcn;
+ if (da) {
+ CLST rlen1, rlen2;
+ if (!ntfs_check_free_space(sbi, to_alloc, 0, true)) {
+ err = ni_allocate_da_blocks_locked(ni);
+ if (err)
+ goto out;
+ /* Layout of records may be changed. Start again without 'da'. */
+ da = false;
+ goto again;
+ }
+
+ /* run_add_entry consolidates existed ranges. */
+ rlen1 = run_len(run_da);
+ if (!run_add_entry(run_da, vcn, SPARSE_LCN, to_alloc, false)) {
+ err = -ENOMEM;
+ goto out;
+ }
+ rlen2 = run_len(run_da);
+
+ /* new added delay clusters = rlen2 - rlen1. */
+ ntfs_add_da(sbi, rlen2 - rlen1);
+ *len = to_alloc;
+ *lcn = DELALLOC_LCN;
+ goto ok;
+ }
+
/* Get the last LCN to allocate from. */
hint = 0;
@@ -1080,18 +1198,19 @@ int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
}
/* Allocate and zeroout new clusters. */
- err = attr_allocate_clusters(sbi, run, vcn, hint + 1, to_alloc, NULL,
- zero ? ALLOCATE_ZERO : ALLOCATE_DEF, &alen,
- fr, lcn, len);
+ err = attr_allocate_clusters(sbi, run, run_da, vcn, hint + 1, to_alloc,
+ NULL,
+ zero ? ALLOCATE_ZERO : ALLOCATE_ONE_FR,
+ len, fr, lcn, len);
if (err)
goto out;
*new = true;
step = 1;
- end = vcn + alen;
+ end = vcn + *len;
/* Save 'total_size0' to restore if error. */
total_size0 = le64_to_cpu(attr_b->nres.total_size);
- total_size = total_size0 + ((u64)alen << cluster_bits);
+ total_size = total_size0 + ((u64)*len << cluster_bits);
if (vcn != vcn0) {
if (!run_lookup_entry(run, vcn0, lcn, len, NULL)) {
@@ -1157,7 +1276,7 @@ repack:
* in 'ni_insert_nonresident'.
* Return in advance -ENOSPC here if there are no free cluster and no free MFT.
*/
- if (!ntfs_check_for_free_space(sbi, 1, 1)) {
+ if (!ntfs_check_free_space(sbi, 1, 1, false)) {
/* Undo step 1. */
err = -ENOSPC;
goto undo1;
@@ -1242,8 +1361,6 @@ out:
/* Too complex to restore. */
_ntfs_bad_inode(&ni->vfs_inode);
}
- up_write(&ni->file.run_lock);
- ni_unlock(ni);
return err;
@@ -1252,8 +1369,8 @@ undo1:
attr_b->nres.total_size = cpu_to_le64(total_size0);
inode_set_bytes(&ni->vfs_inode, total_size0);
- if (run_deallocate_ex(sbi, run, vcn, alen, NULL, false) ||
- !run_add_entry(run, vcn, SPARSE_LCN, alen, false) ||
+ if (run_deallocate_ex(sbi, run, vcn, *len, NULL, false, run_da) ||
+ !run_add_entry(run, vcn, SPARSE_LCN, *len, false) ||
mi_pack_runs(mi, attr, run, max(end, evcn1) - svcn)) {
_ntfs_bad_inode(&ni->vfs_inode);
}
@@ -1688,7 +1805,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
if (len < clst_data) {
err = run_deallocate_ex(sbi, run, vcn + len, clst_data - len,
- NULL, true);
+ NULL, true, NULL);
if (err)
goto out;
@@ -1708,7 +1825,7 @@ int attr_allocate_frame(struct ntfs_inode *ni, CLST frame, size_t compr_size,
hint = -1;
}
- err = attr_allocate_clusters(sbi, run, vcn + clst_data,
+ err = attr_allocate_clusters(sbi, run, NULL, vcn + clst_data,
hint + 1, len - clst_data, NULL,
ALLOCATE_DEF, &alen, 0, NULL,
NULL);
@@ -1863,6 +1980,7 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
CLST vcn, end;
u64 valid_size, data_size, alloc_size, total_size;
u32 mask;
+ u64 i_size;
__le16 a_flags;
if (!bytes)
@@ -1878,52 +1996,79 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
return 0;
}
- data_size = le64_to_cpu(attr_b->nres.data_size);
- alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
- a_flags = attr_b->flags;
-
- if (is_attr_ext(attr_b)) {
- total_size = le64_to_cpu(attr_b->nres.total_size);
- mask = (sbi->cluster_size << attr_b->nres.c_unit) - 1;
- } else {
- total_size = alloc_size;
- mask = sbi->cluster_mask;
- }
-
- if ((vbo & mask) || (bytes & mask)) {
+ mask = is_attr_ext(attr_b) ?
+ ((sbi->cluster_size << attr_b->nres.c_unit) - 1) :
+ sbi->cluster_mask;
+ if ((vbo | bytes) & mask) {
/* Allow to collapse only cluster aligned ranges. */
return -EINVAL;
}
- if (vbo > data_size)
+ /* i_size - size of file with delay allocated clusters. */
+ i_size = ni->vfs_inode.i_size;
+
+ if (vbo > i_size)
return -EINVAL;
down_write(&ni->file.run_lock);
- if (vbo + bytes >= data_size) {
- u64 new_valid = min(ni->i_valid, vbo);
+ if (vbo + bytes >= i_size) {
+ valid_size = min(ni->i_valid, vbo);
/* Simple truncate file at 'vbo'. */
truncate_setsize(&ni->vfs_inode, vbo);
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, vbo,
- &new_valid, true, NULL);
+ &valid_size, true);
- if (!err && new_valid < ni->i_valid)
- ni->i_valid = new_valid;
+ if (!err && valid_size < ni->i_valid)
+ ni->i_valid = valid_size;
goto out;
}
- /*
- * Enumerate all attribute segments and collapse.
- */
- alen = alloc_size >> sbi->cluster_bits;
vcn = vbo >> sbi->cluster_bits;
len = bytes >> sbi->cluster_bits;
end = vcn + len;
dealloc = 0;
done = 0;
+ /*
+ * Check delayed clusters.
+ */
+ if (ni->file.run_da.count) {
+ struct runs_tree *run_da = &ni->file.run_da;
+ if (run_is_mapped_full(run_da, vcn, end - 1)) {
+ /*
+ * The requested range is full in delayed clusters.
+ */
+ err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, run,
+ i_size - bytes, NULL, false,
+ NULL, true);
+ goto out;
+ }
+
+ /* Collapse request crosses real and delayed clusters. */
+ err = ni_allocate_da_blocks_locked(ni);
+ if (err)
+ goto out;
+
+ /* Layout of records maybe changed. */
+ le_b = NULL;
+ attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
+ &mi_b);
+ if (!attr_b || !attr_b->non_res) {
+ err = -ENOENT;
+ goto out;
+ }
+ }
+
+ data_size = le64_to_cpu(attr_b->nres.data_size);
+ alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
+ total_size = is_attr_ext(attr_b) ?
+ le64_to_cpu(attr_b->nres.total_size) :
+ alloc_size;
+ alen = alloc_size >> sbi->cluster_bits;
+ a_flags = attr_b->flags;
svcn = le64_to_cpu(attr_b->nres.svcn);
evcn1 = le64_to_cpu(attr_b->nres.evcn) + 1;
@@ -1946,6 +2091,9 @@ int attr_collapse_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
goto out;
}
+ /*
+ * Enumerate all attribute segments and collapse.
+ */
for (;;) {
CLST vcn1, eat, next_svcn;
@@ -1973,13 +2121,13 @@ check_seg:
vcn1 = vcn + done; /* original vcn in attr/run. */
eat = min(end, evcn1) - vcn1;
- err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true);
+ err = run_deallocate_ex(sbi, run, vcn1, eat, &dealloc, true,
+ NULL);
if (err)
goto out;
if (svcn + eat < evcn1) {
/* Collapse a part of this attribute segment. */
-
if (!run_collapse_range(run, vcn1, eat, done)) {
err = -ENOMEM;
goto out;
@@ -2160,9 +2308,9 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
bytes = alloc_size;
bytes -= vbo;
- if ((vbo & mask) || (bytes & mask)) {
+ if ((vbo | bytes) & mask) {
/* We have to zero a range(s). */
- if (frame_size == NULL) {
+ if (!frame_size) {
/* Caller insists range is aligned. */
return -EINVAL;
}
@@ -2221,7 +2369,8 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
* Calculate how many clusters there are.
* Don't do any destructive actions.
*/
- err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false);
+ err = run_deallocate_ex(NULL, run, vcn1, zero, &hole2, false,
+ NULL);
if (err)
goto done;
@@ -2259,7 +2408,8 @@ int attr_punch_hole(struct ntfs_inode *ni, u64 vbo, u64 bytes, u32 *frame_size)
}
/* Real deallocate. Should not fail. */
- run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true);
+ run_deallocate_ex(sbi, &run2, vcn1, zero, &hole, true,
+ &ni->file.run_da);
next_attr:
/* Free all allocated memory. */
@@ -2371,7 +2521,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
return -EINVAL;
}
- if ((vbo & mask) || (bytes & mask)) {
+ if ((vbo | bytes) & mask) {
/* Allow to insert only frame aligned ranges. */
return -EINVAL;
}
@@ -2390,7 +2540,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
if (!attr_b->non_res) {
err = attr_set_size(ni, ATTR_DATA, NULL, 0, run,
- data_size + bytes, NULL, false, NULL);
+ data_size + bytes, NULL, false);
le_b = NULL;
attr_b = ni_find_attr(ni, NULL, &le_b, ATTR_DATA, NULL, 0, NULL,
@@ -2413,7 +2563,7 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
goto done;
}
- /* Resident files becomes nonresident. */
+ /* Resident file becomes nonresident. */
data_size = le64_to_cpu(attr_b->nres.data_size);
alloc_size = le64_to_cpu(attr_b->nres.alloc_size);
}
@@ -2450,10 +2600,13 @@ int attr_insert_range(struct ntfs_inode *ni, u64 vbo, u64 bytes)
if (err)
goto out;
- if (!run_insert_range(run, vcn, len)) {
- err = -ENOMEM;
+ err = run_insert_range(run, vcn, len);
+ if (err)
+ goto out;
+
+ err = run_insert_range_da(&ni->file.run_da, vcn, len);
+ if (err)
goto out;
- }
/* Try to pack in current record as much as possible. */
err = mi_pack_runs(mi, attr, run, evcn1 + len - svcn);
diff --git a/fs/ntfs3/attrlist.c b/fs/ntfs3/attrlist.c
index 098bd7e8c3d6..270a29323530 100644
--- a/fs/ntfs3/attrlist.c
+++ b/fs/ntfs3/attrlist.c
@@ -345,8 +345,8 @@ int al_add_le(struct ntfs_inode *ni, enum ATTR_TYPE type, const __le16 *name,
le->id = id;
memcpy(le->name, name, sizeof(short) * name_len);
- err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, new_size,
- &new_size, true, &attr);
+ err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, new_size,
+ &new_size, true, &attr, false);
if (err) {
/* Undo memmove above. */
memmove(le, Add2Ptr(le, sz), old_size - off);
@@ -404,8 +404,8 @@ int al_update(struct ntfs_inode *ni, int sync)
* Attribute list increased on demand in al_add_le.
* Attribute list decreased here.
*/
- err = attr_set_size(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL,
- false, &attr);
+ err = attr_set_size_ex(ni, ATTR_LIST, NULL, 0, &al->run, al->size, NULL,
+ false, &attr, false);
if (err)
goto out;
diff --git a/fs/ntfs3/file.c b/fs/ntfs3/file.c
index 1be77f865d78..79e4c7a78c26 100644
--- a/fs/ntfs3/file.c
+++ b/fs/ntfs3/file.c
@@ -26,6 +26,38 @@
*/
#define NTFS3_IOC_SHUTDOWN _IOR('X', 125, __u32)
+/*
+ * Helper for ntfs_should_use_dio.
+ */
+static u32 ntfs_dio_alignment(struct inode *inode)
+{
+ struct ntfs_inode *ni = ntfs_i(inode);
+
+ if (is_resident(ni)) {
+ /* Check delalloc. */
+ if (!ni->file.run_da.count)
+ return 0;
+ }
+
+ /* In most cases this is bdev_logical_block_size(bdev). */
+ return ni->mi.sbi->bdev_blocksize;
+}
+
+/*
+ * Returns %true if the given DIO request should be attempted with DIO, or
+ * %false if it should fall back to buffered I/O.
+ */
+static bool ntfs_should_use_dio(struct kiocb *iocb, struct iov_iter *iter)
+{
+ struct inode *inode = file_inode(iocb->ki_filp);
+ u32 dio_align = ntfs_dio_alignment(inode);
+
+ if (!dio_align)
+ return false;
+
+ return IS_ALIGNED(iocb->ki_pos | iov_iter_alignment(iter), dio_align);
+}
+
static int ntfs_ioctl_fitrim(struct ntfs_sb_info *sbi, unsigned long arg)
{
struct fstrim_range __user *user_range;
@@ -186,10 +218,10 @@ int ntfs_getattr(struct mnt_idmap *idmap, const struct path *path,
static int ntfs_extend_initialized_size(struct file *file,
struct ntfs_inode *ni,
- const loff_t valid,
const loff_t new_valid)
{
struct inode *inode = &ni->vfs_inode;
+ const loff_t valid = ni->i_valid;
int err;
if (valid >= new_valid)
@@ -200,8 +232,6 @@ static int ntfs_extend_initialized_size(struct file *file,
return 0;
}
- WARN_ON(is_compressed(ni));
-
err = iomap_zero_range(inode, valid, new_valid - valid, NULL,
&ntfs_iomap_ops, &ntfs_iomap_folio_ops, NULL);
if (err) {
@@ -291,7 +321,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
for (; vcn < end; vcn += len) {
err = attr_data_get_block(ni, vcn, 1, &lcn,
&len, &new, true,
- NULL);
+ NULL, false);
if (err)
goto out;
}
@@ -302,8 +332,7 @@ static int ntfs_file_mmap_prepare(struct vm_area_desc *desc)
err = -EAGAIN;
goto out;
}
- err = ntfs_extend_initialized_size(file, ni,
- ni->i_valid, to);
+ err = ntfs_extend_initialized_size(file, ni, to);
inode_unlock(inode);
if (err)
goto out;
@@ -333,55 +362,23 @@ static int ntfs_extend(struct inode *inode, loff_t pos, size_t count,
ntfs_set_state(ni->mi.sbi, NTFS_DIRTY_DIRTY);
if (end > inode->i_size) {
+ /*
+ * Normal files: increase file size, allocate space.
+ * Sparse/Compressed: increase file size. No space allocated.
+ */
err = ntfs_set_size(inode, end);
if (err)
goto out;
}
if (extend_init && !is_compressed(ni)) {
- err = ntfs_extend_initialized_size(file, ni, ni->i_valid, pos);
+ err = ntfs_extend_initialized_size(file, ni, pos);
if (err)
goto out;
} else {
err = 0;
}
- if (file && is_sparsed(ni)) {
- /*
- * This code optimizes large writes to sparse file.
- * TODO: merge this fragment with fallocate fragment.
- */
- struct ntfs_sb_info *sbi = ni->mi.sbi;
- CLST vcn = pos >> sbi->cluster_bits;
- CLST cend = bytes_to_cluster(sbi, end);
- CLST cend_v = bytes_to_cluster(sbi, ni->i_valid);
- CLST lcn, clen;
- bool new;
-
- if (cend_v > cend)
- cend_v = cend;
-
- /*
- * Allocate and zero new clusters.
- * Zeroing these clusters may be too long.
- */
- for (; vcn < cend_v; vcn += clen) {
- err = attr_data_get_block(ni, vcn, cend_v - vcn, &lcn,
- &clen, &new, true, NULL);
- if (err)
- goto out;
- }
- /*
- * Allocate but not zero new clusters.
- */
- for (; vcn < cend; vcn += clen) {
- err = attr_data_get_block(ni, vcn, cend - vcn, &lcn,
- &clen, &new, false, NULL);
- if (err)
- goto out;
- }
- }
-
inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode));
mark_inode_dirty(inode);
@@ -414,8 +411,9 @@ static int ntfs_truncate(struct inode *inode, loff_t new_size)
ni_lock(ni);
down_write(&ni->file.run_lock);
- err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
- &new_valid, ni->mi.sbi->options->prealloc, NULL);
+ err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
+ &new_valid, ni->mi.sbi->options->prealloc, NULL,
+ false);
up_write(&ni->file.run_lock);
ni->i_valid = new_valid;
@@ -507,7 +505,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
if (mode & FALLOC_FL_PUNCH_HOLE) {
u32 frame_size;
- loff_t mask, vbo_a, end_a, tmp;
+ loff_t mask, vbo_a, end_a, tmp, from;
err = filemap_write_and_wait_range(mapping, vbo_down,
LLONG_MAX);
@@ -527,28 +525,24 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
/* Process not aligned punch. */
err = 0;
+ if (end > i_size)
+ end = i_size;
mask = frame_size - 1;
vbo_a = (vbo + mask) & ~mask;
end_a = end & ~mask;
tmp = min(vbo_a, end);
- if (tmp > vbo) {
- err = iomap_zero_range(inode, vbo, tmp - vbo, NULL,
- &ntfs_iomap_ops,
- &ntfs_iomap_folio_ops, NULL);
- if (err)
- goto out;
- }
-
- if (vbo < end_a && end_a < end) {
- err = iomap_zero_range(inode, end_a, end - end_a, NULL,
+ from = min_t(loff_t, ni->i_valid, vbo);
+ /* Zero head of punch. */
+ if (tmp > from) {
+ err = iomap_zero_range(inode, from, tmp - from, NULL,
&ntfs_iomap_ops,
&ntfs_iomap_folio_ops, NULL);
if (err)
goto out;
}
- /* Aligned punch_hole */
+ /* Aligned punch_hole. Deallocate clusters. */
if (end_a > vbo_a) {
ni_lock(ni);
err = attr_punch_hole(ni, vbo_a, end_a - vbo_a, NULL);
@@ -556,6 +550,15 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
if (err)
goto out;
}
+
+ /* Zero tail of punch. */
+ if (vbo < end_a && end_a < end) {
+ err = iomap_zero_range(inode, end_a, end - end_a, NULL,
+ &ntfs_iomap_ops,
+ &ntfs_iomap_folio_ops, NULL);
+ if (err)
+ goto out;
+ }
} else if (mode & FALLOC_FL_COLLAPSE_RANGE) {
/*
* Write tail of the last page before removed range since
@@ -653,17 +656,26 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
for (; vcn < cend_v; vcn += clen) {
err = attr_data_get_block(ni, vcn, cend_v - vcn,
&lcn, &clen, &new,
- true, NULL);
+ true, NULL, false);
if (err)
goto out;
}
+
+ /*
+ * Moving up 'valid size'.
+ */
+ err = ntfs_extend_initialized_size(
+ file, ni, (u64)cend_v << cluster_bits);
+ if (err)
+ goto out;
+
/*
* Allocate but not zero new clusters.
*/
for (; vcn < cend; vcn += clen) {
err = attr_data_get_block(ni, vcn, cend - vcn,
&lcn, &clen, &new,
- false, NULL);
+ false, NULL, false);
if (err)
goto out;
}
@@ -674,7 +686,7 @@ static long ntfs_fallocate(struct file *file, int mode, loff_t vbo, loff_t len)
/* True - Keep preallocated. */
err = attr_set_size(ni, ATTR_DATA, NULL, 0,
&ni->file.run, i_size, &ni->i_valid,
- true, NULL);
+ true);
ni_unlock(ni);
if (err)
goto out;
@@ -816,6 +828,8 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
size_t bytes = iov_iter_count(iter);
+ loff_t valid, i_size, vbo, end;
+ unsigned int dio_flags;
ssize_t err;
err = check_read_restriction(inode);
@@ -835,62 +849,63 @@ static ssize_t ntfs_file_read_iter(struct kiocb *iocb, struct iov_iter *iter)
file->f_ra.ra_pages = 0;
}
- /* Check minimum alignment for dio. */
- if ((iocb->ki_flags & IOCB_DIRECT) &&
- (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(iter)) &
- ni->mi.sbi->bdev_blocksize_mask))) {
- /* Fallback to buffered I/O */
+ /* Fallback to buffered I/O if the inode does not support direct I/O. */
+ if (!(iocb->ki_flags & IOCB_DIRECT) ||
+ !ntfs_should_use_dio(iocb, iter)) {
iocb->ki_flags &= ~IOCB_DIRECT;
+ return generic_file_read_iter(iocb, iter);
}
- if (iocb->ki_flags & IOCB_DIRECT) {
- loff_t valid, i_size;
- loff_t vbo = iocb->ki_pos;
- loff_t end = vbo + bytes;
- unsigned int dio_flags = IOMAP_DIO_PARTIAL;
-
- if (iocb->ki_flags & IOCB_NOWAIT) {
- if (!inode_trylock_shared(inode))
- return -EAGAIN;
- } else {
- inode_lock_shared(inode);
- }
-
- valid = ni->i_valid;
- i_size = inode->i_size;
+ if (iocb->ki_flags & IOCB_NOWAIT) {
+ if (!inode_trylock_shared(inode))
+ return -EAGAIN;
+ } else {
+ inode_lock_shared(inode);
+ }
- if (vbo < valid) {
- if (valid < end) {
- /* read cross 'valid' size. */
- dio_flags |= IOMAP_DIO_FORCE_WAIT;
- }
+ vbo = iocb->ki_pos;
+ end = vbo + bytes;
+ dio_flags = 0;
+ valid = ni->i_valid;
+ i_size = inode->i_size;
- err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL,
- dio_flags, NULL, 0);
+ if (vbo < valid) {
+ if (valid < end) {
+ /* read cross 'valid' size. */
+ dio_flags |= IOMAP_DIO_FORCE_WAIT;
+ }
- if (err > 0) {
- end = vbo + err;
- if (valid < end) {
- size_t to_zero = end - valid;
- /* Fix iter. */
- iov_iter_revert(iter, to_zero);
- iov_iter_zero(to_zero, iter);
- }
- }
- } else if (vbo < i_size) {
- if (end > i_size)
- bytes = i_size - vbo;
- iov_iter_zero(bytes, iter);
- iocb->ki_pos += bytes;
- err = bytes;
+ if (ni->file.run_da.count) {
+ /* Direct I/O is not compatible with delalloc. */
+ err = ni_allocate_da_blocks(ni);
+ if (err)
+ goto out;
}
- inode_unlock_shared(inode);
- file_accessed(iocb->ki_filp);
- return err;
+ err = iomap_dio_rw(iocb, iter, &ntfs_iomap_ops, NULL, dio_flags,
+ NULL, 0);
+
+ if (err <= 0)
+ goto out;
+ end = vbo + err;
+ if (valid < end) {
+ size_t to_zero = end - valid;
+ /* Fix iter. */
+ iov_iter_revert(iter, to_zero);
+ iov_iter_zero(to_zero, iter);
+ }
+ } else if (vbo < i_size) {
+ if (end > i_size)
+ bytes = i_size - vbo;
+ iov_iter_zero(bytes, iter);
+ iocb->ki_pos += bytes;
+ err = bytes;
}
- return generic_file_read_iter(iocb, iter);
+out:
+ inode_unlock_shared(inode);
+ file_accessed(iocb->ki_filp);
+ return err;
}
/*
@@ -1011,17 +1026,13 @@ static ssize_t ntfs_compress_write(struct kiocb *iocb, struct iov_iter *from)
off = valid & (frame_size - 1);
err = attr_data_get_block(ni, frame << NTFS_LZNT_CUNIT, 1, &lcn,
- &clen, NULL, false, NULL);
+ &clen, NULL, false, NULL, false);
if (err)
goto out;
if (lcn == SPARSE_LCN) {
- valid = frame_vbo + ((u64)clen << sbi->cluster_bits);
- if (ni->i_valid == valid) {
- err = -EINVAL;
- goto out;
- }
- ni->i_valid = valid;
+ ni->i_valid = valid =
+ frame_vbo + ((u64)clen << sbi->cluster_bits);
continue;
}
@@ -1207,6 +1218,9 @@ static int check_write_restriction(struct inode *inode)
return -EOPNOTSUPP;
}
+ if (unlikely(IS_IMMUTABLE(inode)))
+ return -EPERM;
+
return 0;
}
@@ -1218,8 +1232,6 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
struct file *file = iocb->ki_filp;
struct inode *inode = file_inode(file);
struct ntfs_inode *ni = ntfs_i(inode);
- struct super_block *sb = inode->i_sb;
- struct ntfs_sb_info *sbi = sb->s_fs_info;
ssize_t ret, err;
if (!inode_trylock(inode)) {
@@ -1263,15 +1275,11 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
goto out;
}
- /* Check minimum alignment for dio. */
- if ((iocb->ki_flags & IOCB_DIRECT) &&
- (is_resident(ni) || ((iocb->ki_pos | iov_iter_alignment(from)) &
- sbi->bdev_blocksize_mask))) {
- /* Fallback to buffered I/O */
+ /* Fallback to buffered I/O if the inode does not support direct I/O. */
+ if (!(iocb->ki_flags & IOCB_DIRECT) ||
+ !ntfs_should_use_dio(iocb, from)) {
iocb->ki_flags &= ~IOCB_DIRECT;
- }
- if (!(iocb->ki_flags & IOCB_DIRECT)) {
ret = iomap_file_buffered_write(iocb, from, &ntfs_iomap_ops,
&ntfs_iomap_folio_ops, NULL);
inode_unlock(inode);
@@ -1282,8 +1290,14 @@ static ssize_t ntfs_file_write_iter(struct kiocb *iocb, struct iov_iter *from)
return ret;
}
- ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, IOMAP_DIO_PARTIAL,
- NULL, 0);
+ if (ni->file.run_da.count) {
+ /* Direct I/O is not compatible with delalloc. */
+ ret = ni_allocate_da_blocks(ni);
+ if (ret)
+ goto out;
+ }
+
+ ret = iomap_dio_rw(iocb, from, &ntfs_iomap_ops, NULL, 0, NULL, 0);
if (ret == -ENOTBLK) {
/* Returns -ENOTBLK in case of a page invalidation failure for writes.*/
@@ -1370,34 +1384,42 @@ int ntfs_file_open(struct inode *inode, struct file *file)
/*
* ntfs_file_release - file_operations::release
+ *
+ * Called when an inode is released. Note that this is different
+ * from ntfs_file_open: open gets called at every open, but release
+ * gets called only when /all/ the files are closed.
*/
static int ntfs_file_release(struct inode *inode, struct file *file)
{
- struct ntfs_inode *ni = ntfs_i(inode);
- struct ntfs_sb_info *sbi = ni->mi.sbi;
- int err = 0;
-
- /* If we are last writer on the inode, drop the block reservation. */
- if (sbi->options->prealloc &&
- ((file->f_mode & FMODE_WRITE) &&
- atomic_read(&inode->i_writecount) == 1)
- /*
- * The only file when inode->i_fop = &ntfs_file_operations and
- * init_rwsem(&ni->file.run_lock) is not called explicitly is MFT.
- *
- * Add additional check here.
- */
- && inode->i_ino != MFT_REC_MFT) {
+ int err;
+ struct ntfs_inode *ni;
+
+ if (!(file->f_mode & FMODE_WRITE) ||
+ atomic_read(&inode->i_writecount) != 1 ||
+ inode->i_ino == MFT_REC_MFT) {
+ return 0;
+ }
+
+ /* Close the last writer on the inode. */
+ ni = ntfs_i(inode);
+
+ /* Allocate delayed blocks (clusters). */
+ err = ni_allocate_da_blocks(ni);
+ if (err)
+ goto out;
+
+ if (ni->mi.sbi->options->prealloc) {
ni_lock(ni);
down_write(&ni->file.run_lock);
+ /* Deallocate preallocated. */
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
- i_size_read(inode), &ni->i_valid, false,
- NULL);
+ inode->i_size, &ni->i_valid, false);
up_write(&ni->file.run_lock);
ni_unlock(ni);
}
+out:
return err;
}
@@ -1506,7 +1528,7 @@ static loff_t ntfs_llseek(struct file *file, loff_t offset, int whence)
if (whence == SEEK_DATA || whence == SEEK_HOLE) {
inode_lock_shared(inode);
- /* Scan fragments for hole or data. */
+ /* Scan file for hole or data. */
ret = ni_seek_data_or_hole(ni, offset, whence == SEEK_DATA);
inode_unlock_shared(inode);
diff --git a/fs/ntfs3/frecord.c b/fs/ntfs3/frecord.c
index 0dc28815331e..bd0fa481e4b3 100644
--- a/fs/ntfs3/frecord.c
+++ b/fs/ntfs3/frecord.c
@@ -123,6 +123,8 @@ void ni_clear(struct ntfs_inode *ni)
indx_clear(&ni->dir);
else {
run_close(&ni->file.run);
+ ntfs_sub_da(ni->mi.sbi, run_len(&ni->file.run_da));
+ run_close(&ni->file.run_da);
#ifdef CONFIG_NTFS3_LZX_XPRESS
if (ni->file.offs_folio) {
/* On-demand allocated page for offsets. */
@@ -2014,7 +2016,8 @@ int ni_decompress_file(struct ntfs_inode *ni)
for (vcn = vbo >> sbi->cluster_bits; vcn < end; vcn += clen) {
err = attr_data_get_block(ni, vcn, cend - vcn, &lcn,
- &clen, &new, false, NULL);
+ &clen, &new, false, NULL,
+ false);
if (err)
goto out;
}
@@ -2235,7 +2238,7 @@ int ni_read_frame(struct ntfs_inode *ni, u64 frame_vbo, struct page **pages,
struct runs_tree *run = &ni->file.run;
u64 valid_size = ni->i_valid;
u64 vbo_disk;
- size_t unc_size;
+ size_t unc_size = 0;
u32 frame_size, i, ondisk_size;
struct page *pg;
struct ATTRIB *attr;
@@ -2846,7 +2849,7 @@ loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data)
/* Enumerate all fragments. */
for (vcn = offset >> cluster_bits;; vcn += clen) {
err = attr_data_get_block(ni, vcn, 1, &lcn, &clen, NULL, false,
- NULL);
+ NULL, false);
if (err) {
return err;
}
@@ -2886,9 +2889,9 @@ loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data)
}
} else {
/*
- * Adjust the file offset to the next hole in the file greater than or
+ * Adjust the file offset to the next hole in the file greater than or
* equal to offset. If offset points into the middle of a hole, then the
- * file offset is set to offset. If there is no hole past offset, then the
+ * file offset is set to offset. If there is no hole past offset, then the
* file offset is adjusted to the end of the file
* (i.e., there is an implicit hole at the end of any file).
*/
@@ -3235,3 +3238,62 @@ out:
return 0;
}
+
+/*
+ * Force to allocate all delay allocated clusters.
+ */
+int ni_allocate_da_blocks(struct ntfs_inode *ni)
+{
+ int err;
+
+ ni_lock(ni);
+ down_write(&ni->file.run_lock);
+
+ err = ni_allocate_da_blocks_locked(ni);
+
+ up_write(&ni->file.run_lock);
+ ni_unlock(ni);
+
+ return err;
+}
+
+/*
+ * Force to allocate all delay allocated clusters.
+ */
+int ni_allocate_da_blocks_locked(struct ntfs_inode *ni)
+{
+ int err;
+
+ if (!ni->file.run_da.count)
+ return 0;
+
+ if (is_sparsed(ni)) {
+ CLST vcn, lcn, clen, alen;
+ bool new;
+
+ /*
+ * Sparse file allocates clusters in 'attr_data_get_block_locked'
+ */
+ while (run_get_entry(&ni->file.run_da, 0, &vcn, &lcn, &clen)) {
+ /* TODO: zero=true? */
+ err = attr_data_get_block_locked(ni, vcn, clen, &lcn,
+ &alen, &new, true,
+ NULL, true);
+ if (err)
+ break;
+ if (!new) {
+ err = -EINVAL;
+ break;
+ }
+ }
+ } else {
+ /*
+ * Normal file allocates clusters in 'attr_set_size'
+ */
+ err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run,
+ ni->vfs_inode.i_size, &ni->i_valid,
+ false, NULL, true);
+ }
+
+ return err;
+}
diff --git a/fs/ntfs3/fsntfs.c b/fs/ntfs3/fsntfs.c
index 2ef500f1a9fa..5f44e91d7997 100644
--- a/fs/ntfs3/fsntfs.c
+++ b/fs/ntfs3/fsntfs.c
@@ -445,36 +445,59 @@ up_write:
}
/*
- * ntfs_check_for_free_space
+ * ntfs_check_free_space
*
* Check if it is possible to allocate 'clen' clusters and 'mlen' Mft records
*/
-bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen)
+bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen,
+ bool da)
{
size_t free, zlen, avail;
struct wnd_bitmap *wnd;
+ CLST da_clusters = ntfs_get_da(sbi);
wnd = &sbi->used.bitmap;
down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_CLUSTERS);
free = wnd_zeroes(wnd);
+
+ if (free >= da_clusters) {
+ free -= da_clusters;
+ } else {
+ free = 0;
+ }
+
zlen = min_t(size_t, NTFS_MIN_MFT_ZONE, wnd_zone_len(wnd));
up_read(&wnd->rw_lock);
- if (free < zlen + clen)
+ if (free < zlen + clen) {
return false;
+ }
avail = free - (zlen + clen);
- wnd = &sbi->mft.bitmap;
- down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
- free = wnd_zeroes(wnd);
- zlen = wnd_zone_len(wnd);
- up_read(&wnd->rw_lock);
+ /*
+ * When delalloc is active then keep in mind some reserved space.
+ * The worst case: 1 mft record per each ~500 clusters.
+ */
+ if (da) {
+ /* 1 mft record per each 1024 clusters. */
+ mlen += da_clusters >> 10;
+ }
+
+ if (mlen || !avail) {
+ wnd = &sbi->mft.bitmap;
+ down_read_nested(&wnd->rw_lock, BITMAP_MUTEX_MFT);
+ free = wnd_zeroes(wnd);
+ zlen = wnd_zone_len(wnd);
+ up_read(&wnd->rw_lock);
- if (free >= zlen + mlen)
- return true;
+ if (free < zlen + mlen &&
+ avail < bytes_to_cluster(sbi, mlen << sbi->record_bits)) {
+ return false;
+ }
+ }
- return avail >= bytes_to_cluster(sbi, mlen << sbi->record_bits);
+ return true;
}
/*
@@ -509,8 +532,8 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi)
/* Step 1: Resize $MFT::DATA. */
down_write(&ni->file.run_lock);
- err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run,
- new_mft_bytes, NULL, false, &attr);
+ err = attr_set_size_ex(ni, ATTR_DATA, NULL, 0, &ni->file.run,
+ new_mft_bytes, NULL, false, &attr, false);
if (err) {
up_write(&ni->file.run_lock);
@@ -525,7 +548,7 @@ static int ntfs_extend_mft(struct ntfs_sb_info *sbi)
new_bitmap_bytes = ntfs3_bitmap_size(new_mft_total);
err = attr_set_size(ni, ATTR_BITMAP, NULL, 0, &sbi->mft.bitmap.run,
- new_bitmap_bytes, &new_bitmap_bytes, true, NULL);
+ new_bitmap_bytes, &new_bitmap_bytes, true);
/* Refresh MFT Zone if necessary. */
down_write_nested(&sbi->used.bitmap.rw_lock, BITMAP_MUTEX_CLUSTERS);
@@ -2191,7 +2214,7 @@ int ntfs_insert_security(struct ntfs_sb_info *sbi,
if (new_sds_size > ni->vfs_inode.i_size) {
err = attr_set_size(ni, ATTR_DATA, SDS_NAME,
ARRAY_SIZE(SDS_NAME), &ni->file.run,
- new_sds_size, &new_sds_size, false, NULL);
+ new_sds_size, &new_sds_size, false);
if (err)
goto out;
}
diff --git a/fs/ntfs3/index.c b/fs/ntfs3/index.c
index d08bee3c20fa..2416c61050f1 100644
--- a/fs/ntfs3/index.c
+++ b/fs/ntfs3/index.c
@@ -1446,8 +1446,8 @@ static int indx_create_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
run_init(&run);
- err = attr_allocate_clusters(sbi, &run, 0, 0, len, NULL, ALLOCATE_DEF,
- &alen, 0, NULL, NULL);
+ err = attr_allocate_clusters(sbi, &run, NULL, 0, 0, len, NULL,
+ ALLOCATE_DEF, &alen, 0, NULL, NULL);
if (err)
goto out;
@@ -1531,8 +1531,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
/* Increase bitmap. */
err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
&indx->bitmap_run,
- ntfs3_bitmap_size(bit + 1), NULL, true,
- NULL);
+ ntfs3_bitmap_size(bit + 1), NULL, true);
if (err)
goto out1;
}
@@ -1553,8 +1552,7 @@ static int indx_add_allocate(struct ntfs_index *indx, struct ntfs_inode *ni,
/* Increase allocation. */
err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
- &indx->alloc_run, data_size, &data_size, true,
- NULL);
+ &indx->alloc_run, data_size, &data_size, true);
if (err) {
if (bmp)
goto out2;
@@ -1572,7 +1570,7 @@ out:
out2:
/* Ops. No space? */
attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
- &indx->bitmap_run, bmp_size, &bmp_size_v, false, NULL);
+ &indx->bitmap_run, bmp_size, &bmp_size_v, false);
out1:
return err;
@@ -2106,7 +2104,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
new_data = (u64)bit << indx->index_bits;
err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
- &indx->alloc_run, new_data, &new_data, false, NULL);
+ &indx->alloc_run, new_data, &new_data, false);
if (err)
return err;
@@ -2118,7 +2116,7 @@ static int indx_shrink(struct ntfs_index *indx, struct ntfs_inode *ni,
return 0;
err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
- &indx->bitmap_run, bpb, &bpb, false, NULL);
+ &indx->bitmap_run, bpb, &bpb, false);
return err;
}
@@ -2333,6 +2331,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
hdr = &root->ihdr;
e = fnd->root_de;
n = NULL;
+ ib = NULL;
}
e_size = le16_to_cpu(e->size);
@@ -2355,7 +2354,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
* Check to see if removing that entry made
* the leaf empty.
*/
- if (ib_is_leaf(ib) && ib_is_empty(ib)) {
+ if (ib && ib_is_leaf(ib) && ib_is_empty(ib)) {
fnd_pop(fnd);
fnd_push(fnd2, n, e);
}
@@ -2603,7 +2602,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
in = &s_index_names[indx->type];
err = attr_set_size(ni, ATTR_ALLOC, in->name, in->name_len,
- &indx->alloc_run, 0, NULL, false, NULL);
+ &indx->alloc_run, 0, NULL, false);
if (in->name == I30_NAME)
i_size_write(&ni->vfs_inode, 0);
@@ -2612,7 +2611,7 @@ int indx_delete_entry(struct ntfs_index *indx, struct ntfs_inode *ni,
run_close(&indx->alloc_run);
err = attr_set_size(ni, ATTR_BITMAP, in->name, in->name_len,
- &indx->bitmap_run, 0, NULL, false, NULL);
+ &indx->bitmap_run, 0, NULL, false);
err = ni_remove_attr(ni, ATTR_BITMAP, in->name, in->name_len,
false, NULL);
run_close(&indx->bitmap_run);
diff --git a/fs/ntfs3/inode.c b/fs/ntfs3/inode.c
index 2147fce8e0b2..aca774f1aed1 100644
--- a/fs/ntfs3/inode.c
+++ b/fs/ntfs3/inode.c
@@ -40,7 +40,7 @@ static struct inode *ntfs_read_mft(struct inode *inode,
u32 rp_fa = 0, asize, t32;
u16 roff, rsize, names = 0, links = 0;
const struct ATTR_FILE_NAME *fname = NULL;
- const struct INDEX_ROOT *root;
+ const struct INDEX_ROOT *root = NULL;
struct REPARSE_DATA_BUFFER rp; // 0x18 bytes
u64 t64;
struct MFT_REC *rec;
@@ -556,6 +556,25 @@ struct inode *ntfs_iget5(struct super_block *sb, const struct MFT_REF *ref,
static sector_t ntfs_bmap(struct address_space *mapping, sector_t block)
{
+ struct inode *inode = mapping->host;
+ struct ntfs_inode *ni = ntfs_i(inode);
+
+ /*
+ * We can get here for an inline file via the FIBMAP ioctl
+ */
+ if (is_resident(ni))
+ return 0;
+
+ if (mapping_tagged(mapping, PAGECACHE_TAG_DIRTY) &&
+ !run_is_empty(&ni->file.run_da)) {
+ /*
+ * With delalloc data we want to sync the file so
+ * that we can make sure we allocate blocks for file and data
+ * is in place for the user to see it
+ */
+ ni_allocate_da_blocks(ni);
+ }
+
return iomap_bmap(mapping, block, &ntfs_iomap_ops);
}
@@ -722,7 +741,7 @@ int ntfs_set_size(struct inode *inode, u64 new_size)
down_write(&ni->file.run_lock);
err = attr_set_size(ni, ATTR_DATA, NULL, 0, &ni->file.run, new_size,
- &ni->i_valid, true, NULL);
+ &ni->i_valid, true);
if (!err) {
i_size_write(inode, new_size);
@@ -736,6 +755,10 @@ int ntfs_set_size(struct inode *inode, u64 new_size)
}
/*
+ * Special value to detect ntfs_writeback_range call
+ */
+#define WB_NO_DA (struct iomap *)1
+/*
* Function to get mapping vbo -> lbo.
* used with:
* - iomap_zero_range
@@ -760,22 +783,40 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
loff_t endbyte = offset + length;
void *res = NULL;
int err;
- CLST lcn, clen, clen_max;
+ CLST lcn, clen, clen_max = 1;
bool new_clst = false;
+ bool no_da;
+ bool zero = false;
if (unlikely(ntfs3_forced_shutdown(sbi->sb)))
return -EIO;
- if ((flags & IOMAP_REPORT) && offset > ntfs_get_maxbytes(ni)) {
- /* called from fiemap/bmap. */
- return -EINVAL;
+ if (flags & IOMAP_REPORT) {
+ if (offset > ntfs_get_maxbytes(ni)) {
+ /* called from fiemap/bmap. */
+ return -EINVAL;
+ }
+
+ if (offset >= inode->i_size) {
+ /* special code for report. */
+ return -ENOENT;
+ }
}
- clen_max = rw ? (bytes_to_cluster(sbi, endbyte) - vcn) : 1;
+ if (IOMAP_ZERO == flags && (endbyte & sbi->cluster_mask)) {
+ rw = true;
+ } else if (rw) {
+ clen_max = bytes_to_cluster(sbi, endbyte) - vcn;
+ }
- err = attr_data_get_block(
- ni, vcn, clen_max, &lcn, &clen, rw ? &new_clst : NULL,
- flags == IOMAP_WRITE && (off || (endbyte & sbi->cluster_mask)),
- &res);
+ /*
+ * Force to allocate clusters if directIO(write) or writeback_range.
+ * NOTE: attr_data_get_block allocates clusters only for sparse file.
+ * Normal file allocates clusters in attr_set_size.
+ */
+ no_da = flags == (IOMAP_DIRECT | IOMAP_WRITE) || srcmap == WB_NO_DA;
+
+ err = attr_data_get_block(ni, vcn, clen_max, &lcn, &clen,
+ rw ? &new_clst : NULL, zero, &res, no_da);
if (err) {
return err;
@@ -795,6 +836,8 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
lcn = SPARSE_LCN;
}
+ iomap->flags = new_clst ? IOMAP_F_NEW : 0;
+
if (lcn == RESIDENT_LCN) {
if (offset >= clen) {
kfree(res);
@@ -809,7 +852,6 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
iomap->type = IOMAP_INLINE;
iomap->offset = 0;
iomap->length = clen; /* resident size in bytes. */
- iomap->flags = 0;
return 0;
}
@@ -818,42 +860,52 @@ static int ntfs_iomap_begin(struct inode *inode, loff_t offset, loff_t length,
return -EINVAL;
}
+ iomap->bdev = inode->i_sb->s_bdev;
+ iomap->offset = offset;
+ iomap->length = ((loff_t)clen << cluster_bits) - off;
+
if (lcn == COMPRESSED_LCN) {
/* should never be here. */
return -EOPNOTSUPP;
}
- iomap->flags = new_clst ? IOMAP_F_NEW : 0;
- iomap->bdev = inode->i_sb->s_bdev;
-
- /* Translate clusters into bytes. */
- iomap->offset = offset;
- iomap->addr = ((loff_t)lcn << cluster_bits) + off;
- iomap->length = ((loff_t)clen << cluster_bits) - off;
- if (length && iomap->length > length)
- iomap->length = length;
- else
- endbyte = offset + iomap->length;
-
- if (lcn == SPARSE_LCN) {
+ if (lcn == DELALLOC_LCN) {
+ iomap->type = IOMAP_DELALLOC;
iomap->addr = IOMAP_NULL_ADDR;
- iomap->type = IOMAP_HOLE;
- } else if (endbyte <= ni->i_valid) {
- iomap->type = IOMAP_MAPPED;
- } else if (offset < ni->i_valid) {
- iomap->type = IOMAP_MAPPED;
- if (flags & IOMAP_REPORT)
- iomap->length = ni->i_valid - offset;
- } else if (rw || (flags & IOMAP_ZERO)) {
- iomap->type = IOMAP_MAPPED;
} else {
- iomap->type = IOMAP_UNWRITTEN;
+
+ /* Translate clusters into bytes. */
+ iomap->addr = ((loff_t)lcn << cluster_bits) + off;
+ if (length && iomap->length > length)
+ iomap->length = length;
+ else
+ endbyte = offset + iomap->length;
+
+ if (lcn == SPARSE_LCN) {
+ iomap->addr = IOMAP_NULL_ADDR;
+ iomap->type = IOMAP_HOLE;
+ // if (IOMAP_ZERO == flags && !off) {
+ // iomap->length = (endbyte - offset) &
+ // sbi->cluster_mask_inv;
+ // }
+ } else if (endbyte <= ni->i_valid) {
+ iomap->type = IOMAP_MAPPED;
+ } else if (offset < ni->i_valid) {
+ iomap->type = IOMAP_MAPPED;
+ if (flags & IOMAP_REPORT)
+ iomap->length = ni->i_valid - offset;
+ } else if (rw || (flags & IOMAP_ZERO)) {
+ iomap->type = IOMAP_MAPPED;
+ } else {
+ iomap->type = IOMAP_UNWRITTEN;
+ }
}
- if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) {
+ if ((flags & IOMAP_ZERO) &&
+ (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) {
/* Avoid too large requests. */
u32 tail;
- u32 off_a = iomap->addr & (PAGE_SIZE - 1);
+ u32 off_a = offset & (PAGE_SIZE - 1);
if (off_a)
tail = PAGE_SIZE - off_a;
else
@@ -904,7 +956,9 @@ static int ntfs_iomap_end(struct inode *inode, loff_t pos, loff_t length,
}
}
- if ((flags & IOMAP_ZERO) && iomap->type == IOMAP_MAPPED) {
+ if ((flags & IOMAP_ZERO) &&
+ (iomap->type == IOMAP_MAPPED || iomap->type == IOMAP_DELALLOC)) {
+ /* Pair for code in ntfs_iomap_begin. */
balance_dirty_pages_ratelimited(inode->i_mapping);
cond_resched();
}
@@ -933,7 +987,7 @@ static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos,
loff_t f_pos = folio_pos(folio);
loff_t f_end = f_pos + f_size;
- if (ni->i_valid < end && end < f_end) {
+ if (ni->i_valid <= end && end < f_end) {
/* zero range [end - f_end). */
/* The only thing ntfs_iomap_put_folio used for. */
folio_zero_segment(folio, offset_in_folio(folio, end), f_size);
@@ -942,23 +996,31 @@ static void ntfs_iomap_put_folio(struct inode *inode, loff_t pos,
folio_put(folio);
}
+/*
+ * iomap_writeback_ops::writeback_range
+ */
static ssize_t ntfs_writeback_range(struct iomap_writepage_ctx *wpc,
struct folio *folio, u64 offset,
unsigned int len, u64 end_pos)
{
struct iomap *iomap = &wpc->iomap;
- struct inode *inode = wpc->inode;
-
/* Check iomap position. */
- if (!(iomap->offset <= offset &&
- offset < iomap->offset + iomap->length)) {
+ if (iomap->offset + iomap->length <= offset || offset < iomap->offset) {
int err;
+ struct inode *inode = wpc->inode;
+ struct ntfs_inode *ni = ntfs_i(inode);
struct ntfs_sb_info *sbi = ntfs_sb(inode->i_sb);
loff_t i_size_up = ntfs_up_cluster(sbi, inode->i_size);
loff_t len_max = i_size_up - offset;
- err = ntfs_iomap_begin(inode, offset, len_max, IOMAP_WRITE,
- iomap, NULL);
+ err = ni->file.run_da.count ? ni_allocate_da_blocks(ni) : 0;
+
+ if (!err) {
+ /* Use local special value 'WB_NO_DA' to disable delalloc. */
+ err = ntfs_iomap_begin(inode, offset, len_max,
+ IOMAP_WRITE, iomap, WB_NO_DA);
+ }
+
if (err) {
ntfs_set_state(sbi, NTFS_DIRTY_DIRTY);
return err;
@@ -1532,9 +1594,10 @@ int ntfs_create_inode(struct mnt_idmap *idmap, struct inode *dir,
attr->nres.alloc_size =
cpu_to_le64(ntfs_up_cluster(sbi, nsize));
- err = attr_allocate_clusters(sbi, &ni->file.run, 0, 0,
- clst, NULL, ALLOCATE_DEF,
- &alen, 0, NULL, NULL);
+ err = attr_allocate_clusters(sbi, &ni->file.run, NULL,
+ 0, 0, clst, NULL,
+ ALLOCATE_DEF, &alen, 0,
+ NULL, NULL);
if (err)
goto out5;
@@ -1675,7 +1738,7 @@ out6:
/* Delete ATTR_EA, if non-resident. */
struct runs_tree run;
run_init(&run);
- attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false, NULL);
+ attr_set_size(ni, ATTR_EA, NULL, 0, &run, 0, NULL, false);
run_close(&run);
}
diff --git a/fs/ntfs3/ntfs.h b/fs/ntfs3/ntfs.h
index ae0a6ba102c0..892f13e65d42 100644
--- a/fs/ntfs3/ntfs.h
+++ b/fs/ntfs3/ntfs.h
@@ -77,11 +77,14 @@ static_assert(sizeof(size_t) == 8);
typedef u32 CLST;
#endif
+/* On-disk sparsed cluster is marked as -1. */
#define SPARSE_LCN64 ((u64)-1)
#define SPARSE_LCN ((CLST)-1)
+/* Below is virtual (not on-disk) values. */
#define RESIDENT_LCN ((CLST)-2)
#define COMPRESSED_LCN ((CLST)-3)
#define EOF_LCN ((CLST)-4)
+#define DELALLOC_LCN ((CLST)-5)
enum RECORD_NUM {
MFT_REC_MFT = 0,
diff --git a/fs/ntfs3/ntfs_fs.h b/fs/ntfs3/ntfs_fs.h
index b7017dd4d7cd..a705923de75e 100644
--- a/fs/ntfs3/ntfs_fs.h
+++ b/fs/ntfs3/ntfs_fs.h
@@ -108,6 +108,7 @@ struct ntfs_mount_options {
unsigned force : 1; /* RW mount dirty volume. */
unsigned prealloc : 1; /* Preallocate space when file is growing. */
unsigned nocase : 1; /* case insensitive. */
+ unsigned delalloc : 1; /* delay allocation. */
};
/* Special value to unpack and deallocate. */
@@ -132,7 +133,8 @@ struct ntfs_buffers {
enum ALLOCATE_OPT {
ALLOCATE_DEF = 0, // Allocate all clusters.
ALLOCATE_MFT = 1, // Allocate for MFT.
- ALLOCATE_ZERO = 2, // Zeroout new allocated clusters
+ ALLOCATE_ZERO = 2, // Zeroout new allocated clusters.
+ ALLOCATE_ONE_FR = 4, // Allocate one fragment only.
};
enum bitmap_mutex_classes {
@@ -213,7 +215,7 @@ struct ntfs_sb_info {
u32 discard_granularity;
u64 discard_granularity_mask_inv; // ~(discard_granularity_mask_inv-1)
- u32 bdev_blocksize_mask; // bdev_logical_block_size(bdev) - 1;
+ u32 bdev_blocksize; // bdev_logical_block_size(bdev)
u32 cluster_size; // bytes per cluster
u32 cluster_mask; // == cluster_size - 1
@@ -272,6 +274,12 @@ struct ntfs_sb_info {
struct {
struct wnd_bitmap bitmap; // $Bitmap::Data
CLST next_free_lcn;
+ /* Total sum of delay allocated clusters in all files. */
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ atomic64_t da;
+#else
+ atomic_t da;
+#endif
} used;
struct {
@@ -379,7 +387,7 @@ struct ntfs_inode {
*/
u8 mi_loaded;
- /*
+ /*
* Use this field to avoid any write(s).
* If inode is bad during initialization - use make_bad_inode
* If inode is bad during operations - use this field
@@ -390,7 +398,14 @@ struct ntfs_inode {
struct ntfs_index dir;
struct {
struct rw_semaphore run_lock;
+ /* Unpacked runs from just one record. */
struct runs_tree run;
+ /*
+ * Pairs [vcn, len] for all delay allocated clusters.
+ * Normal file always contains delayed clusters in one fragment.
+ * TODO: use 2 CLST per pair instead of 3.
+ */
+ struct runs_tree run_da;
#ifdef CONFIG_NTFS3_LZX_XPRESS
struct folio *offs_folio;
#endif
@@ -430,19 +445,32 @@ enum REPARSE_SIGN {
/* Functions from attrib.c */
int attr_allocate_clusters(struct ntfs_sb_info *sbi, struct runs_tree *run,
- CLST vcn, CLST lcn, CLST len, CLST *pre_alloc,
- enum ALLOCATE_OPT opt, CLST *alen, const size_t fr,
- CLST *new_lcn, CLST *new_len);
+ struct runs_tree *run_da, CLST vcn, CLST lcn,
+ CLST len, CLST *pre_alloc, enum ALLOCATE_OPT opt,
+ CLST *alen, const size_t fr, CLST *new_lcn,
+ CLST *new_len);
int attr_make_nonresident(struct ntfs_inode *ni, struct ATTRIB *attr,
struct ATTR_LIST_ENTRY *le, struct mft_inode *mi,
u64 new_size, struct runs_tree *run,
struct ATTRIB **ins_attr, struct page *page);
-int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
- const __le16 *name, u8 name_len, struct runs_tree *run,
- u64 new_size, const u64 *new_valid, bool keep_prealloc,
- struct ATTRIB **ret);
+int attr_set_size_ex(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len, struct runs_tree *run,
+ u64 new_size, const u64 *new_valid, bool keep_prealloc,
+ struct ATTRIB **ret, bool no_da);
+static inline int attr_set_size(struct ntfs_inode *ni, enum ATTR_TYPE type,
+ const __le16 *name, u8 name_len,
+ struct runs_tree *run, u64 new_size,
+ const u64 *new_valid, bool keep_prealloc)
+{
+ return attr_set_size_ex(ni, type, name, name_len, run, new_size,
+ new_valid, keep_prealloc, NULL, false);
+}
int attr_data_get_block(struct ntfs_inode *ni, CLST vcn, CLST clen, CLST *lcn,
- CLST *len, bool *new, bool zero, void **res);
+ CLST *len, bool *new, bool zero, void **res,
+ bool no_da);
+int attr_data_get_block_locked(struct ntfs_inode *ni, CLST vcn, CLST clen,
+ CLST *lcn, CLST *len, bool *new, bool zero,
+ void **res, bool no_da);
int attr_data_write_resident(struct ntfs_inode *ni, struct folio *folio);
int attr_load_runs_vcn(struct ntfs_inode *ni, enum ATTR_TYPE type,
const __le16 *name, u8 name_len, struct runs_tree *run,
@@ -590,6 +618,8 @@ int ni_rename(struct ntfs_inode *dir_ni, struct ntfs_inode *new_dir_ni,
bool ni_is_dirty(struct inode *inode);
loff_t ni_seek_data_or_hole(struct ntfs_inode *ni, loff_t offset, bool data);
int ni_write_parents(struct ntfs_inode *ni, int sync);
+int ni_allocate_da_blocks(struct ntfs_inode *ni);
+int ni_allocate_da_blocks_locked(struct ntfs_inode *ni);
/* Globals from fslog.c */
bool check_index_header(const struct INDEX_HDR *hdr, size_t bytes);
@@ -605,7 +635,8 @@ int ntfs_loadlog_and_replay(struct ntfs_inode *ni, struct ntfs_sb_info *sbi);
int ntfs_look_for_free_space(struct ntfs_sb_info *sbi, CLST lcn, CLST len,
CLST *new_lcn, CLST *new_len,
enum ALLOCATE_OPT opt);
-bool ntfs_check_for_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen);
+bool ntfs_check_free_space(struct ntfs_sb_info *sbi, CLST clen, CLST mlen,
+ bool da);
int ntfs_look_free_mft(struct ntfs_sb_info *sbi, CLST *rno, bool mft,
struct ntfs_inode *ni, struct mft_inode **mi);
void ntfs_mark_rec_free(struct ntfs_sb_info *sbi, CLST rno, bool is_mft);
@@ -831,7 +862,8 @@ void run_truncate_around(struct runs_tree *run, CLST vcn);
bool run_add_entry(struct runs_tree *run, CLST vcn, CLST lcn, CLST len,
bool is_mft);
bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub);
-bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len);
+int run_insert_range(struct runs_tree *run, CLST vcn, CLST len);
+int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len);
bool run_get_entry(const struct runs_tree *run, size_t index, CLST *vcn,
CLST *lcn, CLST *len);
bool run_is_mapped_full(const struct runs_tree *run, CLST svcn, CLST evcn);
@@ -851,6 +883,9 @@ int run_unpack_ex(struct runs_tree *run, struct ntfs_sb_info *sbi, CLST ino,
#endif
int run_get_highest_vcn(CLST vcn, const u8 *run_buf, u64 *highest_vcn);
int run_clone(const struct runs_tree *run, struct runs_tree *new_run);
+bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done);
+CLST run_len(const struct runs_tree *run);
+CLST run_get_max_vcn(const struct runs_tree *run);
/* Globals from super.c */
void *ntfs_set_shared(void *ptr, u32 bytes);
@@ -1027,6 +1062,36 @@ static inline int ntfs3_forced_shutdown(struct super_block *sb)
return test_bit(NTFS_FLAGS_SHUTDOWN_BIT, &ntfs_sb(sb)->flags);
}
+/* Returns total sum of delay allocated clusters in all files. */
+static inline CLST ntfs_get_da(struct ntfs_sb_info *sbi)
+{
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ return atomic64_read(&sbi->used.da);
+#else
+ return atomic_read(&sbi->used.da);
+#endif
+}
+
+/* Update total count of delay allocated clusters. */
+static inline void ntfs_add_da(struct ntfs_sb_info *sbi, CLST da)
+{
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ atomic64_add(da, &sbi->used.da);
+#else
+ atomic_add(da, &sbi->used.da);
+#endif
+}
+
+/* Update total count of delay allocated clusters. */
+static inline void ntfs_sub_da(struct ntfs_sb_info *sbi, CLST da)
+{
+#ifdef CONFIG_NTFS3_64BIT_CLUSTER
+ atomic64_sub(da, &sbi->used.da);
+#else
+ atomic_sub(da, &sbi->used.da);
+#endif
+}
+
/*
* ntfs_up_cluster - Align up on cluster boundary.
*/
diff --git a/fs/ntfs3/run.c b/fs/ntfs3/run.c
index dc59cad4fa37..c0324cdc174d 100644
--- a/fs/ntfs3/run.c
+++ b/fs/ntfs3/run.c
@@ -454,7 +454,7 @@ requires_new_range:
/*
* If existing range fits then were done.
- * Otherwise extend found one and fall back to range jocode.
+ * Otherwise extend found one and fall back to range join code.
*/
if (r->vcn + r->len < vcn + len)
r->len += len - ((r->vcn + r->len) - vcn);
@@ -482,7 +482,8 @@ requires_new_range:
return true;
}
-/* run_collapse_range
+/*
+ * run_collapse_range
*
* Helper for attr_collapse_range(),
* which is helper for fallocate(collapse_range).
@@ -493,8 +494,9 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub)
struct ntfs_run *r, *e, *eat_start, *eat_end;
CLST end;
- if (WARN_ON(!run_lookup(run, vcn, &index)))
- return true; /* Should never be here. */
+ if (!run_lookup(run, vcn, &index) && index >= run->count) {
+ return true;
+ }
e = run->runs + run->count;
r = run->runs + index;
@@ -560,13 +562,13 @@ bool run_collapse_range(struct runs_tree *run, CLST vcn, CLST len, CLST sub)
* Helper for attr_insert_range(),
* which is helper for fallocate(insert_range).
*/
-bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len)
+int run_insert_range(struct runs_tree *run, CLST vcn, CLST len)
{
size_t index;
struct ntfs_run *r, *e;
if (WARN_ON(!run_lookup(run, vcn, &index)))
- return false; /* Should never be here. */
+ return -EINVAL; /* Should never be here. */
e = run->runs + run->count;
r = run->runs + index;
@@ -588,13 +590,49 @@ bool run_insert_range(struct runs_tree *run, CLST vcn, CLST len)
r->len = len1;
if (!run_add_entry(run, vcn + len, lcn2, len2, false))
- return false;
+ return -ENOMEM;
}
if (!run_add_entry(run, vcn, SPARSE_LCN, len, false))
- return false;
+ return -ENOMEM;
- return true;
+ return 0;
+}
+
+/* run_insert_range_da
+ *
+ * Helper for attr_insert_range(),
+ * which is helper for fallocate(insert_range).
+ */
+int run_insert_range_da(struct runs_tree *run, CLST vcn, CLST len)
+{
+ struct ntfs_run *r, *r0 = NULL, *e = run->runs + run->count;
+ ;
+
+ for (r = run->runs; r < e; r++) {
+ CLST end = r->vcn + r->len;
+
+ if (vcn >= end)
+ continue;
+
+ if (!r0 && r->vcn < vcn) {
+ r0 = r;
+ } else {
+ r->vcn += len;
+ }
+ }
+
+ if (r0) {
+ /* split fragment. */
+ CLST len1 = vcn - r0->vcn;
+ CLST len2 = r0->len - len1;
+
+ r0->len = len1;
+ if (!run_add_entry(run, vcn + len, SPARSE_LCN, len2, false))
+ return -ENOMEM;
+ }
+
+ return 0;
}
/*
@@ -1209,3 +1247,97 @@ int run_clone(const struct runs_tree *run, struct runs_tree *new_run)
new_run->count = run->count;
return 0;
}
+
+/*
+ * run_remove_range
+ *
+ */
+bool run_remove_range(struct runs_tree *run, CLST vcn, CLST len, CLST *done)
+{
+ size_t index, eat;
+ struct ntfs_run *r, *e, *eat_start, *eat_end;
+ CLST end, d;
+
+ *done = 0;
+
+ /* Fast check. */
+ if (!run->count)
+ return true;
+
+ if (!run_lookup(run, vcn, &index) && index >= run->count) {
+ /* No entries in this run. */
+ return true;
+ }
+
+
+ e = run->runs + run->count;
+ r = run->runs + index;
+ end = vcn + len;
+
+ if (vcn > r->vcn) {
+ CLST r_end = r->vcn + r->len;
+ d = vcn - r->vcn;
+
+ if (r_end > end) {
+ /* Remove a middle part, split. */
+ *done += len;
+ r->len = d;
+ return run_add_entry(run, end, r->lcn, r_end - end,
+ false);
+ }
+ /* Remove tail of run .*/
+ *done += r->len - d;
+ r->len = d;
+ r += 1;
+ }
+
+ eat_start = r;
+ eat_end = r;
+
+ for (; r < e; r++) {
+ if (r->vcn >= end)
+ continue;
+
+ if (r->vcn + r->len <= end) {
+ /* Eat this run. */
+ *done += r->len;
+ eat_end = r + 1;
+ continue;
+ }
+
+ d = end - r->vcn;
+ *done += d;
+ if (r->lcn != SPARSE_LCN)
+ r->lcn += d;
+ r->len -= d;
+ r->vcn = end;
+ }
+
+ eat = eat_end - eat_start;
+ memmove(eat_start, eat_end, (e - eat_end) * sizeof(*r));
+ run->count -= eat;
+
+ return true;
+}
+
+CLST run_len(const struct runs_tree *run)
+{
+ const struct ntfs_run *r, *e;
+ CLST len = 0;
+
+ for (r = run->runs, e = r + run->count; r < e; r++) {
+ len += r->len;
+ }
+
+ return len;
+}
+
+CLST run_get_max_vcn(const struct runs_tree *run)
+{
+ const struct ntfs_run *r;
+ if (!run->count)
+ return 0;
+
+ r = run->runs + run->count - 1;
+ return r->vcn + r->len;
+}
diff --git a/fs/ntfs3/super.c b/fs/ntfs3/super.c
index a3c07f2b604f..27411203082a 100644
--- a/fs/ntfs3/super.c
+++ b/fs/ntfs3/super.c
@@ -269,6 +269,8 @@ enum Opt {
Opt_prealloc,
Opt_prealloc_bool,
Opt_nocase,
+ Opt_delalloc,
+ Opt_delalloc_bool,
Opt_err,
};
@@ -293,6 +295,8 @@ static const struct fs_parameter_spec ntfs_fs_parameters[] = {
fsparam_flag("prealloc", Opt_prealloc),
fsparam_bool("prealloc", Opt_prealloc_bool),
fsparam_flag("nocase", Opt_nocase),
+ fsparam_flag("delalloc", Opt_delalloc),
+ fsparam_bool("delalloc", Opt_delalloc_bool),
{}
};
// clang-format on
@@ -410,6 +414,12 @@ static int ntfs_fs_parse_param(struct fs_context *fc,
case Opt_nocase:
opts->nocase = 1;
break;
+ case Opt_delalloc:
+ opts->delalloc = 1;
+ break;
+ case Opt_delalloc_bool:
+ opts->delalloc = result.boolean;
+ break;
default:
/* Should not be here unless we forget add case. */
return -EINVAL;
@@ -726,14 +736,22 @@ static int ntfs_statfs(struct dentry *dentry, struct kstatfs *buf)
struct super_block *sb = dentry->d_sb;
struct ntfs_sb_info *sbi = sb->s_fs_info;
struct wnd_bitmap *wnd = &sbi->used.bitmap;
+ CLST da_clusters = ntfs_get_da(sbi);
buf->f_type = sb->s_magic;
- buf->f_bsize = sbi->cluster_size;
+ buf->f_bsize = buf->f_frsize = sbi->cluster_size;
buf->f_blocks = wnd->nbits;
- buf->f_bfree = buf->f_bavail = wnd_zeroes(wnd);
+ buf->f_bfree = wnd_zeroes(wnd);
+ if (buf->f_bfree > da_clusters) {
+ buf->f_bfree -= da_clusters;
+ } else {
+ buf->f_bfree = 0;
+ }
+ buf->f_bavail = buf->f_bfree;
+
buf->f_fsid.val[0] = sbi->volume.ser_num;
- buf->f_fsid.val[1] = (sbi->volume.ser_num >> 32);
+ buf->f_fsid.val[1] = sbi->volume.ser_num >> 32;
buf->f_namelen = NTFS_NAME_LEN;
return 0;
@@ -778,6 +796,8 @@ static int ntfs_show_options(struct seq_file *m, struct dentry *root)
seq_puts(m, ",prealloc");
if (opts->nocase)
seq_puts(m, ",nocase");
+ if (opts->delalloc)
+ seq_puts(m, ",delalloc");
return 0;
}
@@ -1088,7 +1108,7 @@ read_boot:
dev_size += sector_size - 1;
}
- sbi->bdev_blocksize_mask = max(boot_sector_size, sector_size) - 1;
+ sbi->bdev_blocksize = max(boot_sector_size, sector_size);
sbi->mft.lbo = mlcn << cluster_bits;
sbi->mft.lbo2 = mlcn2 << cluster_bits;
diff --git a/fs/ntfs3/xattr.c b/fs/ntfs3/xattr.c
index c93df55e98d0..2302539852ef 100644
--- a/fs/ntfs3/xattr.c
+++ b/fs/ntfs3/xattr.c
@@ -460,7 +460,7 @@ update_ea:
new_sz = size;
err = attr_set_size(ni, ATTR_EA, NULL, 0, &ea_run, new_sz, &new_sz,
- false, NULL);
+ false);
if (err)
goto out;