summaryrefslogtreecommitdiff
path: root/fs/ext4
diff options
context:
space:
mode:
Diffstat (limited to 'fs/ext4')
-rw-r--r--fs/ext4/balloc.c7
-rw-r--r--fs/ext4/crypto.c56
-rw-r--r--fs/ext4/dir.c13
-rw-r--r--fs/ext4/ext4.h1
-rw-r--r--fs/ext4/extents.c4
-rw-r--r--fs/ext4/file.c28
-rw-r--r--fs/ext4/ialloc.c6
-rw-r--r--fs/ext4/inode.c78
-rw-r--r--fs/ext4/ioctl.c7
-rw-r--r--fs/ext4/mballoc.c2
-rw-r--r--fs/ext4/move_extent.c15
-rw-r--r--fs/ext4/namei.c26
-rw-r--r--fs/ext4/resize.c2
13 files changed, 188 insertions, 57 deletions
diff --git a/fs/ext4/balloc.c b/fs/ext4/balloc.c
index ec0668a60678..fe1f50fe764f 100644
--- a/fs/ext4/balloc.c
+++ b/fs/ext4/balloc.c
@@ -191,7 +191,6 @@ static int ext4_init_block_bitmap(struct super_block *sb,
/* If checksum is bad mark all blocks used to prevent allocation
* essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
- ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -442,14 +441,16 @@ ext4_read_block_bitmap_nowait(struct super_block *sb, ext4_group_t block_group)
}
ext4_lock_group(sb, block_group);
if (desc->bg_flags & cpu_to_le16(EXT4_BG_BLOCK_UNINIT)) {
-
err = ext4_init_block_bitmap(sb, bh, block_group, desc);
set_bitmap_uptodate(bh);
set_buffer_uptodate(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
- if (err)
+ if (err) {
+ ext4_error(sb, "Failed to init block bitmap for group "
+ "%u: %d", block_group, err);
goto out;
+ }
goto verify;
}
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/crypto.c b/fs/ext4/crypto.c
index c8021208a7eb..38f7562489bb 100644
--- a/fs/ext4/crypto.c
+++ b/fs/ext4/crypto.c
@@ -467,3 +467,59 @@ uint32_t ext4_validate_encryption_key_size(uint32_t mode, uint32_t size)
return size;
return 0;
}
+
+/*
+ * Validate dentries for encrypted directories to make sure we aren't
+ * potentially caching stale data after a key has been added or
+ * removed.
+ */
+static int ext4_d_revalidate(struct dentry *dentry, unsigned int flags)
+{
+ struct inode *dir = d_inode(dentry->d_parent);
+ struct ext4_crypt_info *ci = EXT4_I(dir)->i_crypt_info;
+ int dir_has_key, cached_with_key;
+
+ if (!ext4_encrypted_inode(dir))
+ return 0;
+
+ if (ci && ci->ci_keyring_key &&
+ (ci->ci_keyring_key->flags & ((1 << KEY_FLAG_INVALIDATED) |
+ (1 << KEY_FLAG_REVOKED) |
+ (1 << KEY_FLAG_DEAD))))
+ ci = NULL;
+
+ /* this should eventually be an flag in d_flags */
+ cached_with_key = dentry->d_fsdata != NULL;
+ dir_has_key = (ci != NULL);
+
+ /*
+ * If the dentry was cached without the key, and it is a
+ * negative dentry, it might be a valid name. We can't check
+ * if the key has since been made available due to locking
+ * reasons, so we fail the validation so ext4_lookup() can do
+ * this check.
+ *
+ * We also fail the validation if the dentry was created with
+ * the key present, but we no longer have the key, or vice versa.
+ */
+ if ((!cached_with_key && d_is_negative(dentry)) ||
+ (!cached_with_key && dir_has_key) ||
+ (cached_with_key && !dir_has_key)) {
+#if 0 /* Revalidation debug */
+ char buf[80];
+ char *cp = simple_dname(dentry, buf, sizeof(buf));
+
+ if (IS_ERR(cp))
+ cp = (char *) "???";
+ pr_err("revalidate: %s %p %d %d %d\n", cp, dentry->d_fsdata,
+ cached_with_key, d_is_negative(dentry),
+ dir_has_key);
+#endif
+ return 0;
+ }
+ return 1;
+}
+
+const struct dentry_operations ext4_encrypted_d_ops = {
+ .d_revalidate = ext4_d_revalidate,
+};
diff --git a/fs/ext4/dir.c b/fs/ext4/dir.c
index 1d1bca74f844..33f5e2a50cf8 100644
--- a/fs/ext4/dir.c
+++ b/fs/ext4/dir.c
@@ -111,6 +111,12 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
int dir_has_error = 0;
struct ext4_str fname_crypto_str = {.name = NULL, .len = 0};
+ if (ext4_encrypted_inode(inode)) {
+ err = ext4_get_encryption_info(inode);
+ if (err && err != -ENOKEY)
+ return err;
+ }
+
if (is_dx_dir(inode)) {
err = ext4_dx_readdir(file, ctx);
if (err != ERR_BAD_DX_DIR) {
@@ -157,8 +163,11 @@ static int ext4_readdir(struct file *file, struct dir_context *ctx)
index, 1);
file->f_ra.prev_pos = (loff_t)index << PAGE_CACHE_SHIFT;
bh = ext4_bread(NULL, inode, map.m_lblk, 0);
- if (IS_ERR(bh))
- return PTR_ERR(bh);
+ if (IS_ERR(bh)) {
+ err = PTR_ERR(bh);
+ bh = NULL;
+ goto errout;
+ }
}
if (!bh) {
diff --git a/fs/ext4/ext4.h b/fs/ext4/ext4.h
index 0662b285dc8a..157b458a69d4 100644
--- a/fs/ext4/ext4.h
+++ b/fs/ext4/ext4.h
@@ -2302,6 +2302,7 @@ struct page *ext4_encrypt(struct inode *inode,
int ext4_decrypt(struct page *page);
int ext4_encrypted_zeroout(struct inode *inode, ext4_lblk_t lblk,
ext4_fsblk_t pblk, ext4_lblk_t len);
+extern const struct dentry_operations ext4_encrypted_d_ops;
#ifdef CONFIG_EXT4_FS_ENCRYPTION
int ext4_init_crypto(void);
diff --git a/fs/ext4/extents.c b/fs/ext4/extents.c
index 0ffabaf90aa5..3753ceb0b0dd 100644
--- a/fs/ext4/extents.c
+++ b/fs/ext4/extents.c
@@ -3928,7 +3928,7 @@ static int
convert_initialized_extent(handle_t *handle, struct inode *inode,
struct ext4_map_blocks *map,
struct ext4_ext_path **ppath, int flags,
- unsigned int allocated, ext4_fsblk_t newblock)
+ unsigned int allocated)
{
struct ext4_ext_path *path = *ppath;
struct ext4_extent *ex;
@@ -4347,7 +4347,7 @@ int ext4_ext_map_blocks(handle_t *handle, struct inode *inode,
(flags & EXT4_GET_BLOCKS_CONVERT_UNWRITTEN)) {
allocated = convert_initialized_extent(
handle, inode, map, &path,
- flags, allocated, newblock);
+ flags, allocated);
goto out2;
} else if (!ext4_ext_is_unwritten(ex))
goto out;
diff --git a/fs/ext4/file.c b/fs/ext4/file.c
index 1126436dada1..4cd318f31cbe 100644
--- a/fs/ext4/file.c
+++ b/fs/ext4/file.c
@@ -262,23 +262,8 @@ static int ext4_dax_pmd_fault(struct vm_area_struct *vma, unsigned long addr,
return result;
}
-static int ext4_dax_mkwrite(struct vm_area_struct *vma, struct vm_fault *vmf)
-{
- int err;
- struct inode *inode = file_inode(vma->vm_file);
-
- sb_start_pagefault(inode->i_sb);
- file_update_time(vma->vm_file);
- down_read(&EXT4_I(inode)->i_mmap_sem);
- err = __dax_mkwrite(vma, vmf, ext4_dax_mmap_get_block, NULL);
- up_read(&EXT4_I(inode)->i_mmap_sem);
- sb_end_pagefault(inode->i_sb);
-
- return err;
-}
-
/*
- * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_mkwrite()
+ * Handle write fault for VM_MIXEDMAP mappings. Similarly to ext4_dax_fault()
* handler we check for races agaist truncate. Note that since we cycle through
* i_mmap_sem, we are sure that also any hole punching that began before we
* were called is finished by now and so if it included part of the file we
@@ -311,7 +296,7 @@ static int ext4_dax_pfn_mkwrite(struct vm_area_struct *vma,
static const struct vm_operations_struct ext4_dax_vm_ops = {
.fault = ext4_dax_fault,
.pmd_fault = ext4_dax_pmd_fault,
- .page_mkwrite = ext4_dax_mkwrite,
+ .page_mkwrite = ext4_dax_fault,
.pfn_mkwrite = ext4_dax_pfn_mkwrite,
};
#else
@@ -350,6 +335,7 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
struct super_block *sb = inode->i_sb;
struct ext4_sb_info *sbi = EXT4_SB(inode->i_sb);
struct vfsmount *mnt = filp->f_path.mnt;
+ struct inode *dir = filp->f_path.dentry->d_parent->d_inode;
struct path path;
char buf[64], *cp;
int ret;
@@ -393,6 +379,14 @@ static int ext4_file_open(struct inode * inode, struct file * filp)
if (ext4_encryption_info(inode) == NULL)
return -ENOKEY;
}
+ if (ext4_encrypted_inode(dir) &&
+ !ext4_is_child_context_consistent_with_parent(dir, inode)) {
+ ext4_warning(inode->i_sb,
+ "Inconsistent encryption contexts: %lu/%lu\n",
+ (unsigned long) dir->i_ino,
+ (unsigned long) inode->i_ino);
+ return -EPERM;
+ }
/*
* Set up the jbd2_inode if we are opening the inode for
* writing and the journal is present
diff --git a/fs/ext4/ialloc.c b/fs/ext4/ialloc.c
index 3fcfd50a2e8a..acc0ad56bf2f 100644
--- a/fs/ext4/ialloc.c
+++ b/fs/ext4/ialloc.c
@@ -76,7 +76,6 @@ static int ext4_init_inode_bitmap(struct super_block *sb,
/* If checksum is bad mark all blocks and inodes use to prevent
* allocation, essentially implementing a per-group read-only flag. */
if (!ext4_group_desc_csum_verify(sb, block_group, gdp)) {
- ext4_error(sb, "Checksum bad for group %u", block_group);
grp = ext4_get_group_info(sb, block_group);
if (!EXT4_MB_GRP_BBITMAP_CORRUPT(grp))
percpu_counter_sub(&sbi->s_freeclusters_counter,
@@ -191,8 +190,11 @@ ext4_read_inode_bitmap(struct super_block *sb, ext4_group_t block_group)
set_buffer_verified(bh);
ext4_unlock_group(sb, block_group);
unlock_buffer(bh);
- if (err)
+ if (err) {
+ ext4_error(sb, "Failed to init inode bitmap for group "
+ "%u: %d", block_group, err);
goto out;
+ }
return bh;
}
ext4_unlock_group(sb, block_group);
diff --git a/fs/ext4/inode.c b/fs/ext4/inode.c
index 83bc8bfb3bea..aee960b1af34 100644
--- a/fs/ext4/inode.c
+++ b/fs/ext4/inode.c
@@ -686,6 +686,34 @@ out_sem:
return retval;
}
+/*
+ * Update EXT4_MAP_FLAGS in bh->b_state. For buffer heads attached to pages
+ * we have to be careful as someone else may be manipulating b_state as well.
+ */
+static void ext4_update_bh_state(struct buffer_head *bh, unsigned long flags)
+{
+ unsigned long old_state;
+ unsigned long new_state;
+
+ flags &= EXT4_MAP_FLAGS;
+
+ /* Dummy buffer_head? Set non-atomically. */
+ if (!bh->b_page) {
+ bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | flags;
+ return;
+ }
+ /*
+ * Someone else may be modifying b_state. Be careful! This is ugly but
+ * once we get rid of using bh as a container for mapping information
+ * to pass to / from get_block functions, this can go away.
+ */
+ do {
+ old_state = READ_ONCE(bh->b_state);
+ new_state = (old_state & ~EXT4_MAP_FLAGS) | flags;
+ } while (unlikely(
+ cmpxchg(&bh->b_state, old_state, new_state) != old_state));
+}
+
/* Maximum number of blocks we map for direct IO at once. */
#define DIO_MAX_BLOCKS 4096
@@ -722,7 +750,7 @@ static int _ext4_get_block(struct inode *inode, sector_t iblock,
ext4_io_end_t *io_end = ext4_inode_aio(inode);
map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+ ext4_update_bh_state(bh, map.m_flags);
if (io_end && io_end->flag & EXT4_IO_END_UNWRITTEN)
set_buffer_defer_completion(bh);
bh->b_size = inode->i_sb->s_blocksize * map.m_len;
@@ -1685,7 +1713,7 @@ int ext4_da_get_block_prep(struct inode *inode, sector_t iblock,
return ret;
map_bh(bh, inode->i_sb, map.m_pblk);
- bh->b_state = (bh->b_state & ~EXT4_MAP_FLAGS) | map.m_flags;
+ ext4_update_bh_state(bh, map.m_flags);
if (buffer_unwritten(bh)) {
/* A delayed write to unwritten bh should be marked
@@ -2450,6 +2478,10 @@ static int ext4_writepages(struct address_space *mapping,
trace_ext4_writepages(inode, wbc);
+ if (dax_mapping(mapping))
+ return dax_writeback_mapping_range(mapping, inode->i_sb->s_bdev,
+ wbc);
+
/*
* No pages to write? This is mainly a kludge to avoid starting
* a transaction for special inodes like journal inode on last iput()
@@ -3253,29 +3285,29 @@ static ssize_t ext4_ext_direct_IO(struct kiocb *iocb, struct iov_iter *iter,
* case, we allocate an io_end structure to hook to the iocb.
*/
iocb->private = NULL;
- ext4_inode_aio_set(inode, NULL);
- if (!is_sync_kiocb(iocb)) {
- io_end = ext4_init_io_end(inode, GFP_NOFS);
- if (!io_end) {
- ret = -ENOMEM;
- goto retake_lock;
- }
- /*
- * Grab reference for DIO. Will be dropped in ext4_end_io_dio()
- */
- iocb->private = ext4_get_io_end(io_end);
- /*
- * we save the io structure for current async direct
- * IO, so that later ext4_map_blocks() could flag the
- * io structure whether there is a unwritten extents
- * needs to be converted when IO is completed.
- */
- ext4_inode_aio_set(inode, io_end);
- }
-
if (overwrite) {
get_block_func = ext4_get_block_overwrite;
} else {
+ ext4_inode_aio_set(inode, NULL);
+ if (!is_sync_kiocb(iocb)) {
+ io_end = ext4_init_io_end(inode, GFP_NOFS);
+ if (!io_end) {
+ ret = -ENOMEM;
+ goto retake_lock;
+ }
+ /*
+ * Grab reference for DIO. Will be dropped in
+ * ext4_end_io_dio()
+ */
+ iocb->private = ext4_get_io_end(io_end);
+ /*
+ * we save the io structure for current async direct
+ * IO, so that later ext4_map_blocks() could flag the
+ * io structure whether there is a unwritten extents
+ * needs to be converted when IO is completed.
+ */
+ ext4_inode_aio_set(inode, io_end);
+ }
get_block_func = ext4_get_block_write;
dio_flags = DIO_LOCKING;
}
@@ -4127,7 +4159,7 @@ void ext4_set_inode_flags(struct inode *inode)
new_fl |= S_NOATIME;
if (flags & EXT4_DIRSYNC_FL)
new_fl |= S_DIRSYNC;
- if (test_opt(inode->i_sb, DAX))
+ if (test_opt(inode->i_sb, DAX) && S_ISREG(inode->i_mode))
new_fl |= S_DAX;
inode_set_flags(inode, new_fl,
S_SYNC|S_APPEND|S_IMMUTABLE|S_NOATIME|S_DIRSYNC|S_DAX);
diff --git a/fs/ext4/ioctl.c b/fs/ext4/ioctl.c
index 0f6c36922c24..eae5917c534e 100644
--- a/fs/ext4/ioctl.c
+++ b/fs/ext4/ioctl.c
@@ -208,7 +208,7 @@ static int ext4_ioctl_setflags(struct inode *inode,
{
struct ext4_inode_info *ei = EXT4_I(inode);
handle_t *handle = NULL;
- int err = EPERM, migrate = 0;
+ int err = -EPERM, migrate = 0;
struct ext4_iloc iloc;
unsigned int oldflags, mask, i;
unsigned int jflag;
@@ -583,6 +583,11 @@ group_extend_out:
"Online defrag not supported with bigalloc");
err = -EOPNOTSUPP;
goto mext_out;
+ } else if (IS_DAX(inode)) {
+ ext4_msg(sb, KERN_ERR,
+ "Online defrag not supported with DAX");
+ err = -EOPNOTSUPP;
+ goto mext_out;
}
err = mnt_want_write_file(filp);
diff --git a/fs/ext4/mballoc.c b/fs/ext4/mballoc.c
index 61eaf74dca37..4424b7bf8ac6 100644
--- a/fs/ext4/mballoc.c
+++ b/fs/ext4/mballoc.c
@@ -2285,7 +2285,7 @@ static int ext4_mb_seq_groups_show(struct seq_file *seq, void *v)
if (group == 0)
seq_puts(seq, "#group: free frags first ["
" 2^0 2^1 2^2 2^3 2^4 2^5 2^6 "
- " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]");
+ " 2^7 2^8 2^9 2^10 2^11 2^12 2^13 ]\n");
i = (sb->s_blocksize_bits + 2) * sizeof(sg.info.bb_counters[0]) +
sizeof(struct ext4_group_info);
diff --git a/fs/ext4/move_extent.c b/fs/ext4/move_extent.c
index fb6f11709ae6..e032a0423e35 100644
--- a/fs/ext4/move_extent.c
+++ b/fs/ext4/move_extent.c
@@ -265,11 +265,12 @@ move_extent_per_page(struct file *o_filp, struct inode *donor_inode,
ext4_lblk_t orig_blk_offset, donor_blk_offset;
unsigned long blocksize = orig_inode->i_sb->s_blocksize;
unsigned int tmp_data_size, data_size, replaced_size;
- int err2, jblocks, retries = 0;
+ int i, err2, jblocks, retries = 0;
int replaced_count = 0;
int from = data_offset_in_page << orig_inode->i_blkbits;
int blocks_per_page = PAGE_CACHE_SIZE >> orig_inode->i_blkbits;
struct super_block *sb = orig_inode->i_sb;
+ struct buffer_head *bh = NULL;
/*
* It needs twice the amount of ordinary journal buffers because
@@ -380,8 +381,16 @@ data_copy:
}
/* Perform all necessary steps similar write_begin()/write_end()
* but keeping in mind that i_size will not change */
- *err = __block_write_begin(pagep[0], from, replaced_size,
- ext4_get_block);
+ if (!page_has_buffers(pagep[0]))
+ create_empty_buffers(pagep[0], 1 << orig_inode->i_blkbits, 0);
+ bh = page_buffers(pagep[0]);
+ for (i = 0; i < data_offset_in_page; i++)
+ bh = bh->b_this_page;
+ for (i = 0; i < block_len_in_page; i++) {
+ *err = ext4_get_block(orig_inode, orig_blk_offset + i, bh, 0);
+ if (*err < 0)
+ break;
+ }
if (!*err)
*err = block_commit_write(pagep[0], from, from + replaced_size);
diff --git a/fs/ext4/namei.c b/fs/ext4/namei.c
index 06574dd77614..48e4b8907826 100644
--- a/fs/ext4/namei.c
+++ b/fs/ext4/namei.c
@@ -1558,6 +1558,24 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
struct ext4_dir_entry_2 *de;
struct buffer_head *bh;
+ if (ext4_encrypted_inode(dir)) {
+ int res = ext4_get_encryption_info(dir);
+
+ /*
+ * This should be a properly defined flag for
+ * dentry->d_flags when we uplift this to the VFS.
+ * d_fsdata is set to (void *) 1 if if the dentry is
+ * created while the directory was encrypted and we
+ * don't have access to the key.
+ */
+ dentry->d_fsdata = NULL;
+ if (ext4_encryption_info(dir))
+ dentry->d_fsdata = (void *) 1;
+ d_set_d_op(dentry, &ext4_encrypted_d_ops);
+ if (res && res != -ENOKEY)
+ return ERR_PTR(res);
+ }
+
if (dentry->d_name.len > EXT4_NAME_LEN)
return ERR_PTR(-ENAMETOOLONG);
@@ -1585,11 +1603,15 @@ static struct dentry *ext4_lookup(struct inode *dir, struct dentry *dentry, unsi
return ERR_PTR(-EFSCORRUPTED);
}
if (!IS_ERR(inode) && ext4_encrypted_inode(dir) &&
- (S_ISREG(inode->i_mode) || S_ISDIR(inode->i_mode) ||
- S_ISLNK(inode->i_mode)) &&
+ (S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)) &&
!ext4_is_child_context_consistent_with_parent(dir,
inode)) {
+ int nokey = ext4_encrypted_inode(inode) &&
+ !ext4_encryption_info(inode);
+
iput(inode);
+ if (nokey)
+ return ERR_PTR(-ENOKEY);
ext4_warning(inode->i_sb,
"Inconsistent encryption contexts: %lu/%lu\n",
(unsigned long) dir->i_ino,
diff --git a/fs/ext4/resize.c b/fs/ext4/resize.c
index ad62d7acc315..34038e3598d5 100644
--- a/fs/ext4/resize.c
+++ b/fs/ext4/resize.c
@@ -198,7 +198,7 @@ static struct ext4_new_flex_group_data *alloc_flex_gd(unsigned long flexbg_size)
if (flex_gd == NULL)
goto out3;
- if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_flex_group_data))
+ if (flexbg_size >= UINT_MAX / sizeof(struct ext4_new_group_data))
goto out2;
flex_gd->count = flexbg_size;