diff options
| author | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-05-25 11:24:42 -0700 |
|---|---|---|
| committer | Dmitry Torokhov <dmitry.torokhov@gmail.com> | 2021-05-25 11:24:42 -0700 |
| commit | cbb245239282870bc6f54d5137dfe0f84b48ea72 (patch) | |
| tree | dcdd8041d7bb5cddacfe086785dd370aa1a7a6ea /fs | |
| parent | 94a332bd8626f6c32e7ba5835dd17eeddbb76e89 (diff) | |
| parent | 9f4ad9e425a1d3b6a34617b8ea226d56a119a717 (diff) | |
Merge tag 'v5.12' into next
Sync up with mainline to get the latest device tree bindings and kernel
APIs.
Diffstat (limited to 'fs')
40 files changed, 384 insertions, 191 deletions
diff --git a/fs/afs/write.c b/fs/afs/write.c index c9195fc67fd8..eb737ed63afb 100644 --- a/fs/afs/write.c +++ b/fs/afs/write.c @@ -851,8 +851,7 @@ vm_fault_t afs_page_mkwrite(struct vm_fault *vmf) fscache_wait_on_page_write(vnode->cache, vmf->page); #endif - if (PageWriteback(vmf->page) && - wait_on_page_bit_killable(vmf->page, PG_writeback) < 0) + if (wait_on_page_writeback_killable(vmf->page)) return VM_FAULT_RETRY; if (lock_page_killable(vmf->page) < 0) diff --git a/fs/block_dev.c b/fs/block_dev.c index 92ed7d5df677..09d6f7229db9 100644 --- a/fs/block_dev.c +++ b/fs/block_dev.c @@ -275,6 +275,8 @@ __blkdev_direct_IO_simple(struct kiocb *iocb, struct iov_iter *iter, bio.bi_opf = dio_bio_write_op(iocb); task_io_account_write(ret); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio.bi_opf |= REQ_NOWAIT; if (iocb->ki_flags & IOCB_HIPRI) bio_set_polled(&bio, iocb); @@ -428,6 +430,8 @@ static ssize_t __blkdev_direct_IO(struct kiocb *iocb, struct iov_iter *iter, bio->bi_opf = dio_bio_write_op(iocb); task_io_account_write(bio->bi_iter.bi_size); } + if (iocb->ki_flags & IOCB_NOWAIT) + bio->bi_opf |= REQ_NOWAIT; dio->size += bio->bi_iter.bi_size; pos += bio->bi_iter.bi_size; @@ -1240,13 +1244,13 @@ int bdev_disk_changed(struct block_device *bdev, bool invalidate) lockdep_assert_held(&bdev->bd_mutex); - clear_bit(GD_NEED_PART_SCAN, &bdev->bd_disk->state); - rescan: ret = blk_drop_partitions(bdev); if (ret) return ret; + clear_bit(GD_NEED_PART_SCAN, &disk->state); + /* * Historically we only set the capacity to zero for devices that * support partitions (independ of actually having partitions created). diff --git a/fs/btrfs/Makefile b/fs/btrfs/Makefile index b634c42115ea..b4fb997eda16 100644 --- a/fs/btrfs/Makefile +++ b/fs/btrfs/Makefile @@ -7,10 +7,12 @@ subdir-ccflags-y += -Wmissing-format-attribute subdir-ccflags-y += -Wmissing-prototypes subdir-ccflags-y += -Wold-style-definition subdir-ccflags-y += -Wmissing-include-dirs -subdir-ccflags-y += $(call cc-option, -Wunused-but-set-variable) -subdir-ccflags-y += $(call cc-option, -Wunused-const-variable) -subdir-ccflags-y += $(call cc-option, -Wpacked-not-aligned) -subdir-ccflags-y += $(call cc-option, -Wstringop-truncation) +condflags := \ + $(call cc-option, -Wunused-but-set-variable) \ + $(call cc-option, -Wunused-const-variable) \ + $(call cc-option, -Wpacked-not-aligned) \ + $(call cc-option, -Wstringop-truncation) +subdir-ccflags-y += $(condflags) # The following turn off the warnings enabled by -Wextra subdir-ccflags-y += -Wno-missing-field-initializers subdir-ccflags-y += -Wno-sign-compare diff --git a/fs/btrfs/dev-replace.c b/fs/btrfs/dev-replace.c index 3a9c1e046ebe..d05f73530af7 100644 --- a/fs/btrfs/dev-replace.c +++ b/fs/btrfs/dev-replace.c @@ -81,6 +81,9 @@ int btrfs_init_dev_replace(struct btrfs_fs_info *fs_info) struct btrfs_dev_replace_item *ptr; u64 src_devid; + if (!dev_root) + return 0; + path = btrfs_alloc_path(); if (!path) { ret = -ENOMEM; diff --git a/fs/btrfs/disk-io.c b/fs/btrfs/disk-io.c index 41b718cfea40..289f1f09481d 100644 --- a/fs/btrfs/disk-io.c +++ b/fs/btrfs/disk-io.c @@ -2387,8 +2387,9 @@ static int btrfs_read_roots(struct btrfs_fs_info *fs_info) } else { set_bit(BTRFS_ROOT_TRACK_DIRTY, &root->state); fs_info->dev_root = root; - btrfs_init_devices_late(fs_info); } + /* Initialize fs_info for all devices in any case */ + btrfs_init_devices_late(fs_info); /* If IGNOREDATACSUMS is set don't bother reading the csum root. */ if (!btrfs_test_opt(fs_info, IGNOREDATACSUMS)) { @@ -3009,6 +3010,21 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } + /* + * btrfs_find_orphan_roots() is responsible for finding all the dead + * roots (with 0 refs), flag them with BTRFS_ROOT_DEAD_TREE and load + * them into the fs_info->fs_roots_radix tree. This must be done before + * calling btrfs_orphan_cleanup() on the tree root. If we don't do it + * first, then btrfs_orphan_cleanup() will delete a dead root's orphan + * item before the root's tree is deleted - this means that if we unmount + * or crash before the deletion completes, on the next mount we will not + * delete what remains of the tree because the orphan item does not + * exists anymore, which is what tells us we have a pending deletion. + */ + ret = btrfs_find_orphan_roots(fs_info); + if (ret) + goto out; + ret = btrfs_cleanup_fs_roots(fs_info); if (ret) goto out; @@ -3068,7 +3084,6 @@ int btrfs_start_pre_rw_mount(struct btrfs_fs_info *fs_info) } } - ret = btrfs_find_orphan_roots(fs_info); out: return ret; } diff --git a/fs/btrfs/inode.c b/fs/btrfs/inode.c index 7cdf65be3707..a520775949a0 100644 --- a/fs/btrfs/inode.c +++ b/fs/btrfs/inode.c @@ -3099,11 +3099,13 @@ void btrfs_writepage_endio_finish_ordered(struct page *page, u64 start, * @bio_offset: offset to the beginning of the bio (in bytes) * @page: page where is the data to be verified * @pgoff: offset inside the page + * @start: logical offset in the file * * The length of such check is always one sector size. */ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, - u32 bio_offset, struct page *page, u32 pgoff) + u32 bio_offset, struct page *page, u32 pgoff, + u64 start) { struct btrfs_fs_info *fs_info = btrfs_sb(inode->i_sb); SHASH_DESC_ON_STACK(shash, fs_info->csum_shash); @@ -3130,8 +3132,8 @@ static int check_data_csum(struct inode *inode, struct btrfs_io_bio *io_bio, kunmap_atomic(kaddr); return 0; zeroit: - btrfs_print_data_csum_error(BTRFS_I(inode), page_offset(page) + pgoff, - csum, csum_expected, io_bio->mirror_num); + btrfs_print_data_csum_error(BTRFS_I(inode), start, csum, csum_expected, + io_bio->mirror_num); if (io_bio->device) btrfs_dev_stat_inc_and_print(io_bio->device, BTRFS_DEV_STAT_CORRUPTION_ERRS); @@ -3184,7 +3186,8 @@ int btrfs_verify_data_csum(struct btrfs_io_bio *io_bio, u32 bio_offset, pg_off += sectorsize, bio_offset += sectorsize) { int ret; - ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off); + ret = check_data_csum(inode, io_bio, bio_offset, page, pg_off, + page_offset(page) + pg_off); if (ret < 0) return -EIO; } @@ -7910,7 +7913,8 @@ static blk_status_t btrfs_check_read_dio_bio(struct inode *inode, ASSERT(pgoff < PAGE_SIZE); if (uptodate && (!csum || !check_data_csum(inode, io_bio, - bio_offset, bvec.bv_page, pgoff))) { + bio_offset, bvec.bv_page, + pgoff, start))) { clean_io_failure(fs_info, failure_tree, io_tree, start, bvec.bv_page, btrfs_ino(BTRFS_I(inode)), @@ -8169,10 +8173,6 @@ static blk_qc_t btrfs_submit_direct(struct inode *inode, struct iomap *iomap, bio->bi_end_io = btrfs_end_dio_bio; btrfs_io_bio(bio)->logical = file_offset; - WARN_ON_ONCE(write && btrfs_is_zoned(fs_info) && - fs_info->max_zone_append_size && - bio_op(bio) != REQ_OP_ZONE_APPEND); - if (bio_op(bio) == REQ_OP_ZONE_APPEND) { status = extract_ordered_extent(BTRFS_I(inode), bio, file_offset); diff --git a/fs/btrfs/qgroup.c b/fs/btrfs/qgroup.c index 14ff388fd3bd..f0b9ef13153a 100644 --- a/fs/btrfs/qgroup.c +++ b/fs/btrfs/qgroup.c @@ -226,7 +226,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, { struct btrfs_qgroup_list *list; - btrfs_sysfs_del_one_qgroup(fs_info, qgroup); list_del(&qgroup->dirty); while (!list_empty(&qgroup->groups)) { list = list_first_entry(&qgroup->groups, @@ -243,7 +242,6 @@ static void __del_qgroup_rb(struct btrfs_fs_info *fs_info, list_del(&list->next_member); kfree(list); } - kfree(qgroup); } /* must be called with qgroup_lock held */ @@ -569,6 +567,8 @@ void btrfs_free_qgroup_config(struct btrfs_fs_info *fs_info) qgroup = rb_entry(n, struct btrfs_qgroup, node); rb_erase(n, &fs_info->qgroup_tree); __del_qgroup_rb(fs_info, qgroup); + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); + kfree(qgroup); } /* * We call btrfs_free_qgroup_config() when unmounting @@ -1578,6 +1578,14 @@ int btrfs_remove_qgroup(struct btrfs_trans_handle *trans, u64 qgroupid) spin_lock(&fs_info->qgroup_lock); del_qgroup_rb(fs_info, qgroupid); spin_unlock(&fs_info->qgroup_lock); + + /* + * Remove the qgroup from sysfs now without holding the qgroup_lock + * spinlock, since the sysfs_remove_group() function needs to take + * the mutex kernfs_mutex through kernfs_remove_by_name_ns(). + */ + btrfs_sysfs_del_one_qgroup(fs_info, qgroup); + kfree(qgroup); out: mutex_unlock(&fs_info->qgroup_ioctl_lock); return ret; diff --git a/fs/btrfs/volumes.c b/fs/btrfs/volumes.c index bc3b33efddc5..1c6810bbaf8b 100644 --- a/fs/btrfs/volumes.c +++ b/fs/btrfs/volumes.c @@ -7448,6 +7448,9 @@ static int btrfs_device_init_dev_stats(struct btrfs_device *device, int item_size; int i, ret, slot; + if (!device->fs_info->dev_root) + return 0; + key.objectid = BTRFS_DEV_STATS_OBJECTID; key.type = BTRFS_PERSISTENT_ITEM_KEY; key.offset = device->devid; diff --git a/fs/btrfs/zoned.c b/fs/btrfs/zoned.c index 1f972b75a9ab..eeb3ebe11d7a 100644 --- a/fs/btrfs/zoned.c +++ b/fs/btrfs/zoned.c @@ -21,9 +21,30 @@ /* Pseudo write pointer value for conventional zone */ #define WP_CONVENTIONAL ((u64)-2) +/* + * Location of the first zone of superblock logging zone pairs. + * + * - primary superblock: 0B (zone 0) + * - first copy: 512G (zone starting at that offset) + * - second copy: 4T (zone starting at that offset) + */ +#define BTRFS_SB_LOG_PRIMARY_OFFSET (0ULL) +#define BTRFS_SB_LOG_FIRST_OFFSET (512ULL * SZ_1G) +#define BTRFS_SB_LOG_SECOND_OFFSET (4096ULL * SZ_1G) + +#define BTRFS_SB_LOG_FIRST_SHIFT const_ilog2(BTRFS_SB_LOG_FIRST_OFFSET) +#define BTRFS_SB_LOG_SECOND_SHIFT const_ilog2(BTRFS_SB_LOG_SECOND_OFFSET) + /* Number of superblock log zones */ #define BTRFS_NR_SB_LOG_ZONES 2 +/* + * Maximum supported zone size. Currently, SMR disks have a zone size of + * 256MiB, and we are expecting ZNS drives to be in the 1-4GiB range. We do not + * expect the zone size to become larger than 8GiB in the near future. + */ +#define BTRFS_MAX_ZONE_SIZE SZ_8G + static int copy_zone_info_cb(struct blk_zone *zone, unsigned int idx, void *data) { struct blk_zone *zones = data; @@ -111,23 +132,22 @@ static int sb_write_pointer(struct block_device *bdev, struct blk_zone *zones, } /* - * The following zones are reserved as the circular buffer on ZONED btrfs. - * - The primary superblock: zones 0 and 1 - * - The first copy: zones 16 and 17 - * - The second copy: zones 1024 or zone at 256GB which is minimum, and - * the following one + * Get the first zone number of the superblock mirror */ static inline u32 sb_zone_number(int shift, int mirror) { - ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX); + u64 zone; + ASSERT(mirror < BTRFS_SUPER_MIRROR_MAX); switch (mirror) { - case 0: return 0; - case 1: return 16; - case 2: return min_t(u64, btrfs_sb_offset(mirror) >> shift, 1024); + case 0: zone = 0; break; + case 1: zone = 1ULL << (BTRFS_SB_LOG_FIRST_SHIFT - shift); break; + case 2: zone = 1ULL << (BTRFS_SB_LOG_SECOND_SHIFT - shift); break; } - return 0; + ASSERT(zone <= U32_MAX); + + return (u32)zone; } /* @@ -300,10 +320,21 @@ int btrfs_get_dev_zone_info(struct btrfs_device *device) zone_sectors = bdev_zone_sectors(bdev); } - nr_sectors = bdev_nr_sectors(bdev); /* Check if it's power of 2 (see is_power_of_2) */ ASSERT(zone_sectors != 0 && (zone_sectors & (zone_sectors - 1)) == 0); zone_info->zone_size = zone_sectors << SECTOR_SHIFT; + + /* We reject devices with a zone size larger than 8GB */ + if (zone_info->zone_size > BTRFS_MAX_ZONE_SIZE) { + btrfs_err_in_rcu(fs_info, + "zoned: %s: zone size %llu larger than supported maximum %llu", + rcu_str_deref(device->name), + zone_info->zone_size, BTRFS_MAX_ZONE_SIZE); + ret = -EINVAL; + goto out; + } + + nr_sectors = bdev_nr_sectors(bdev); zone_info->zone_size_shift = ilog2(zone_info->zone_size); zone_info->max_zone_append_size = (u64)queue_max_zone_append_sectors(queue) << SECTOR_SHIFT; diff --git a/fs/cachefiles/bind.c b/fs/cachefiles/bind.c index dfb14dbddf51..38bb7764b454 100644 --- a/fs/cachefiles/bind.c +++ b/fs/cachefiles/bind.c @@ -118,6 +118,12 @@ static int cachefiles_daemon_add_cache(struct cachefiles_cache *cache) cache->mnt = path.mnt; root = path.dentry; + ret = -EINVAL; + if (mnt_user_ns(path.mnt) != &init_user_ns) { + pr_warn("File cache on idmapped mounts not supported"); + goto error_unsupported; + } + /* check parameters */ ret = -EOPNOTSUPP; if (d_is_negative(root) || diff --git a/fs/cachefiles/rdwr.c b/fs/cachefiles/rdwr.c index e027c718ca01..8ffc40e84a59 100644 --- a/fs/cachefiles/rdwr.c +++ b/fs/cachefiles/rdwr.c @@ -24,17 +24,16 @@ static int cachefiles_read_waiter(wait_queue_entry_t *wait, unsigned mode, container_of(wait, struct cachefiles_one_read, monitor); struct cachefiles_object *object; struct fscache_retrieval *op = monitor->op; - struct wait_bit_key *key = _key; + struct wait_page_key *key = _key; struct page *page = wait->private; ASSERT(key); _enter("{%lu},%u,%d,{%p,%u}", monitor->netfs_page->index, mode, sync, - key->flags, key->bit_nr); + key->page, key->bit_nr); - if (key->flags != &page->flags || - key->bit_nr != PG_locked) + if (key->page != page || key->bit_nr != PG_locked) return 0; _debug("--- monitor %p %lx ---", page, page->flags); diff --git a/fs/cifs/Kconfig b/fs/cifs/Kconfig index fe03cbdae959..bf52e9326ebe 100644 --- a/fs/cifs/Kconfig +++ b/fs/cifs/Kconfig @@ -18,6 +18,7 @@ config CIFS select CRYPTO_AES select CRYPTO_LIB_DES select KEYS + select DNS_RESOLVER help This is the client VFS module for the SMB3 family of NAS protocols, (including support for the most recent, most secure dialect SMB3.1.1) @@ -112,7 +113,6 @@ config CIFS_WEAK_PW_HASH config CIFS_UPCALL bool "Kerberos/SPNEGO advanced session setup" depends on CIFS - select DNS_RESOLVER help Enables an upcall mechanism for CIFS which accesses userspace helper utilities to provide SPNEGO packaged (RFC 4178) Kerberos tickets @@ -179,7 +179,6 @@ config CIFS_DEBUG_DUMP_KEYS config CIFS_DFS_UPCALL bool "DFS feature support" depends on CIFS - select DNS_RESOLVER help Distributed File System (DFS) support is used to access shares transparently in an enterprise name space, even if the share diff --git a/fs/cifs/Makefile b/fs/cifs/Makefile index 5213b20843b5..3ee3b7de4ded 100644 --- a/fs/cifs/Makefile +++ b/fs/cifs/Makefile @@ -10,13 +10,14 @@ cifs-y := trace.o cifsfs.o cifssmb.o cifs_debug.o connect.o dir.o file.o \ cifs_unicode.o nterr.o cifsencrypt.o \ readdir.o ioctl.o sess.o export.o smb1ops.o unc.o winucase.o \ smb2ops.o smb2maperror.o smb2transport.o \ - smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o + smb2misc.o smb2pdu.o smb2inode.o smb2file.o cifsacl.o fs_context.o \ + dns_resolve.o cifs-$(CONFIG_CIFS_XATTR) += xattr.o cifs-$(CONFIG_CIFS_UPCALL) += cifs_spnego.o -cifs-$(CONFIG_CIFS_DFS_UPCALL) += dns_resolve.o cifs_dfs_ref.o dfs_cache.o +cifs-$(CONFIG_CIFS_DFS_UPCALL) += cifs_dfs_ref.o dfs_cache.o cifs-$(CONFIG_CIFS_SWN_UPCALL) += netlink.o cifs_swn.o diff --git a/fs/cifs/cifsacl.c b/fs/cifs/cifsacl.c index 2be22a5c690f..d178cf85e926 100644 --- a/fs/cifs/cifsacl.c +++ b/fs/cifs/cifsacl.c @@ -1130,8 +1130,7 @@ static int set_chmod_dacl(struct cifs_acl *pdacl, struct cifs_acl *pndacl, } /* If it's any one of the ACE we're replacing, skip! */ - if (!mode_from_sid && - ((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) || + if (((compare_sids(&pntace->sid, &sid_unix_NFS_mode) == 0) || (compare_sids(&pntace->sid, pownersid) == 0) || (compare_sids(&pntace->sid, pgrpsid) == 0) || (compare_sids(&pntace->sid, &sid_everyone) == 0) || diff --git a/fs/cifs/cifsfs.c b/fs/cifs/cifsfs.c index 099ad9f3660b..5ddd20b62484 100644 --- a/fs/cifs/cifsfs.c +++ b/fs/cifs/cifsfs.c @@ -476,7 +476,8 @@ static int cifs_show_devname(struct seq_file *m, struct dentry *root) seq_puts(m, "none"); else { convert_delimiter(devname, '/'); - seq_puts(m, devname); + /* escape all spaces in share names */ + seq_escape(m, devname, " \t"); kfree(devname); } return 0; diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h index 31fc8695abd6..ec824ab8c5ca 100644 --- a/fs/cifs/cifsglob.h +++ b/fs/cifs/cifsglob.h @@ -919,8 +919,8 @@ struct cifs_ses { bool binding:1; /* are we binding the session? */ __u16 session_flags; __u8 smb3signingkey[SMB3_SIGN_KEY_SIZE]; - __u8 smb3encryptionkey[SMB3_SIGN_KEY_SIZE]; - __u8 smb3decryptionkey[SMB3_SIGN_KEY_SIZE]; + __u8 smb3encryptionkey[SMB3_ENC_DEC_KEY_SIZE]; + __u8 smb3decryptionkey[SMB3_ENC_DEC_KEY_SIZE]; __u8 preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; __u8 binding_preauth_sha_hash[SMB2_PREAUTH_HASH_SIZE]; @@ -1283,8 +1283,6 @@ struct cifs_aio_ctx { bool direct_io; }; -struct cifs_readdata; - /* asynchronous read support */ struct cifs_readdata { struct kref refcount; diff --git a/fs/cifs/cifspdu.h b/fs/cifs/cifspdu.h index 64fe5a47b5e8..9adc74bd9f8f 100644 --- a/fs/cifs/cifspdu.h +++ b/fs/cifs/cifspdu.h @@ -147,6 +147,11 @@ */ #define SMB3_SIGN_KEY_SIZE (16) +/* + * Size of the smb3 encryption/decryption keys + */ +#define SMB3_ENC_DEC_KEY_SIZE (32) + #define CIFS_CLIENT_CHALLENGE_SIZE (8) #define CIFS_SERVER_CHALLENGE_SIZE (8) #define CIFS_HMAC_MD5_HASH_SIZE (16) diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c index eec8a2052da2..24668eb006c6 100644 --- a/fs/cifs/connect.c +++ b/fs/cifs/connect.c @@ -87,7 +87,6 @@ static void cifs_prune_tlinks(struct work_struct *work); * * This should be called with server->srv_mutex held. */ -#ifdef CONFIG_CIFS_DFS_UPCALL static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) { int rc; @@ -124,6 +123,7 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server) return !rc ? -1 : 0; } +#ifdef CONFIG_CIFS_DFS_UPCALL /* These functions must be called with server->srv_mutex held */ static void reconn_set_next_dfs_target(struct TCP_Server_Info *server, struct cifs_sb_info *cifs_sb, @@ -321,14 +321,29 @@ cifs_reconnect(struct TCP_Server_Info *server) #endif #ifdef CONFIG_CIFS_DFS_UPCALL + if (cifs_sb && cifs_sb->origin_fullpath) /* * Set up next DFS target server (if any) for reconnect. If DFS * feature is disabled, then we will retry last server we * connected to before. */ reconn_set_next_dfs_target(server, cifs_sb, &tgt_list, &tgt_it); + else { +#endif + /* + * Resolve the hostname again to make sure that IP address is up-to-date. + */ + rc = reconn_set_ipaddr_from_hostname(server); + if (rc) { + cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n", + __func__, rc); + } + +#ifdef CONFIG_CIFS_DFS_UPCALL + } #endif + #ifdef CONFIG_CIFS_SWN_UPCALL } #endif diff --git a/fs/cifs/file.c b/fs/cifs/file.c index 26de4329d161..042e24aad410 100644 --- a/fs/cifs/file.c +++ b/fs/cifs/file.c @@ -165,6 +165,7 @@ int cifs_posix_open(char *full_path, struct inode **pinode, goto posix_open_ret; } } else { + cifs_revalidate_mapping(*pinode); cifs_fattr_to_inode(*pinode, &fattr); } diff --git a/fs/cifs/smb2glob.h b/fs/cifs/smb2glob.h index 99a1951a01ec..d9a990c99121 100644 --- a/fs/cifs/smb2glob.h +++ b/fs/cifs/smb2glob.h @@ -58,6 +58,7 @@ #define SMB2_HMACSHA256_SIZE (32) #define SMB2_CMACAES_SIZE (16) #define SMB3_SIGNKEY_SIZE (16) +#define SMB3_GCM128_CRYPTKEY_SIZE (16) #define SMB3_GCM256_CRYPTKEY_SIZE (32) /* Maximum buffer size value we can send with 1 credit */ diff --git a/fs/cifs/smb2misc.c b/fs/cifs/smb2misc.c index b50164e2c88d..aac384f69f74 100644 --- a/fs/cifs/smb2misc.c +++ b/fs/cifs/smb2misc.c @@ -754,8 +754,8 @@ smb2_is_valid_oplock_break(char *buffer, struct TCP_Server_Info *server) } } spin_unlock(&cifs_tcp_ses_lock); - cifs_dbg(FYI, "Can not process oplock break for non-existent connection\n"); - return false; + cifs_dbg(FYI, "No file id matched, oplock break ignored\n"); + return true; } void diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c index 9bae7e8deb09..f703204fb185 100644 --- a/fs/cifs/smb2ops.c +++ b/fs/cifs/smb2ops.c @@ -2038,6 +2038,7 @@ smb2_duplicate_extents(const unsigned int xid, { int rc; unsigned int ret_data_len; + struct inode *inode; struct duplicate_extents_to_file dup_ext_buf; struct cifs_tcon *tcon = tlink_tcon(trgtfile->tlink); @@ -2054,10 +2055,21 @@ smb2_duplicate_extents(const unsigned int xid, cifs_dbg(FYI, "Duplicate extents: src off %lld dst off %lld len %lld\n", src_off, dest_off, len); - rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); - if (rc) - goto duplicate_extents_out; + inode = d_inode(trgtfile->dentry); + if (inode->i_size < dest_off + len) { + rc = smb2_set_file_size(xid, tcon, trgtfile, dest_off + len, false); + if (rc) + goto duplicate_extents_out; + /* + * Although also could set plausible allocation size (i_blocks) + * here in addition to setting the file size, in reflink + * it is likely that the target file is sparse. Its allocation + * size will be queried on next revalidate, but it is important + * to make sure that file's cached size is updated immediately + */ + cifs_setsize(inode, dest_off + len); + } rc = SMB2_ioctl(xid, tcon, trgtfile->fid.persistent_fid, trgtfile->fid.volatile_fid, FSCTL_DUPLICATE_EXTENTS_TO_FILE, @@ -4158,7 +4170,7 @@ smb2_get_enc_key(struct TCP_Server_Info *server, __u64 ses_id, int enc, u8 *key) if (ses->Suid == ses_id) { ses_enc_key = enc ? ses->smb3encryptionkey : ses->smb3decryptionkey; - memcpy(key, ses_enc_key, SMB3_SIGN_KEY_SIZE); + memcpy(key, ses_enc_key, SMB3_ENC_DEC_KEY_SIZE); spin_unlock(&cifs_tcp_ses_lock); return 0; } @@ -4185,7 +4197,7 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, int rc = 0; struct scatterlist *sg; u8 sign[SMB2_SIGNATURE_SIZE] = {}; - u8 key[SMB3_SIGN_KEY_SIZE]; + u8 key[SMB3_ENC_DEC_KEY_SIZE]; struct aead_request *req; char *iv; unsigned int iv_len; @@ -4209,10 +4221,11 @@ crypt_message(struct TCP_Server_Info *server, int num_rqst, tfm = enc ? server->secmech.ccmaesencrypt : server->secmech.ccmaesdecrypt; - if (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM) + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) rc = crypto_aead_setkey(tfm, key, SMB3_GCM256_CRYPTKEY_SIZE); else - rc = crypto_aead_setkey(tfm, key, SMB3_SIGN_KEY_SIZE); + rc = crypto_aead_setkey(tfm, key, SMB3_GCM128_CRYPTKEY_SIZE); if (rc) { cifs_server_dbg(VFS, "%s: Failed to set aead key %d\n", __func__, rc); diff --git a/fs/cifs/smb2transport.c b/fs/cifs/smb2transport.c index ebccd71cc60a..e6fa76ab70be 100644 --- a/fs/cifs/smb2transport.c +++ b/fs/cifs/smb2transport.c @@ -298,7 +298,8 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, { unsigned char zero = 0x0; __u8 i[4] = {0, 0, 0, 1}; - __u8 L[4] = {0, 0, 0, 128}; + __u8 L128[4] = {0, 0, 0, 128}; + __u8 L256[4] = {0, 0, 1, 0}; int rc = 0; unsigned char prfhash[SMB2_HMACSHA256_SIZE]; unsigned char *hashptr = prfhash; @@ -354,8 +355,14 @@ static int generate_key(struct cifs_ses *ses, struct kvec label, goto smb3signkey_ret; } - rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, - L, 4); + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + L256, 4); + } else { + rc = crypto_shash_update(&server->secmech.sdeschmacsha256->shash, + L128, 4); + } if (rc) { cifs_server_dbg(VFS, "%s: Could not update with L\n", __func__); goto smb3signkey_ret; @@ -390,6 +397,9 @@ generate_smb3signingkey(struct cifs_ses *ses, const struct derivation_triplet *ptriplet) { int rc; +#ifdef CONFIG_CIFS_DEBUG_DUMP_KEYS + struct TCP_Server_Info *server = ses->server; +#endif /* * All channels use the same encryption/decryption keys but @@ -422,11 +432,11 @@ generate_smb3signingkey(struct cifs_ses *ses, rc = generate_key(ses, ptriplet->encryption.label, ptriplet->encryption.context, ses->smb3encryptionkey, - SMB3_SIGN_KEY_SIZE); + SMB3_ENC_DEC_KEY_SIZE); rc = generate_key(ses, ptriplet->decryption.label, ptriplet->decryption.context, ses->smb3decryptionkey, - SMB3_SIGN_KEY_SIZE); + SMB3_ENC_DEC_KEY_SIZE); if (rc) return rc; } @@ -442,14 +452,23 @@ generate_smb3signingkey(struct cifs_ses *ses, */ cifs_dbg(VFS, "Session Id %*ph\n", (int)sizeof(ses->Suid), &ses->Suid); + cifs_dbg(VFS, "Cipher type %d\n", server->cipher_type); cifs_dbg(VFS, "Session Key %*ph\n", SMB2_NTLMV2_SESSKEY_SIZE, ses->auth_key.response); cifs_dbg(VFS, "Signing Key %*ph\n", SMB3_SIGN_KEY_SIZE, ses->smb3signingkey); - cifs_dbg(VFS, "ServerIn Key %*ph\n", - SMB3_SIGN_KEY_SIZE, ses->smb3encryptionkey); - cifs_dbg(VFS, "ServerOut Key %*ph\n", - SMB3_SIGN_KEY_SIZE, ses->smb3decryptionkey); + if ((server->cipher_type == SMB2_ENCRYPTION_AES256_CCM) || + (server->cipher_type == SMB2_ENCRYPTION_AES256_GCM)) { + cifs_dbg(VFS, "ServerIn Key %*ph\n", + SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3encryptionkey); + cifs_dbg(VFS, "ServerOut Key %*ph\n", + SMB3_GCM256_CRYPTKEY_SIZE, ses->smb3decryptionkey); + } else { + cifs_dbg(VFS, "ServerIn Key %*ph\n", + SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3encryptionkey); + cifs_dbg(VFS, "ServerOut Key %*ph\n", + SMB3_GCM128_CRYPTKEY_SIZE, ses->smb3decryptionkey); + } #endif return rc; } diff --git a/fs/coda/file.c b/fs/coda/file.c index 128d63df5bfb..ef5ca22bfb3e 100644 --- a/fs/coda/file.c +++ b/fs/coda/file.c @@ -175,10 +175,10 @@ coda_file_mmap(struct file *coda_file, struct vm_area_struct *vma) ret = call_mmap(vma->vm_file, vma); if (ret) { - /* if call_mmap fails, our caller will put coda_file so we - * should drop the reference to the host_file that we got. + /* if call_mmap fails, our caller will put host_file so we + * should drop the reference to the coda_file that we got. */ - fput(host_file); + fput(coda_file); kfree(cvm_ops); } else { /* here we add redirects for the open/close vm_operations */ diff --git a/fs/direct-io.c b/fs/direct-io.c index b61491bf3166..b2e86e739d7a 100644 --- a/fs/direct-io.c +++ b/fs/direct-io.c @@ -812,6 +812,7 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, struct buffer_head *map_bh) { int ret = 0; + int boundary = sdio->boundary; /* dio_send_cur_page may clear it */ if (dio->op == REQ_OP_WRITE) { /* @@ -850,10 +851,10 @@ submit_page_section(struct dio *dio, struct dio_submit *sdio, struct page *page, sdio->cur_page_fs_offset = sdio->block_in_file << sdio->blkbits; out: /* - * If sdio->boundary then we want to schedule the IO now to + * If boundary then we want to schedule the IO now to * avoid metadata seeks. */ - if (sdio->boundary) { + if (boundary) { ret = dio_send_cur_page(dio, sdio, map_bh); if (sdio->bio) dio_bio_submit(dio, sdio); diff --git a/fs/file.c b/fs/file.c index f3a4bac2cbe9..f633348029a5 100644 --- a/fs/file.c +++ b/fs/file.c @@ -629,17 +629,30 @@ int close_fd(unsigned fd) } EXPORT_SYMBOL(close_fd); /* for ksys_close() */ +/** + * last_fd - return last valid index into fd table + * @cur_fds: files struct + * + * Context: Either rcu read lock or files_lock must be held. + * + * Returns: Last valid index into fdtable. + */ +static inline unsigned last_fd(struct fdtable *fdt) +{ + return fdt->max_fds - 1; +} + static inline void __range_cloexec(struct files_struct *cur_fds, unsigned int fd, unsigned int max_fd) { struct fdtable *fdt; - if (fd > max_fd) - return; - + /* make sure we're using the correct maximum value */ spin_lock(&cur_fds->file_lock); fdt = files_fdtable(cur_fds); - bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1); + max_fd = min(last_fd(fdt), max_fd); + if (fd <= max_fd) + bitmap_set(fdt->close_on_exec, fd, max_fd - fd + 1); spin_unlock(&cur_fds->file_lock); } diff --git a/fs/gfs2/super.c b/fs/gfs2/super.c index 97076d3f562f..8fb9602d79b4 100644 --- a/fs/gfs2/super.c +++ b/fs/gfs2/super.c @@ -162,8 +162,10 @@ int gfs2_make_fs_rw(struct gfs2_sbd *sdp) int error; error = init_threads(sdp); - if (error) + if (error) { + gfs2_withdraw_delayed(sdp); return error; + } j_gl->gl_ops->go_inval(j_gl, DIO_METADATA); if (gfs2_withdrawn(sdp)) { @@ -750,11 +752,13 @@ void gfs2_freeze_func(struct work_struct *work) static int gfs2_freeze(struct super_block *sb) { struct gfs2_sbd *sdp = sb->s_fs_info; - int error = 0; + int error; mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) + if (atomic_read(&sdp->sd_freeze_state) != SFS_UNFROZEN) { + error = -EBUSY; goto out; + } for (;;) { if (gfs2_withdrawn(sdp)) { @@ -795,10 +799,10 @@ static int gfs2_unfreeze(struct super_block *sb) struct gfs2_sbd *sdp = sb->s_fs_info; mutex_lock(&sdp->sd_freeze_mutex); - if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || + if (atomic_read(&sdp->sd_freeze_state) != SFS_FROZEN || !gfs2_holder_initialized(&sdp->sd_freeze_gh)) { mutex_unlock(&sdp->sd_freeze_mutex); - return 0; + return -EINVAL; } gfs2_freeze_unlock(&sdp->sd_freeze_gh); diff --git a/fs/hostfs/hostfs_kern.c b/fs/hostfs/hostfs_kern.c index 29e407762626..743a005a5c64 100644 --- a/fs/hostfs/hostfs_kern.c +++ b/fs/hostfs/hostfs_kern.c @@ -144,7 +144,7 @@ static char *follow_link(char *link) char *name, *resolved, *end; int n; - name = __getname(); + name = kmalloc(PATH_MAX, GFP_KERNEL); if (!name) { n = -ENOMEM; goto out_free; @@ -173,12 +173,11 @@ static char *follow_link(char *link) goto out_free; } - __putname(name); - kfree(link); + kfree(name); return resolved; out_free: - __putname(name); + kfree(name); return ERR_PTR(n); } diff --git a/fs/io-wq.c b/fs/io-wq.c index 3dc10bfd8c3b..4eba531bea5a 100644 --- a/fs/io-wq.c +++ b/fs/io-wq.c @@ -16,7 +16,6 @@ #include <linux/rculist_nulls.h> #include <linux/cpu.h> #include <linux/tracehook.h> -#include <linux/freezer.h> #include "../kernel/sched/sched.h" #include "io-wq.h" @@ -388,11 +387,9 @@ static struct io_wq_work *io_get_next_work(struct io_wqe *wqe) static bool io_flush_signals(void) { - if (unlikely(test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL))) { + if (unlikely(test_thread_flag(TIF_NOTIFY_SIGNAL))) { __set_current_state(TASK_RUNNING); - if (current->task_works) - task_work_run(); - clear_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL); + tracehook_notify_signal(); return true; } return false; @@ -418,6 +415,7 @@ static void io_worker_handle_work(struct io_worker *worker) { struct io_wqe *wqe = worker->wqe; struct io_wq *wq = wqe->wq; + bool do_kill = test_bit(IO_WQ_BIT_EXIT, &wq->state); do { struct io_wq_work *work; @@ -447,6 +445,9 @@ get_next: unsigned int hash = io_get_work_hash(work); next_hashed = wq_next_work(work); + + if (unlikely(do_kill) && (work->flags & IO_WQ_WORK_UNBOUND)) + work->flags |= IO_WQ_WORK_CANCEL; wq->do_work(work); io_assign_current_work(worker, NULL); @@ -487,7 +488,7 @@ static int io_wqe_worker(void *data) worker->flags |= (IO_WORKER_F_UP | IO_WORKER_F_RUNNING); io_wqe_inc_running(worker); - sprintf(buf, "iou-wrk-%d", wq->task_pid); + snprintf(buf, sizeof(buf), "iou-wrk-%d", wq->task_pid); set_task_comm(current, buf); while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)) { @@ -505,10 +506,15 @@ loop: if (io_flush_signals()) continue; ret = schedule_timeout(WORKER_IDLE_TIMEOUT); - if (try_to_freeze() || ret) - continue; - if (fatal_signal_pending(current)) + if (signal_pending(current)) { + struct ksignal ksig; + + if (!get_signal(&ksig)) + continue; break; + } + if (ret) + continue; /* timed out, exit unless we're the fixed worker */ if (test_bit(IO_WQ_BIT_EXIT, &wq->state) || !(worker->flags & IO_WORKER_F_FIXED)) @@ -709,16 +715,20 @@ static int io_wq_manager(void *data) char buf[TASK_COMM_LEN]; int node; - sprintf(buf, "iou-mgr-%d", wq->task_pid); + snprintf(buf, sizeof(buf), "iou-mgr-%d", wq->task_pid); set_task_comm(current, buf); do { set_current_state(TASK_INTERRUPTIBLE); io_wq_check_workers(wq); schedule_timeout(HZ); - try_to_freeze(); - if (fatal_signal_pending(current)) + if (signal_pending(current)) { + struct ksignal ksig; + + if (!get_signal(&ksig)) + continue; set_bit(IO_WQ_BIT_EXIT, &wq->state); + } } while (!test_bit(IO_WQ_BIT_EXIT, &wq->state)); io_wq_check_workers(wq); @@ -1065,7 +1075,11 @@ static void io_wq_destroy(struct io_wq *wq) for_each_node(node) { struct io_wqe *wqe = wq->wqes[node]; - WARN_ON_ONCE(!wq_list_empty(&wqe->work_list)); + struct io_cb_cancel_data match = { + .fn = io_wq_work_match_all, + .cancel_all = true, + }; + io_wqe_cancel_pending_work(wqe, &match); kfree(wqe); } io_wq_put_hash(wq->hash); diff --git a/fs/io_uring.c b/fs/io_uring.c index 543551d70327..dff34975d86b 100644 --- a/fs/io_uring.c +++ b/fs/io_uring.c @@ -78,7 +78,6 @@ #include <linux/task_work.h> #include <linux/pagemap.h> #include <linux/io_uring.h> -#include <linux/freezer.h> #define CREATE_TRACE_POINTS #include <trace/events/io_uring.h> @@ -698,6 +697,7 @@ enum { REQ_F_NO_FILE_TABLE_BIT, REQ_F_LTIMEOUT_ACTIVE_BIT, REQ_F_COMPLETE_INLINE_BIT, + REQ_F_REISSUE_BIT, /* not a real bit, just to check we're not overflowing the space */ __REQ_F_LAST_BIT, @@ -741,6 +741,8 @@ enum { REQ_F_LTIMEOUT_ACTIVE = BIT(REQ_F_LTIMEOUT_ACTIVE_BIT), /* completion is deferred through io_comp_state */ REQ_F_COMPLETE_INLINE = BIT(REQ_F_COMPLETE_INLINE_BIT), + /* caller should reissue async */ + REQ_F_REISSUE = BIT(REQ_F_REISSUE_BIT), }; struct async_poll { @@ -1095,8 +1097,6 @@ static bool io_match_task(struct io_kiocb *head, io_for_each_link(req, head) { if (req->flags & REQ_F_INFLIGHT) return true; - if (req->task->files == files) - return true; } return false; } @@ -1216,7 +1216,7 @@ static void io_prep_async_work(struct io_kiocb *req) if (req->flags & REQ_F_ISREG) { if (def->hash_reg_file || (ctx->flags & IORING_SETUP_IOPOLL)) io_wq_hash_work(&req->work, file_inode(req->file)); - } else { + } else if (!req->file || !S_ISBLK(file_inode(req->file)->i_mode)) { if (def->unbound_nonreg_file) req->work.flags |= IO_WQ_WORK_UNBOUND; } @@ -1239,16 +1239,16 @@ static void io_queue_async_work(struct io_kiocb *req) BUG_ON(!tctx); BUG_ON(!tctx->io_wq); - trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, - &req->work, req->flags); /* init ->work of the whole link before punting */ io_prep_async_link(req); + trace_io_uring_queue_async_work(ctx, io_wq_is_hashed(&req->work), req, + &req->work, req->flags); io_wq_enqueue(tctx->io_wq, &req->work); if (link) io_queue_linked_timeout(link); } -static void io_kill_timeout(struct io_kiocb *req) +static void io_kill_timeout(struct io_kiocb *req, int status) { struct io_timeout_data *io = req->async_data; int ret; @@ -1258,31 +1258,11 @@ static void io_kill_timeout(struct io_kiocb *req) atomic_set(&req->ctx->cq_timeouts, atomic_read(&req->ctx->cq_timeouts) + 1); list_del_init(&req->timeout.list); - io_cqring_fill_event(req, 0); + io_cqring_fill_event(req, status); io_put_req_deferred(req, 1); } } -/* - * Returns true if we found and killed one or more timeouts - */ -static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, - struct files_struct *files) -{ - struct io_kiocb *req, *tmp; - int canceled = 0; - - spin_lock_irq(&ctx->completion_lock); - list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { - if (io_match_task(req, tsk, files)) { - io_kill_timeout(req); - canceled++; - } - } - spin_unlock_irq(&ctx->completion_lock); - return canceled != 0; -} - static void __io_queue_deferred(struct io_ring_ctx *ctx) { do { @@ -1327,7 +1307,7 @@ static void io_flush_timeouts(struct io_ring_ctx *ctx) break; list_del_init(&req->timeout.list); - io_kill_timeout(req); + io_kill_timeout(req, 0); } while (!list_empty(&ctx->timeout_list)); ctx->cq_last_tm_flush = seq; @@ -2499,6 +2479,11 @@ static bool io_rw_should_reissue(struct io_kiocb *req) return false; return true; } +#else +static bool io_rw_should_reissue(struct io_kiocb *req) +{ + return false; +} #endif static bool io_rw_reissue(struct io_kiocb *req) @@ -2524,13 +2509,14 @@ static void __io_complete_rw(struct io_kiocb *req, long res, long res2, { int cflags = 0; - if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_reissue(req)) + if (req->rw.kiocb.ki_flags & IOCB_WRITE) + kiocb_end_write(req); + if ((res == -EAGAIN || res == -EOPNOTSUPP) && io_rw_should_reissue(req)) { + req->flags |= REQ_F_REISSUE; return; + } if (res != req->result) req_set_fail_links(req); - - if (req->rw.kiocb.ki_flags & IOCB_WRITE) - kiocb_end_write(req); if (req->flags & REQ_F_BUFFER_SELECTED) cflags = io_put_rw_kbuf(req); __io_req_complete(req, issue_flags, res, cflags); @@ -2776,6 +2762,7 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, { struct io_kiocb *req = container_of(kiocb, struct io_kiocb, rw.kiocb); struct io_async_rw *io = req->async_data; + bool check_reissue = kiocb->ki_complete == io_complete_rw; /* add previously done IO, if any */ if (io && io->bytes_done > 0) { @@ -2791,6 +2778,18 @@ static void kiocb_done(struct kiocb *kiocb, ssize_t ret, __io_complete_rw(req, ret, 0, issue_flags); else io_rw_done(kiocb, ret); + + if (check_reissue && req->flags & REQ_F_REISSUE) { + req->flags &= ~REQ_F_REISSUE; + if (!io_rw_reissue(req)) { + int cflags = 0; + + req_set_fail_links(req); + if (req->flags & REQ_F_BUFFER_SELECTED) + cflags = io_put_rw_kbuf(req); + __io_req_complete(req, issue_flags, ret, cflags); + } + } } static int io_import_fixed(struct io_kiocb *req, int rw, struct iov_iter *iter) @@ -3307,11 +3306,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) ret = io_iter_do_read(req, iter); - if (ret == -EIOCBQUEUED) { - if (req->async_data) - iov_iter_revert(iter, io_size - iov_iter_count(iter)); - goto out_free; - } else if (ret == -EAGAIN) { + if (ret == -EAGAIN || (req->flags & REQ_F_REISSUE)) { + req->flags &= ~REQ_F_REISSUE; /* IOPOLL retry should happen for io-wq threads */ if (!force_nonblock && !(req->ctx->flags & IORING_SETUP_IOPOLL)) goto done; @@ -3321,6 +3317,8 @@ static int io_read(struct io_kiocb *req, unsigned int issue_flags) /* some cases will consume bytes even on error returns */ iov_iter_revert(iter, io_size - iov_iter_count(iter)); ret = 0; + } else if (ret == -EIOCBQUEUED) { + goto out_free; } else if (ret <= 0 || ret == io_size || !force_nonblock || (req->flags & REQ_F_NOWAIT) || !(req->flags & REQ_F_ISREG)) { /* read all, failed, already did sync or don't want to retry */ @@ -3433,6 +3431,11 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) else ret2 = -EINVAL; + if (req->flags & REQ_F_REISSUE) { + req->flags &= ~REQ_F_REISSUE; + ret2 = -EAGAIN; + } + /* * Raw bdev writes will return -EOPNOTSUPP for IOCB_NOWAIT. Just * retry them without IOCB_NOWAIT. @@ -3442,8 +3445,6 @@ static int io_write(struct io_kiocb *req, unsigned int issue_flags) /* no retry on NONBLOCK nor RWF_NOWAIT */ if (ret2 == -EAGAIN && (req->flags & REQ_F_NOWAIT)) goto done; - if (ret2 == -EIOCBQUEUED && req->async_data) - iov_iter_revert(iter, io_size - iov_iter_count(iter)); if (!force_nonblock || ret2 != -EAGAIN) { /* IOPOLL retry should happen for io-wq threads */ if ((req->ctx->flags & IORING_SETUP_IOPOLL) && ret2 == -EAGAIN) @@ -3978,6 +3979,7 @@ static int io_remove_buffers(struct io_kiocb *req, unsigned int issue_flags) static int io_provide_buffers_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe) { + unsigned long size; struct io_provide_buf *p = &req->pbuf; u64 tmp; @@ -3991,7 +3993,8 @@ static int io_provide_buffers_prep(struct io_kiocb *req, p->addr = READ_ONCE(sqe->addr); p->len = READ_ONCE(sqe->len); - if (!access_ok(u64_to_user_ptr(p->addr), (p->len * p->nbufs))) + size = (unsigned long)p->len * p->nbufs; + if (!access_ok(u64_to_user_ptr(p->addr), size)) return -EFAULT; p->bgid = READ_ONCE(sqe->buf_group); @@ -4820,7 +4823,6 @@ static int io_connect(struct io_kiocb *req, unsigned int issue_flags) ret = -ENOMEM; goto out; } - io = req->async_data; memcpy(req->async_data, &__io, sizeof(__io)); return -EAGAIN; } @@ -5583,7 +5585,8 @@ static int io_timeout_prep(struct io_kiocb *req, const struct io_uring_sqe *sqe, data->mode = io_translate_timeout_mode(flags); hrtimer_init(&data->timer, CLOCK_MONOTONIC, data->mode); - io_req_track_inflight(req); + if (is_timeout_link) + io_req_track_inflight(req); return 0; } @@ -6479,8 +6482,6 @@ static int io_submit_sqe(struct io_ring_ctx *ctx, struct io_kiocb *req, ret = io_init_req(ctx, req, sqe); if (unlikely(ret)) { fail_req: - io_put_req(req); - io_req_complete(req, ret); if (link->head) { /* fail even hard links since we don't submit */ link->head->flags |= REQ_F_FAIL_LINK; @@ -6488,6 +6489,8 @@ fail_req: io_req_complete(link->head, -ECANCELED); link->head = NULL; } + io_put_req(req); + io_req_complete(req, ret); return ret; } ret = io_req_prep(req, sqe); @@ -6740,7 +6743,7 @@ static int io_sq_thread(void *data) char buf[TASK_COMM_LEN]; DEFINE_WAIT(wait); - sprintf(buf, "iou-sqp-%d", sqd->task_pid); + snprintf(buf, sizeof(buf), "iou-sqp-%d", sqd->task_pid); set_task_comm(current, buf); current->pf_io_worker = NULL; @@ -6751,21 +6754,32 @@ static int io_sq_thread(void *data) current->flags |= PF_NO_SETAFFINITY; mutex_lock(&sqd->lock); + /* a user may had exited before the thread started */ + io_run_task_work_head(&sqd->park_task_work); + while (!test_bit(IO_SQ_THREAD_SHOULD_STOP, &sqd->state)) { int ret; bool cap_entries, sqt_spin, needs_sched; - if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state)) { + if (test_bit(IO_SQ_THREAD_SHOULD_PARK, &sqd->state) || + signal_pending(current)) { + bool did_sig = false; + mutex_unlock(&sqd->lock); + if (signal_pending(current)) { + struct ksignal ksig; + + did_sig = get_signal(&ksig); + } cond_resched(); mutex_lock(&sqd->lock); io_run_task_work(); io_run_task_work_head(&sqd->park_task_work); + if (did_sig) + break; timeout = jiffies + sqd->sq_thread_idle; continue; } - if (fatal_signal_pending(current)) - break; sqt_spin = false; cap_entries = !list_is_singular(&sqd->ctx_list); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) { @@ -6808,7 +6822,6 @@ static int io_sq_thread(void *data) mutex_unlock(&sqd->lock); schedule(); - try_to_freeze(); mutex_lock(&sqd->lock); list_for_each_entry(ctx, &sqd->ctx_list, sqd_list) io_ring_clear_wakeup_flag(ctx); @@ -6873,7 +6886,7 @@ static int io_run_task_work_sig(void) return 1; if (!signal_pending(current)) return 0; - if (test_tsk_thread_flag(current, TIF_NOTIFY_SIGNAL)) + if (test_thread_flag(TIF_NOTIFY_SIGNAL)) return -ERESTARTSYS; return -EINTR; } @@ -8563,6 +8576,14 @@ static void io_ring_exit_work(struct work_struct *work) struct io_tctx_node *node; int ret; + /* prevent SQPOLL from submitting new requests */ + if (ctx->sq_data) { + io_sq_thread_park(ctx->sq_data); + list_del_init(&ctx->sqd_list); + io_sqd_update_thread_idle(ctx->sq_data); + io_sq_thread_unpark(ctx->sq_data); + } + /* * If we're doing polled IO and end up having requests being * submitted async (out-of-line), then completions can come in while @@ -8599,6 +8620,28 @@ static void io_ring_exit_work(struct work_struct *work) io_ring_ctx_free(ctx); } +/* Returns true if we found and killed one or more timeouts */ +static bool io_kill_timeouts(struct io_ring_ctx *ctx, struct task_struct *tsk, + struct files_struct *files) +{ + struct io_kiocb *req, *tmp; + int canceled = 0; + + spin_lock_irq(&ctx->completion_lock); + list_for_each_entry_safe(req, tmp, &ctx->timeout_list, timeout.list) { + if (io_match_task(req, tsk, files)) { + io_kill_timeout(req, -ECANCELED); + canceled++; + } + } + if (canceled != 0) + io_commit_cqring(ctx); + spin_unlock_irq(&ctx->completion_lock); + if (canceled != 0) + io_cqring_ev_posted(ctx); + return canceled != 0; +} + static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) { unsigned long index; @@ -8614,14 +8657,6 @@ static void io_ring_ctx_wait_and_kill(struct io_ring_ctx *ctx) io_unregister_personality(ctx, index); mutex_unlock(&ctx->uring_lock); - /* prevent SQPOLL from submitting new requests */ - if (ctx->sq_data) { - io_sq_thread_park(ctx->sq_data); - list_del_init(&ctx->sqd_list); - io_sqd_update_thread_idle(ctx->sq_data); - io_sq_thread_unpark(ctx->sq_data); - } - io_kill_timeouts(ctx, NULL, NULL); io_poll_remove_all(ctx, NULL, NULL); @@ -8998,6 +9033,8 @@ void __io_uring_task_cancel(void) /* make sure overflow events are dropped */ atomic_inc(&tctx->in_idle); + __io_uring_files_cancel(NULL); + do { /* read completions before cancelations */ inflight = tctx_inflight(tctx); diff --git a/fs/namei.c b/fs/namei.c index 216f16e74351..48a2f288e802 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -579,6 +579,8 @@ static void set_nameidata(struct nameidata *p, int dfd, struct filename *name) p->stack = p->internal; p->dfd = dfd; p->name = name; + p->path.mnt = NULL; + p->path.dentry = NULL; p->total_link_count = old ? old->total_link_count : 0; p->saved = old; current->nameidata = p; @@ -652,6 +654,8 @@ static void terminate_walk(struct nameidata *nd) rcu_read_unlock(); } nd->depth = 0; + nd->path.mnt = NULL; + nd->path.dentry = NULL; } /* path_put is needed afterwards regardless of success or failure */ @@ -2322,8 +2326,6 @@ static const char *path_init(struct nameidata *nd, unsigned flags) } nd->root.mnt = NULL; - nd->path.mnt = NULL; - nd->path.dentry = NULL; /* Absolute pathname -- fetch the root (LOOKUP_IN_ROOT uses nd->dfd). */ if (*s == '/' && !(flags & LOOKUP_IN_ROOT)) { @@ -2419,16 +2421,16 @@ static int path_lookupat(struct nameidata *nd, unsigned flags, struct path *path while (!(err = link_path_walk(s, nd)) && (s = lookup_last(nd)) != NULL) ; + if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) { + err = handle_lookup_down(nd); + nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please... + } if (!err) err = complete_walk(nd); if (!err && nd->flags & LOOKUP_DIRECTORY) if (!d_can_lookup(nd->path.dentry)) err = -ENOTDIR; - if (!err && unlikely(nd->flags & LOOKUP_MOUNTPOINT)) { - err = handle_lookup_down(nd); - nd->flags &= ~LOOKUP_JUMPED; // no d_weak_revalidate(), please... - } if (!err) { *path = nd->path; nd->path.mnt = NULL; diff --git a/fs/ocfs2/aops.c b/fs/ocfs2/aops.c index 3bfb4147895a..ad20403b383f 100644 --- a/fs/ocfs2/aops.c +++ b/fs/ocfs2/aops.c @@ -2295,7 +2295,7 @@ static int ocfs2_dio_end_io_write(struct inode *inode, struct ocfs2_alloc_context *meta_ac = NULL; handle_t *handle = NULL; loff_t end = offset + bytes; - int ret = 0, credits = 0, locked = 0; + int ret = 0, credits = 0; ocfs2_init_dealloc_ctxt(&dealloc); @@ -2306,13 +2306,6 @@ static int ocfs2_dio_end_io_write(struct inode *inode, !dwc->dw_orphaned) goto out; - /* ocfs2_file_write_iter will get i_mutex, so we need not lock if we - * are in that context. */ - if (dwc->dw_writer_pid != task_pid_nr(current)) { - inode_lock(inode); - locked = 1; - } - ret = ocfs2_inode_lock(inode, &di_bh, 1); if (ret < 0) { mlog_errno(ret); @@ -2393,8 +2386,6 @@ out: if (meta_ac) ocfs2_free_alloc_context(meta_ac); ocfs2_run_deallocs(osb, &dealloc); - if (locked) - inode_unlock(inode); ocfs2_dio_free_write_ctx(inode, dwc); return ret; diff --git a/fs/ocfs2/file.c b/fs/ocfs2/file.c index 6611c64ca0be..5edc1d0cf115 100644 --- a/fs/ocfs2/file.c +++ b/fs/ocfs2/file.c @@ -1245,22 +1245,24 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, goto bail_unlock; } } + down_write(&OCFS2_I(inode)->ip_alloc_sem); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS + 2 * ocfs2_quota_trans_credits(sb)); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto bail_unlock; + goto bail_unlock_alloc; } status = __dquot_transfer(inode, transfer_to); if (status < 0) goto bail_commit; } else { + down_write(&OCFS2_I(inode)->ip_alloc_sem); handle = ocfs2_start_trans(osb, OCFS2_INODE_UPDATE_CREDITS); if (IS_ERR(handle)) { status = PTR_ERR(handle); mlog_errno(status); - goto bail_unlock; + goto bail_unlock_alloc; } } @@ -1273,6 +1275,8 @@ int ocfs2_setattr(struct user_namespace *mnt_userns, struct dentry *dentry, bail_commit: ocfs2_commit_trans(osb, handle); +bail_unlock_alloc: + up_write(&OCFS2_I(inode)->ip_alloc_sem); bail_unlock: if (status && inode_locked) { ocfs2_inode_unlock_tracker(inode, 1, &oh, had_lock); diff --git a/fs/overlayfs/file.c b/fs/overlayfs/file.c index dbfb35fb0ff7..3847cdc069b5 100644 --- a/fs/overlayfs/file.c +++ b/fs/overlayfs/file.c @@ -430,20 +430,11 @@ static int ovl_mmap(struct file *file, struct vm_area_struct *vma) if (WARN_ON(file != vma->vm_file)) return -EIO; - vma->vm_file = get_file(realfile); + vma_set_file(vma, realfile); old_cred = ovl_override_creds(file_inode(file)->i_sb); ret = call_mmap(vma->vm_file, vma); revert_creds(old_cred); - - if (ret) { - /* Drop reference count from new vm_file value */ - fput(realfile); - } else { - /* Drop reference count from previous vm_file value */ - fput(file); - } - ovl_file_accessed(file); return ret; diff --git a/fs/readdir.c b/fs/readdir.c index 19434b3c982c..09e8ed7d4161 100644 --- a/fs/readdir.c +++ b/fs/readdir.c @@ -150,6 +150,9 @@ static int fillonedir(struct dir_context *ctx, const char *name, int namlen, if (buf->result) return -EINVAL; + buf->result = verify_dirent_name(name, namlen); + if (buf->result < 0) + return buf->result; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; @@ -405,6 +408,9 @@ static int compat_fillonedir(struct dir_context *ctx, const char *name, if (buf->result) return -EINVAL; + buf->result = verify_dirent_name(name, namlen); + if (buf->result < 0) + return buf->result; d_ino = ino; if (sizeof(d_ino) < sizeof(ino) && d_ino != ino) { buf->result = -EOVERFLOW; diff --git a/fs/reiserfs/xattr.h b/fs/reiserfs/xattr.h index 9b3b06da568c..e47fde1182de 100644 --- a/fs/reiserfs/xattr.h +++ b/fs/reiserfs/xattr.h @@ -44,7 +44,7 @@ void reiserfs_security_free(struct reiserfs_security_handle *sec); static inline int reiserfs_xattrs_initialized(struct super_block *sb) { - return REISERFS_SB(sb)->priv_root != NULL; + return REISERFS_SB(sb)->priv_root && REISERFS_SB(sb)->xattr_root; } #define xattr_size(size) ((size) + sizeof(struct reiserfs_xattr_header)) diff --git a/fs/squashfs/export.c b/fs/squashfs/export.c index eb02072d28dd..723763746238 100644 --- a/fs/squashfs/export.c +++ b/fs/squashfs/export.c @@ -152,14 +152,18 @@ __le64 *squashfs_read_inode_lookup_table(struct super_block *sb, start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); - if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + if (start >= end + || (end - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); - if (start >= lookup_table_start || (lookup_table_start - start) > SQUASHFS_METADATA_SIZE) { + if (start >= lookup_table_start || + (lookup_table_start - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/id.c b/fs/squashfs/id.c index 11581bf31af4..ea5387679723 100644 --- a/fs/squashfs/id.c +++ b/fs/squashfs/id.c @@ -97,14 +97,16 @@ __le64 *squashfs_read_id_index_table(struct super_block *sb, start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); - if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + if (start >= end || (end - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); - if (start >= id_table_start || (id_table_start - start) > SQUASHFS_METADATA_SIZE) { + if (start >= id_table_start || (id_table_start - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } diff --git a/fs/squashfs/squashfs_fs.h b/fs/squashfs/squashfs_fs.h index 8d64edb80ebf..b3fdc8212c5f 100644 --- a/fs/squashfs/squashfs_fs.h +++ b/fs/squashfs/squashfs_fs.h @@ -17,6 +17,7 @@ /* size of metadata (inode and directory) blocks */ #define SQUASHFS_METADATA_SIZE 8192 +#define SQUASHFS_BLOCK_OFFSET 2 /* default size of block device I/O */ #ifdef CONFIG_SQUASHFS_4K_DEVBLK_SIZE diff --git a/fs/squashfs/xattr_id.c b/fs/squashfs/xattr_id.c index ead66670b41a..087cab8c78f4 100644 --- a/fs/squashfs/xattr_id.c +++ b/fs/squashfs/xattr_id.c @@ -109,14 +109,16 @@ __le64 *squashfs_read_xattr_id_table(struct super_block *sb, u64 table_start, start = le64_to_cpu(table[n]); end = le64_to_cpu(table[n + 1]); - if (start >= end || (end - start) > SQUASHFS_METADATA_SIZE) { + if (start >= end || (end - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } } start = le64_to_cpu(table[indexes - 1]); - if (start >= table_start || (table_start - start) > SQUASHFS_METADATA_SIZE) { + if (start >= table_start || (table_start - start) > + (SQUASHFS_METADATA_SIZE + SQUASHFS_BLOCK_OFFSET)) { kfree(table); return ERR_PTR(-EINVAL); } |
