summaryrefslogtreecommitdiff
path: root/fs
diff options
context:
space:
mode:
Diffstat (limited to 'fs')
-rw-r--r--fs/cifs/cifs_dfs_ref.c6
-rw-r--r--fs/cifs/cifsglob.h7
-rw-r--r--fs/cifs/connect.c110
-rw-r--r--fs/cifs/dns_resolve.c10
-rw-r--r--fs/cifs/dns_resolve.h2
-rw-r--r--fs/cifs/misc.c2
-rw-r--r--fs/cifs/smb2ops.c6
-rw-r--r--fs/cifs/smb2pdu.h1
-rw-r--r--fs/configfs/file.c29
-rw-r--r--fs/fcntl.c2
-rw-r--r--fs/fs_context.c54
-rw-r--r--fs/hfs/bfind.c14
-rw-r--r--fs/hfs/bnode.c25
-rw-r--r--fs/hfs/btree.h7
-rw-r--r--fs/hfs/super.c10
-rw-r--r--fs/io_uring.c8
-rw-r--r--fs/iomap/buffered-io.c8
-rw-r--r--fs/iomap/seek.c25
-rw-r--r--fs/xfs/libxfs/xfs_ag.c8
-rw-r--r--fs/xfs/libxfs/xfs_attr.c16
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.c55
-rw-r--r--fs/xfs/libxfs/xfs_ialloc.h3
-rw-r--r--fs/xfs/libxfs/xfs_inode_buf.c28
-rw-r--r--fs/xfs/libxfs/xfs_trans_inode.c10
-rw-r--r--fs/xfs/scrub/inode.c18
-rw-r--r--fs/xfs/xfs_inode.c13
-rw-r--r--fs/xfs/xfs_ioctl.c27
-rw-r--r--fs/xfs/xfs_rtalloc.c49
-rw-r--r--fs/zonefs/super.c3
29 files changed, 428 insertions, 128 deletions
diff --git a/fs/cifs/cifs_dfs_ref.c b/fs/cifs/cifs_dfs_ref.c
index 57f91311fdaa..007427ba75e5 100644
--- a/fs/cifs/cifs_dfs_ref.c
+++ b/fs/cifs/cifs_dfs_ref.c
@@ -176,7 +176,7 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
}
}
- rc = dns_resolve_server_name_to_ip(name, &srvIP);
+ rc = dns_resolve_server_name_to_ip(name, &srvIP, NULL);
if (rc < 0) {
cifs_dbg(FYI, "%s: Failed to resolve server part of %s to IP: %d\n",
__func__, name, rc);
@@ -211,6 +211,10 @@ char *cifs_compose_mount_options(const char *sb_mountdata,
else
noff = tkn_e - (sb_mountdata + off) + 1;
+ if (strncasecmp(sb_mountdata + off, "cruid=", 6) == 0) {
+ off += noff;
+ continue;
+ }
if (strncasecmp(sb_mountdata + off, "unc=", 4) == 0) {
off += noff;
continue;
diff --git a/fs/cifs/cifsglob.h b/fs/cifs/cifsglob.h
index 3c2e117bb926..c0bfc2f01030 100644
--- a/fs/cifs/cifsglob.h
+++ b/fs/cifs/cifsglob.h
@@ -75,6 +75,9 @@
#define SMB_ECHO_INTERVAL_MAX 600
#define SMB_ECHO_INTERVAL_DEFAULT 60
+/* dns resolution interval in seconds */
+#define SMB_DNS_RESOLVE_INTERVAL_DEFAULT 600
+
/* maximum number of PDUs in one compound */
#define MAX_COMPOUND 5
@@ -646,6 +649,7 @@ struct TCP_Server_Info {
/* point to the SMBD connection if RDMA is used instead of socket */
struct smbd_connection *smbd_conn;
struct delayed_work echo; /* echo ping workqueue job */
+ struct delayed_work resolve; /* dns resolution workqueue job */
char *smallbuf; /* pointer to current "small" buffer */
char *bigbuf; /* pointer to current "big" buffer */
/* Total size of this PDU. Only valid from cifs_demultiplex_thread */
@@ -689,6 +693,9 @@ struct TCP_Server_Info {
bool use_swn_dstaddr;
struct sockaddr_storage swn_dstaddr;
#endif
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ bool is_dfs_conn; /* if a dfs connection */
+#endif
};
struct cifs_credits {
diff --git a/fs/cifs/connect.c b/fs/cifs/connect.c
index 01dc45178f66..1b04d6ec14dd 100644
--- a/fs/cifs/connect.c
+++ b/fs/cifs/connect.c
@@ -78,6 +78,8 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
int rc;
int len;
char *unc, *ipaddr = NULL;
+ time64_t expiry, now;
+ unsigned long ttl = SMB_DNS_RESOLVE_INTERVAL_DEFAULT;
if (!server->hostname)
return -EINVAL;
@@ -91,13 +93,13 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
}
scnprintf(unc, len, "\\\\%s", server->hostname);
- rc = dns_resolve_server_name_to_ip(unc, &ipaddr);
+ rc = dns_resolve_server_name_to_ip(unc, &ipaddr, &expiry);
kfree(unc);
if (rc < 0) {
cifs_dbg(FYI, "%s: failed to resolve server part of %s to IP: %d\n",
__func__, server->hostname, rc);
- return rc;
+ goto requeue_resolve;
}
spin_lock(&cifs_tcp_ses_lock);
@@ -106,7 +108,45 @@ static int reconn_set_ipaddr_from_hostname(struct TCP_Server_Info *server)
spin_unlock(&cifs_tcp_ses_lock);
kfree(ipaddr);
- return !rc ? -1 : 0;
+ /* rc == 1 means success here */
+ if (rc) {
+ now = ktime_get_real_seconds();
+ if (expiry && expiry > now)
+ /*
+ * To make sure we don't use the cached entry, retry 1s
+ * after expiry.
+ */
+ ttl = (expiry - now + 1);
+ }
+ rc = !rc ? -1 : 0;
+
+requeue_resolve:
+ cifs_dbg(FYI, "%s: next dns resolution scheduled for %lu seconds in the future\n",
+ __func__, ttl);
+ mod_delayed_work(cifsiod_wq, &server->resolve, (ttl * HZ));
+
+ return rc;
+}
+
+
+static void cifs_resolve_server(struct work_struct *work)
+{
+ int rc;
+ struct TCP_Server_Info *server = container_of(work,
+ struct TCP_Server_Info, resolve.work);
+
+ mutex_lock(&server->srv_mutex);
+
+ /*
+ * Resolve the hostname again to make sure that IP address is up-to-date.
+ */
+ rc = reconn_set_ipaddr_from_hostname(server);
+ if (rc) {
+ cifs_dbg(FYI, "%s: failed to resolve hostname: %d\n",
+ __func__, rc);
+ }
+
+ mutex_unlock(&server->srv_mutex);
}
#ifdef CONFIG_CIFS_DFS_UPCALL
@@ -680,6 +720,7 @@ static void clean_demultiplex_info(struct TCP_Server_Info *server)
spin_unlock(&cifs_tcp_ses_lock);
cancel_delayed_work_sync(&server->echo);
+ cancel_delayed_work_sync(&server->resolve);
spin_lock(&GlobalMid_Lock);
server->tcpStatus = CifsExiting;
@@ -1227,6 +1268,16 @@ cifs_find_tcp_session(struct smb3_fs_context *ctx)
spin_lock(&cifs_tcp_ses_lock);
list_for_each_entry(server, &cifs_tcp_ses_list, tcp_ses_list) {
+#ifdef CONFIG_CIFS_DFS_UPCALL
+ /*
+ * DFS failover implementation in cifs_reconnect() requires unique tcp sessions for
+ * DFS connections to do failover properly, so avoid sharing them with regular
+ * shares or even links that may connect to same server but having completely
+ * different failover targets.
+ */
+ if (server->is_dfs_conn)
+ continue;
+#endif
/*
* Skip ses channels since they're only handled in lower layers
* (e.g. cifs_send_recv).
@@ -1254,12 +1305,16 @@ cifs_put_tcp_session(struct TCP_Server_Info *server, int from_reconnect)
return;
}
+ /* srv_count can never go negative */
+ WARN_ON(server->srv_count < 0);
+
put_net(cifs_net_ns(server));
list_del_init(&server->tcp_ses_list);
spin_unlock(&cifs_tcp_ses_lock);
cancel_delayed_work_sync(&server->echo);
+ cancel_delayed_work_sync(&server->resolve);
if (from_reconnect)
/*
@@ -1342,6 +1397,7 @@ cifs_get_tcp_session(struct smb3_fs_context *ctx)
INIT_LIST_HEAD(&tcp_ses->tcp_ses_list);
INIT_LIST_HEAD(&tcp_ses->smb_ses_list);
INIT_DELAYED_WORK(&tcp_ses->echo, cifs_echo_request);
+ INIT_DELAYED_WORK(&tcp_ses->resolve, cifs_resolve_server);
INIT_DELAYED_WORK(&tcp_ses->reconnect, smb2_reconnect_server);
mutex_init(&tcp_ses->reconnect_mutex);
memcpy(&tcp_ses->srcaddr, &ctx->srcaddr,
@@ -1427,6 +1483,12 @@ smbd_connected:
/* queue echo request delayed work */
queue_delayed_work(cifsiod_wq, &tcp_ses->echo, tcp_ses->echo_interval);
+ /* queue dns resolution delayed work */
+ cifs_dbg(FYI, "%s: next dns resolution scheduled for %d seconds in the future\n",
+ __func__, SMB_DNS_RESOLVE_INTERVAL_DEFAULT);
+
+ queue_delayed_work(cifsiod_wq, &tcp_ses->resolve, (SMB_DNS_RESOLVE_INTERVAL_DEFAULT * HZ));
+
return tcp_ses;
out_err_crypto_release:
@@ -1605,6 +1667,9 @@ void cifs_put_smb_ses(struct cifs_ses *ses)
}
spin_unlock(&cifs_tcp_ses_lock);
+ /* ses_count can never go negative */
+ WARN_ON(ses->ses_count < 0);
+
spin_lock(&GlobalMid_Lock);
if (ses->status == CifsGood)
ses->status = CifsExiting;
@@ -1972,6 +2037,9 @@ cifs_put_tcon(struct cifs_tcon *tcon)
return;
}
+ /* tc_count can never go negative */
+ WARN_ON(tcon->tc_count < 0);
+
if (tcon->use_witness) {
int rc;
@@ -2910,6 +2978,23 @@ static int mount_setup_tlink(struct cifs_sb_info *cifs_sb, struct cifs_ses *ses,
}
#ifdef CONFIG_CIFS_DFS_UPCALL
+static int mount_get_dfs_conns(struct smb3_fs_context *ctx, struct cifs_sb_info *cifs_sb,
+ unsigned int *xid, struct TCP_Server_Info **nserver,
+ struct cifs_ses **nses, struct cifs_tcon **ntcon)
+{
+ int rc;
+
+ ctx->nosharesock = true;
+ rc = mount_get_conns(ctx, cifs_sb, xid, nserver, nses, ntcon);
+ if (*nserver) {
+ cifs_dbg(FYI, "%s: marking tcp session as a dfs connection\n", __func__);
+ spin_lock(&cifs_tcp_ses_lock);
+ (*nserver)->is_dfs_conn = true;
+ spin_unlock(&cifs_tcp_ses_lock);
+ }
+ return rc;
+}
+
/*
* cifs_build_path_to_root returns full path to root when we do not have an
* existing connection (tcon)
@@ -3105,7 +3190,7 @@ static int do_dfs_failover(const char *path, const char *full_path, struct cifs_
tmp_ctx.prepath);
mount_put_conns(cifs_sb, *xid, *server, *ses, *tcon);
- rc = mount_get_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
+ rc = mount_get_dfs_conns(&tmp_ctx, cifs_sb, xid, server, ses, tcon);
if (!rc || (*server && *ses)) {
/*
* We were able to connect to new target server. Update current context with
@@ -3404,7 +3489,12 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
goto error;
}
- ctx->nosharesock = true;
+ mount_put_conns(cifs_sb, xid, server, ses, tcon);
+ /*
+ * Ignore error check here because we may failover to other targets from cached a
+ * referral.
+ */
+ (void)mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
/* Get path of DFS root */
ref_path = build_unc_path_to_root(ctx, cifs_sb, false);
@@ -3433,7 +3523,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
/* Connect to new DFS target only if we were redirected */
if (oldmnt != cifs_sb->ctx->mount_options) {
mount_put_conns(cifs_sb, xid, server, ses, tcon);
- rc = mount_get_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
+ rc = mount_get_dfs_conns(ctx, cifs_sb, &xid, &server, &ses, &tcon);
}
if (rc && !server && !ses) {
/* Failed to connect. Try to connect to other targets in the referral. */
@@ -3459,7 +3549,7 @@ int cifs_mount(struct cifs_sb_info *cifs_sb, struct smb3_fs_context *ctx)
rc = -ELOOP;
} while (rc == -EREMOTE);
- if (rc || !tcon)
+ if (rc || !tcon || !ses)
goto error;
kfree(ref_path);
@@ -4095,7 +4185,8 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
if (!tree)
return -ENOMEM;
- if (!tcon->dfs_path) {
+ /* If it is not dfs or there was no cached dfs referral, then reconnect to same share */
+ if (!tcon->dfs_path || dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl)) {
if (tcon->ipc) {
scnprintf(tree, MAX_TREE_SIZE, "\\\\%s\\IPC$", server->hostname);
rc = ops->tree_connect(xid, tcon->ses, tree, tcon, nlsc);
@@ -4105,9 +4196,6 @@ int cifs_tree_connect(const unsigned int xid, struct cifs_tcon *tcon, const stru
goto out;
}
- rc = dfs_cache_noreq_find(tcon->dfs_path + 1, &ref, &tl);
- if (rc)
- goto out;
isroot = ref.server_type == DFS_TYPE_ROOT;
free_dfs_info_param(&ref);
diff --git a/fs/cifs/dns_resolve.c b/fs/cifs/dns_resolve.c
index d15b82d569ef..8c616aaeb7c4 100644
--- a/fs/cifs/dns_resolve.c
+++ b/fs/cifs/dns_resolve.c
@@ -24,6 +24,7 @@
* dns_resolve_server_name_to_ip - Resolve UNC server name to ip address.
* @unc: UNC path specifying the server (with '/' as delimiter)
* @ip_addr: Where to return the IP address.
+ * @expiry: Where to return the expiry time for the dns record.
*
* The IP address will be returned in string form, and the caller is
* responsible for freeing it.
@@ -31,7 +32,7 @@
* Returns length of result on success, -ve on error.
*/
int
-dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
+dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry)
{
struct sockaddr_storage ss;
const char *hostname, *sep;
@@ -66,13 +67,14 @@ dns_resolve_server_name_to_ip(const char *unc, char **ip_addr)
/* Perform the upcall */
rc = dns_query(current->nsproxy->net_ns, NULL, hostname, len,
- NULL, ip_addr, NULL, false);
+ NULL, ip_addr, expiry, false);
if (rc < 0)
cifs_dbg(FYI, "%s: unable to resolve: %*.*s\n",
__func__, len, len, hostname);
else
- cifs_dbg(FYI, "%s: resolved: %*.*s to %s\n",
- __func__, len, len, hostname, *ip_addr);
+ cifs_dbg(FYI, "%s: resolved: %*.*s to %s expiry %llu\n",
+ __func__, len, len, hostname, *ip_addr,
+ expiry ? (*expiry) : 0);
return rc;
name_is_IP_address:
diff --git a/fs/cifs/dns_resolve.h b/fs/cifs/dns_resolve.h
index 5be060b82b13..9fa2807ef79e 100644
--- a/fs/cifs/dns_resolve.h
+++ b/fs/cifs/dns_resolve.h
@@ -12,7 +12,7 @@
#define _DNS_RESOLVE_H
#ifdef __KERNEL__
-extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr);
+extern int dns_resolve_server_name_to_ip(const char *unc, char **ip_addr, time64_t *expiry);
#endif /* KERNEL */
#endif /* _DNS_RESOLVE_H */
diff --git a/fs/cifs/misc.c b/fs/cifs/misc.c
index 184138b4eb8c..844abeb2b48f 100644
--- a/fs/cifs/misc.c
+++ b/fs/cifs/misc.c
@@ -1187,7 +1187,7 @@ int match_target_ip(struct TCP_Server_Info *server,
cifs_dbg(FYI, "%s: target name: %s\n", __func__, target + 2);
- rc = dns_resolve_server_name_to_ip(target, &tip);
+ rc = dns_resolve_server_name_to_ip(target, &tip, NULL);
if (rc < 0)
goto out;
diff --git a/fs/cifs/smb2ops.c b/fs/cifs/smb2ops.c
index e4c8f603dd58..ba3c58e1f725 100644
--- a/fs/cifs/smb2ops.c
+++ b/fs/cifs/smb2ops.c
@@ -557,8 +557,8 @@ parse_server_interfaces(struct network_interface_info_ioctl_rsp *buf,
p = buf;
while (bytes_left >= sizeof(*p)) {
info->speed = le64_to_cpu(p->LinkSpeed);
- info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE);
- info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE);
+ info->rdma_capable = le32_to_cpu(p->Capability & RDMA_CAPABLE) ? 1 : 0;
+ info->rss_capable = le32_to_cpu(p->Capability & RSS_CAPABLE) ? 1 : 0;
cifs_dbg(FYI, "%s: adding iface %zu\n", __func__, *iface_count);
cifs_dbg(FYI, "%s: speed %zu bps\n", __func__, info->speed);
@@ -2910,6 +2910,8 @@ smb2_get_dfs_refer(const unsigned int xid, struct cifs_ses *ses,
/* ipc tcons are not refcounted */
spin_lock(&cifs_tcp_ses_lock);
tcon->tc_count--;
+ /* tc_count can never go negative */
+ WARN_ON(tcon->tc_count < 0);
spin_unlock(&cifs_tcp_ses_lock);
}
kfree(utf16_path);
diff --git a/fs/cifs/smb2pdu.h b/fs/cifs/smb2pdu.h
index 4b27cb9105fd..e9cac7970b66 100644
--- a/fs/cifs/smb2pdu.h
+++ b/fs/cifs/smb2pdu.h
@@ -394,6 +394,7 @@ struct smb2_compression_capabilities_context {
__u16 Padding;
__u32 Flags;
__le16 CompressionAlgorithms[3];
+ __u16 Pad; /* Some servers require pad to DataLen multiple of 8 */
/* Check if pad needed */
} __packed;
diff --git a/fs/configfs/file.c b/fs/configfs/file.c
index 2f63bf3a7325..5a0be9985bae 100644
--- a/fs/configfs/file.c
+++ b/fs/configfs/file.c
@@ -91,7 +91,10 @@ static ssize_t configfs_read_iter(struct kiocb *iocb, struct iov_iter *to)
}
pr_debug("%s: count = %zd, pos = %lld, buf = %s\n",
__func__, iov_iter_count(to), iocb->ki_pos, buffer->page);
- retval = copy_to_iter(buffer->page, buffer->count, to);
+ if (iocb->ki_pos >= buffer->count)
+ goto out;
+ retval = copy_to_iter(buffer->page + iocb->ki_pos,
+ buffer->count - iocb->ki_pos, to);
iocb->ki_pos += retval;
if (retval == 0)
retval = -EFAULT;
@@ -162,7 +165,10 @@ static ssize_t configfs_bin_read_iter(struct kiocb *iocb, struct iov_iter *to)
buffer->needs_read_fill = 0;
}
- retval = copy_to_iter(buffer->bin_buffer, buffer->bin_buffer_size, to);
+ if (iocb->ki_pos >= buffer->bin_buffer_size)
+ goto out;
+ retval = copy_to_iter(buffer->bin_buffer + iocb->ki_pos,
+ buffer->bin_buffer_size - iocb->ki_pos, to);
iocb->ki_pos += retval;
if (retval == 0)
retval = -EFAULT;
@@ -171,21 +177,28 @@ out:
return retval;
}
-static int fill_write_buffer(struct configfs_buffer *buffer,
+/* Fill [buffer, buffer + pos) with data coming from @from. */
+static int fill_write_buffer(struct configfs_buffer *buffer, loff_t pos,
struct iov_iter *from)
{
+ loff_t to_copy;
int copied;
+ u8 *to;
if (!buffer->page)
buffer->page = (char *)__get_free_pages(GFP_KERNEL, 0);
if (!buffer->page)
return -ENOMEM;
- copied = copy_from_iter(buffer->page, SIMPLE_ATTR_SIZE - 1, from);
+ to_copy = SIMPLE_ATTR_SIZE - 1 - pos;
+ if (to_copy <= 0)
+ return 0;
+ to = buffer->page + pos;
+ copied = copy_from_iter(to, to_copy, from);
buffer->needs_read_fill = 1;
/* if buf is assumed to contain a string, terminate it by \0,
* so e.g. sscanf() can scan the string easily */
- buffer->page[copied] = 0;
+ to[copied] = 0;
return copied ? : -EFAULT;
}
@@ -217,7 +230,7 @@ static ssize_t configfs_write_iter(struct kiocb *iocb, struct iov_iter *from)
ssize_t len;
mutex_lock(&buffer->mutex);
- len = fill_write_buffer(buffer, from);
+ len = fill_write_buffer(buffer, iocb->ki_pos, from);
if (len > 0)
len = flush_write_buffer(file, buffer, len);
if (len > 0)
@@ -272,7 +285,9 @@ static ssize_t configfs_bin_write_iter(struct kiocb *iocb,
buffer->bin_buffer_size = end_offset;
}
- len = copy_from_iter(buffer->bin_buffer, buffer->bin_buffer_size, from);
+ len = copy_from_iter(buffer->bin_buffer + iocb->ki_pos,
+ buffer->bin_buffer_size - iocb->ki_pos, from);
+ iocb->ki_pos += len;
out:
mutex_unlock(&buffer->mutex);
return len ? : -EFAULT;
diff --git a/fs/fcntl.c b/fs/fcntl.c
index dfc72f15be7f..f946bec8f1f1 100644
--- a/fs/fcntl.c
+++ b/fs/fcntl.c
@@ -369,8 +369,8 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg,
/* 32-bit arches must use fcntl64() */
case F_OFD_SETLK:
case F_OFD_SETLKW:
-#endif
fallthrough;
+#endif
case F_SETLK:
case F_SETLKW:
if (copy_from_user(&flock, argp, sizeof(flock)))
diff --git a/fs/fs_context.c b/fs/fs_context.c
index 2834d1afa6e8..de1985eae535 100644
--- a/fs/fs_context.c
+++ b/fs/fs_context.c
@@ -80,6 +80,35 @@ static int vfs_parse_sb_flag(struct fs_context *fc, const char *key)
}
/**
+ * vfs_parse_fs_param_source - Handle setting "source" via parameter
+ * @fc: The filesystem context to modify
+ * @param: The parameter
+ *
+ * This is a simple helper for filesystems to verify that the "source" they
+ * accept is sane.
+ *
+ * Returns 0 on success, -ENOPARAM if this is not "source" parameter, and
+ * -EINVAL otherwise. In the event of failure, supplementary error information
+ * is logged.
+ */
+int vfs_parse_fs_param_source(struct fs_context *fc, struct fs_parameter *param)
+{
+ if (strcmp(param->key, "source") != 0)
+ return -ENOPARAM;
+
+ if (param->type != fs_value_is_string)
+ return invalf(fc, "Non-string source");
+
+ if (fc->source)
+ return invalf(fc, "Multiple sources");
+
+ fc->source = param->string;
+ param->string = NULL;
+ return 0;
+}
+EXPORT_SYMBOL(vfs_parse_fs_param_source);
+
+/**
* vfs_parse_fs_param - Add a single parameter to a superblock config
* @fc: The filesystem context to modify
* @param: The parameter
@@ -122,15 +151,9 @@ int vfs_parse_fs_param(struct fs_context *fc, struct fs_parameter *param)
/* If the filesystem doesn't take any arguments, give it the
* default handling of source.
*/
- if (strcmp(param->key, "source") == 0) {
- if (param->type != fs_value_is_string)
- return invalf(fc, "VFS: Non-string source");
- if (fc->source)
- return invalf(fc, "VFS: Multiple sources");
- fc->source = param->string;
- param->string = NULL;
- return 0;
- }
+ ret = vfs_parse_fs_param_source(fc, param);
+ if (ret != -ENOPARAM)
+ return ret;
return invalf(fc, "%s: Unknown parameter '%s'",
fc->fs_type->name, param->key);
@@ -504,16 +527,11 @@ static int legacy_parse_param(struct fs_context *fc, struct fs_parameter *param)
struct legacy_fs_context *ctx = fc->fs_private;
unsigned int size = ctx->data_size;
size_t len = 0;
+ int ret;
- if (strcmp(param->key, "source") == 0) {
- if (param->type != fs_value_is_string)
- return invalf(fc, "VFS: Legacy: Non-string source");
- if (fc->source)
- return invalf(fc, "VFS: Legacy: Multiple sources");
- fc->source = param->string;
- param->string = NULL;
- return 0;
- }
+ ret = vfs_parse_fs_param_source(fc, param);
+ if (ret != -ENOPARAM)
+ return ret;
if (ctx->param_type == LEGACY_FS_MONOLITHIC_PARAMS)
return invalf(fc, "VFS: Legacy: Can't mix monolithic and individual options");
diff --git a/fs/hfs/bfind.c b/fs/hfs/bfind.c
index 4af318fbda77..ef9498a6e88a 100644
--- a/fs/hfs/bfind.c
+++ b/fs/hfs/bfind.c
@@ -25,7 +25,19 @@ int hfs_find_init(struct hfs_btree *tree, struct hfs_find_data *fd)
fd->key = ptr + tree->max_key_len + 2;
hfs_dbg(BNODE_REFS, "find_init: %d (%p)\n",
tree->cnid, __builtin_return_address(0));
- mutex_lock(&tree->tree_lock);
+ switch (tree->cnid) {
+ case HFS_CAT_CNID:
+ mutex_lock_nested(&tree->tree_lock, CATALOG_BTREE_MUTEX);
+ break;
+ case HFS_EXT_CNID:
+ mutex_lock_nested(&tree->tree_lock, EXTENTS_BTREE_MUTEX);
+ break;
+ case HFS_ATTR_CNID:
+ mutex_lock_nested(&tree->tree_lock, ATTR_BTREE_MUTEX);
+ break;
+ default:
+ return -EINVAL;
+ }
return 0;
}
diff --git a/fs/hfs/bnode.c b/fs/hfs/bnode.c
index b63a4df7327b..c0a73a6ffb28 100644
--- a/fs/hfs/bnode.c
+++ b/fs/hfs/bnode.c
@@ -15,16 +15,31 @@
#include "btree.h"
-void hfs_bnode_read(struct hfs_bnode *node, void *buf,
- int off, int len)
+void hfs_bnode_read(struct hfs_bnode *node, void *buf, int off, int len)
{
struct page *page;
+ int pagenum;
+ int bytes_read;
+ int bytes_to_read;
+ void *vaddr;
off += node->page_offset;
- page = node->page[0];
+ pagenum = off >> PAGE_SHIFT;
+ off &= ~PAGE_MASK; /* compute page offset for the first page */
- memcpy(buf, kmap(page) + off, len);
- kunmap(page);
+ for (bytes_read = 0; bytes_read < len; bytes_read += bytes_to_read) {
+ if (pagenum >= node->tree->pages_per_bnode)
+ break;
+ page = node->page[pagenum];
+ bytes_to_read = min_t(int, len - bytes_read, PAGE_SIZE - off);
+
+ vaddr = kmap_atomic(page);
+ memcpy(buf + bytes_read, vaddr + off, bytes_to_read);
+ kunmap_atomic(vaddr);
+
+ pagenum++;
+ off = 0; /* page offset only applies to the first page */
+ }
}
u16 hfs_bnode_read_u16(struct hfs_bnode *node, int off)
diff --git a/fs/hfs/btree.h b/fs/hfs/btree.h
index 4ba45caf5939..0e6baee93245 100644
--- a/fs/hfs/btree.h
+++ b/fs/hfs/btree.h
@@ -13,6 +13,13 @@ typedef int (*btree_keycmp)(const btree_key *, const btree_key *);
#define NODE_HASH_SIZE 256
+/* B-tree mutex nested subclasses */
+enum hfs_btree_mutex_classes {
+ CATALOG_BTREE_MUTEX,
+ EXTENTS_BTREE_MUTEX,
+ ATTR_BTREE_MUTEX,
+};
+
/* A HFS BTree held in memory */
struct hfs_btree {
struct super_block *sb;
diff --git a/fs/hfs/super.c b/fs/hfs/super.c
index 44d07c9e3a7f..12d9bae39363 100644
--- a/fs/hfs/super.c
+++ b/fs/hfs/super.c
@@ -420,14 +420,12 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
if (!res) {
if (fd.entrylength > sizeof(rec) || fd.entrylength < 0) {
res = -EIO;
- goto bail;
+ goto bail_hfs_find;
}
hfs_bnode_read(fd.bnode, &rec, fd.entryoffset, fd.entrylength);
}
- if (res) {
- hfs_find_exit(&fd);
- goto bail_no_root;
- }
+ if (res)
+ goto bail_hfs_find;
res = -EINVAL;
root_inode = hfs_iget(sb, &fd.search_key->cat, &rec);
hfs_find_exit(&fd);
@@ -443,6 +441,8 @@ static int hfs_fill_super(struct super_block *sb, void *data, int silent)
/* everything's okay */
return 0;
+bail_hfs_find:
+ hfs_find_exit(&fd);
bail_no_root:
pr_err("get root inode failed\n");
bail:
diff --git a/fs/io_uring.c b/fs/io_uring.c
index d94fb5835a20..0cac361bf6b8 100644
--- a/fs/io_uring.c
+++ b/fs/io_uring.c
@@ -2016,7 +2016,7 @@ static void io_req_task_submit(struct io_kiocb *req)
/* ctx stays valid until unlock, even if we drop all ours ctx->refs */
mutex_lock(&ctx->uring_lock);
- if (!(current->flags & PF_EXITING) && !current->in_execve)
+ if (!(req->task->flags & PF_EXITING) && !req->task->in_execve)
__io_queue_sqe(req);
else
io_req_complete_failed(req, -EFAULT);
@@ -6019,11 +6019,13 @@ static bool io_drain_req(struct io_kiocb *req)
ret = io_req_prep_async(req);
if (ret)
- return ret;
+ goto fail;
io_prep_async_link(req);
de = kmalloc(sizeof(*de), GFP_KERNEL);
if (!de) {
- io_req_complete_failed(req, -ENOMEM);
+ ret = -ENOMEM;
+fail:
+ io_req_complete_failed(req, ret);
return true;
}
diff --git a/fs/iomap/buffered-io.c b/fs/iomap/buffered-io.c
index 41da4f14c00b..87ccb3438bec 100644
--- a/fs/iomap/buffered-io.c
+++ b/fs/iomap/buffered-io.c
@@ -215,6 +215,7 @@ iomap_read_inline_data(struct inode *inode, struct page *page,
if (PageUptodate(page))
return;
+ BUG_ON(page_has_private(page));
BUG_ON(page->index);
BUG_ON(size > PAGE_SIZE - offset_in_page(iomap->inline_data));
@@ -239,7 +240,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
{
struct iomap_readpage_ctx *ctx = data;
struct page *page = ctx->cur_page;
- struct iomap_page *iop = iomap_page_create(inode, page);
+ struct iomap_page *iop;
bool same_page = false, is_contig = false;
loff_t orig_pos = pos;
unsigned poff, plen;
@@ -252,6 +253,7 @@ iomap_readpage_actor(struct inode *inode, loff_t pos, loff_t length, void *data,
}
/* zero post-eof blocks as the page may be mapped */
+ iop = iomap_page_create(inode, page);
iomap_adjust_read_range(inode, iop, &pos, length, &poff, &plen);
if (plen == 0)
goto done;
@@ -967,7 +969,6 @@ iomap_page_mkwrite_actor(struct inode *inode, loff_t pos, loff_t length,
block_commit_write(page, 0, length);
} else {
WARN_ON_ONCE(!PageUptodate(page));
- iomap_page_create(inode, page);
set_page_dirty(page);
}
@@ -1304,14 +1305,13 @@ iomap_writepage_map(struct iomap_writepage_ctx *wpc,
struct writeback_control *wbc, struct inode *inode,
struct page *page, u64 end_offset)
{
- struct iomap_page *iop = to_iomap_page(page);
+ struct iomap_page *iop = iomap_page_create(inode, page);
struct iomap_ioend *ioend, *next;
unsigned len = i_blocksize(inode);
u64 file_offset; /* file offset of page */
int error = 0, count = 0, i;
LIST_HEAD(submit_list);
- WARN_ON_ONCE(i_blocks_per_page(inode, page) > 1 && !iop);
WARN_ON_ONCE(iop && atomic_read(&iop->write_bytes_pending) != 0);
/*
diff --git a/fs/iomap/seek.c b/fs/iomap/seek.c
index dab1b02eba5b..ce6fb810854f 100644
--- a/fs/iomap/seek.c
+++ b/fs/iomap/seek.c
@@ -35,23 +35,20 @@ loff_t
iomap_seek_hole(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
- loff_t length = size - offset;
loff_t ret;
/* Nothing to be found before or beyond the end of the file. */
if (offset < 0 || offset >= size)
return -ENXIO;
- while (length > 0) {
- ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
- &offset, iomap_seek_hole_actor);
+ while (offset < size) {
+ ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT,
+ ops, &offset, iomap_seek_hole_actor);
if (ret < 0)
return ret;
if (ret == 0)
break;
-
offset += ret;
- length -= ret;
}
return offset;
@@ -83,27 +80,23 @@ loff_t
iomap_seek_data(struct inode *inode, loff_t offset, const struct iomap_ops *ops)
{
loff_t size = i_size_read(inode);
- loff_t length = size - offset;
loff_t ret;
/* Nothing to be found before or beyond the end of the file. */
if (offset < 0 || offset >= size)
return -ENXIO;
- while (length > 0) {
- ret = iomap_apply(inode, offset, length, IOMAP_REPORT, ops,
- &offset, iomap_seek_data_actor);
+ while (offset < size) {
+ ret = iomap_apply(inode, offset, size - offset, IOMAP_REPORT,
+ ops, &offset, iomap_seek_data_actor);
if (ret < 0)
return ret;
if (ret == 0)
- break;
-
+ return offset;
offset += ret;
- length -= ret;
}
- if (length <= 0)
- return -ENXIO;
- return offset;
+ /* We've reached the end of the file without finding data */
+ return -ENXIO;
}
EXPORT_SYMBOL_GPL(iomap_seek_data);
diff --git a/fs/xfs/libxfs/xfs_ag.c b/fs/xfs/libxfs/xfs_ag.c
index 778ec52cce70..ee9ec0c50bec 100644
--- a/fs/xfs/libxfs/xfs_ag.c
+++ b/fs/xfs/libxfs/xfs_ag.c
@@ -804,6 +804,14 @@ xfs_ag_shrink_space(
args.fsbno = XFS_AGB_TO_FSB(mp, agno, aglen - delta);
/*
+ * Make sure that the last inode cluster cannot overlap with the new
+ * end of the AG, even if it's sparse.
+ */
+ error = xfs_ialloc_check_shrink(*tpp, agno, agibp, aglen - delta);
+ if (error)
+ return error;
+
+ /*
* Disable perag reservations so it doesn't cause the allocation request
* to fail. We'll reestablish reservation before we return.
*/
diff --git a/fs/xfs/libxfs/xfs_attr.c b/fs/xfs/libxfs/xfs_attr.c
index d9d7d5137b73..191d51725988 100644
--- a/fs/xfs/libxfs/xfs_attr.c
+++ b/fs/xfs/libxfs/xfs_attr.c
@@ -483,7 +483,7 @@ xfs_attr_set_iter(
if (error)
return error;
- /* fallthrough */
+ fallthrough;
case XFS_DAS_RM_LBLK:
/* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
dac->dela_state = XFS_DAS_RM_LBLK;
@@ -496,7 +496,7 @@ xfs_attr_set_iter(
return -EAGAIN;
}
- /* fallthrough */
+ fallthrough;
case XFS_DAS_RD_LEAF:
/*
* This is the last step for leaf format. Read the block with
@@ -528,7 +528,7 @@ xfs_attr_set_iter(
return error;
}
- /* fallthrough */
+ fallthrough;
case XFS_DAS_ALLOC_NODE:
/*
* If there was an out-of-line value, allocate the blocks we
@@ -590,7 +590,7 @@ xfs_attr_set_iter(
if (error)
return error;
- /* fallthrough */
+ fallthrough;
case XFS_DAS_RM_NBLK:
/* Set state in case xfs_attr_rmtval_remove returns -EAGAIN */
dac->dela_state = XFS_DAS_RM_NBLK;
@@ -603,7 +603,7 @@ xfs_attr_set_iter(
return -EAGAIN;
}
- /* fallthrough */
+ fallthrough;
case XFS_DAS_CLR_FLAG:
/*
* The last state for node format. Look up the old attr and
@@ -1406,7 +1406,7 @@ xfs_attr_remove_iter(
state = dac->da_state;
}
- /* fallthrough */
+ fallthrough;
case XFS_DAS_RMTBLK:
dac->dela_state = XFS_DAS_RMTBLK;
@@ -1441,7 +1441,7 @@ xfs_attr_remove_iter(
return -EAGAIN;
}
- /* fallthrough */
+ fallthrough;
case XFS_DAS_RM_NAME:
/*
* If we came here fresh from a transaction roll, reattach all
@@ -1469,7 +1469,7 @@ xfs_attr_remove_iter(
return -EAGAIN;
}
- /* fallthrough */
+ fallthrough;
case XFS_DAS_RM_SHRINK:
/*
* If the result is small enough, push it all into the inode.
diff --git a/fs/xfs/libxfs/xfs_ialloc.c b/fs/xfs/libxfs/xfs_ialloc.c
index 57d9cb632983..aaf8805a82df 100644
--- a/fs/xfs/libxfs/xfs_ialloc.c
+++ b/fs/xfs/libxfs/xfs_ialloc.c
@@ -2928,3 +2928,58 @@ xfs_ialloc_calc_rootino(
return XFS_AGINO_TO_INO(mp, 0, XFS_AGB_TO_AGINO(mp, first_bno));
}
+
+/*
+ * Ensure there are not sparse inode clusters that cross the new EOAG.
+ *
+ * This is a no-op for non-spinode filesystems since clusters are always fully
+ * allocated and checking the bnobt suffices. However, a spinode filesystem
+ * could have a record where the upper inodes are free blocks. If those blocks
+ * were removed from the filesystem, the inode record would extend beyond EOAG,
+ * which will be flagged as corruption.
+ */
+int
+xfs_ialloc_check_shrink(
+ struct xfs_trans *tp,
+ xfs_agnumber_t agno,
+ struct xfs_buf *agibp,
+ xfs_agblock_t new_length)
+{
+ struct xfs_inobt_rec_incore rec;
+ struct xfs_btree_cur *cur;
+ struct xfs_mount *mp = tp->t_mountp;
+ struct xfs_perag *pag;
+ xfs_agino_t agino = XFS_AGB_TO_AGINO(mp, new_length);
+ int has;
+ int error;
+
+ if (!xfs_sb_version_hassparseinodes(&mp->m_sb))
+ return 0;
+
+ pag = xfs_perag_get(mp, agno);
+ cur = xfs_inobt_init_cursor(mp, tp, agibp, pag, XFS_BTNUM_INO);
+
+ /* Look up the inobt record that would correspond to the new EOFS. */
+ error = xfs_inobt_lookup(cur, agino, XFS_LOOKUP_LE, &has);
+ if (error || !has)
+ goto out;
+
+ error = xfs_inobt_get_rec(cur, &rec, &has);
+ if (error)
+ goto out;
+
+ if (!has) {
+ error = -EFSCORRUPTED;
+ goto out;
+ }
+
+ /* If the record covers inodes that would be beyond EOFS, bail out. */
+ if (rec.ir_startino + XFS_INODES_PER_CHUNK > agino) {
+ error = -ENOSPC;
+ goto out;
+ }
+out:
+ xfs_btree_del_cursor(cur, error);
+ xfs_perag_put(pag);
+ return error;
+}
diff --git a/fs/xfs/libxfs/xfs_ialloc.h b/fs/xfs/libxfs/xfs_ialloc.h
index 9df7c80408ff..9a2112b4ad5e 100644
--- a/fs/xfs/libxfs/xfs_ialloc.h
+++ b/fs/xfs/libxfs/xfs_ialloc.h
@@ -122,4 +122,7 @@ int xfs_ialloc_cluster_alignment(struct xfs_mount *mp);
void xfs_ialloc_setup_geometry(struct xfs_mount *mp);
xfs_ino_t xfs_ialloc_calc_rootino(struct xfs_mount *mp, int sunit);
+int xfs_ialloc_check_shrink(struct xfs_trans *tp, xfs_agnumber_t agno,
+ struct xfs_buf *agibp, xfs_agblock_t new_length);
+
#endif /* __XFS_IALLOC_H__ */
diff --git a/fs/xfs/libxfs/xfs_inode_buf.c b/fs/xfs/libxfs/xfs_inode_buf.c
index 04ce361688f7..84ea2e0af9f0 100644
--- a/fs/xfs/libxfs/xfs_inode_buf.c
+++ b/fs/xfs/libxfs/xfs_inode_buf.c
@@ -592,23 +592,27 @@ xfs_inode_validate_extsize(
/*
* This comment describes a historic gap in this verifier function.
*
- * On older kernels, the extent size hint verifier doesn't check that
- * the extent size hint is an integer multiple of the realtime extent
- * size on a directory with both RTINHERIT and EXTSZINHERIT flags set.
- * The verifier has always enforced the alignment rule for regular
- * files with the REALTIME flag set.
+ * For a directory with both RTINHERIT and EXTSZINHERIT flags set, this
+ * function has never checked that the extent size hint is an integer
+ * multiple of the realtime extent size. Since we allow users to set
+ * this combination on non-rt filesystems /and/ to change the rt
+ * extent size when adding a rt device to a filesystem, the net effect
+ * is that users can configure a filesystem anticipating one rt
+ * geometry and change their minds later. Directories do not use the
+ * extent size hint, so this is harmless for them.
*
* If a directory with a misaligned extent size hint is allowed to
* propagate that hint into a new regular realtime file, the result
* is that the inode cluster buffer verifier will trigger a corruption
- * shutdown the next time it is run.
+ * shutdown the next time it is run, because the verifier has always
+ * enforced the alignment rule for regular files.
*
- * Unfortunately, there could be filesystems with these misconfigured
- * directories in the wild, so we cannot add a check to this verifier
- * at this time because that will result a new source of directory
- * corruption errors when reading an existing filesystem. Instead, we
- * permit the misconfiguration to pass through the verifiers so that
- * callers of this function can correct and mitigate externally.
+ * Because we allow administrators to set a new rt extent size when
+ * adding a rt section, we cannot add a check to this verifier because
+ * that will result a new source of directory corruption errors when
+ * reading an existing filesystem. Instead, we rely on callers to
+ * decide when alignment checks are appropriate, and fix things up as
+ * needed.
*/
if (rt_flag)
diff --git a/fs/xfs/libxfs/xfs_trans_inode.c b/fs/xfs/libxfs/xfs_trans_inode.c
index 8d595a5c4abd..16f723ebe8dd 100644
--- a/fs/xfs/libxfs/xfs_trans_inode.c
+++ b/fs/xfs/libxfs/xfs_trans_inode.c
@@ -143,16 +143,14 @@ xfs_trans_log_inode(
}
/*
- * Inode verifiers on older kernels don't check that the extent size
- * hint is an integer multiple of the rt extent size on a directory
- * with both rtinherit and extszinherit flags set. If we're logging a
- * directory that is misconfigured in this way, clear the hint.
+ * Inode verifiers do not check that the extent size hint is an integer
+ * multiple of the rt extent size on a directory with both rtinherit
+ * and extszinherit flags set. If we're logging a directory that is
+ * misconfigured in this way, clear the hint.
*/
if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
(ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) &&
(ip->i_extsize % ip->i_mount->m_sb.sb_rextsize) > 0) {
- xfs_info_once(ip->i_mount,
- "Correcting misaligned extent size hint in inode 0x%llx.", ip->i_ino);
ip->i_diflags &= ~(XFS_DIFLAG_EXTSIZE |
XFS_DIFLAG_EXTSZINHERIT);
ip->i_extsize = 0;
diff --git a/fs/xfs/scrub/inode.c b/fs/xfs/scrub/inode.c
index 61f90b2c9430..76fbc7ca4cec 100644
--- a/fs/xfs/scrub/inode.c
+++ b/fs/xfs/scrub/inode.c
@@ -73,11 +73,25 @@ xchk_inode_extsize(
uint16_t flags)
{
xfs_failaddr_t fa;
+ uint32_t value = be32_to_cpu(dip->di_extsize);
- fa = xfs_inode_validate_extsize(sc->mp, be32_to_cpu(dip->di_extsize),
- mode, flags);
+ fa = xfs_inode_validate_extsize(sc->mp, value, mode, flags);
if (fa)
xchk_ino_set_corrupt(sc, ino);
+
+ /*
+ * XFS allows a sysadmin to change the rt extent size when adding a rt
+ * section to a filesystem after formatting. If there are any
+ * directories with extszinherit and rtinherit set, the hint could
+ * become misaligned with the new rextsize. The verifier doesn't check
+ * this, because we allow rtinherit directories even without an rt
+ * device. Flag this as an administrative warning since we will clean
+ * this up eventually.
+ */
+ if ((flags & XFS_DIFLAG_RTINHERIT) &&
+ (flags & XFS_DIFLAG_EXTSZINHERIT) &&
+ value % sc->mp->m_sb.sb_rextsize > 0)
+ xchk_ino_set_warning(sc, ino);
}
/*
diff --git a/fs/xfs/xfs_inode.c b/fs/xfs/xfs_inode.c
index a835ceb79ba5..990b72ae3635 100644
--- a/fs/xfs/xfs_inode.c
+++ b/fs/xfs/xfs_inode.c
@@ -2763,6 +2763,19 @@ xfs_remove(
error = xfs_droplink(tp, ip);
if (error)
goto out_trans_cancel;
+
+ /*
+ * Point the unlinked child directory's ".." entry to the root
+ * directory to eliminate back-references to inodes that may
+ * get freed before the child directory is closed. If the fs
+ * gets shrunk, this can lead to dirent inode validation errors.
+ */
+ if (dp->i_ino != tp->t_mountp->m_sb.sb_rootino) {
+ error = xfs_dir_replace(tp, ip, &xfs_name_dotdot,
+ tp->t_mountp->m_sb.sb_rootino, 0);
+ if (error)
+ return error;
+ }
} else {
/*
* When removing a non-directory we need to log the parent
diff --git a/fs/xfs/xfs_ioctl.c b/fs/xfs/xfs_ioctl.c
index 65270e63c032..16039ea10ac9 100644
--- a/fs/xfs/xfs_ioctl.c
+++ b/fs/xfs/xfs_ioctl.c
@@ -1065,7 +1065,24 @@ xfs_fill_fsxattr(
fileattr_fill_xflags(fa, xfs_ip2xflags(ip));
- fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
+ if (ip->i_diflags & XFS_DIFLAG_EXTSIZE) {
+ fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
+ } else if (ip->i_diflags & XFS_DIFLAG_EXTSZINHERIT) {
+ /*
+ * Don't let a misaligned extent size hint on a directory
+ * escape to userspace if it won't pass the setattr checks
+ * later.
+ */
+ if ((ip->i_diflags & XFS_DIFLAG_RTINHERIT) &&
+ ip->i_extsize % mp->m_sb.sb_rextsize > 0) {
+ fa->fsx_xflags &= ~(FS_XFLAG_EXTSIZE |
+ FS_XFLAG_EXTSZINHERIT);
+ fa->fsx_extsize = 0;
+ } else {
+ fa->fsx_extsize = XFS_FSB_TO_B(mp, ip->i_extsize);
+ }
+ }
+
if (ip->i_diflags2 & XFS_DIFLAG2_COWEXTSIZE)
fa->fsx_cowextsize = XFS_FSB_TO_B(mp, ip->i_cowextsize);
fa->fsx_projid = ip->i_projid;
@@ -1292,10 +1309,10 @@ xfs_ioctl_setattr_check_extsize(
new_diflags = xfs_flags2diflags(ip, fa->fsx_xflags);
/*
- * Inode verifiers on older kernels don't check that the extent size
- * hint is an integer multiple of the rt extent size on a directory
- * with both rtinherit and extszinherit flags set. Don't let sysadmins
- * misconfigure directories.
+ * Inode verifiers do not check that the extent size hint is an integer
+ * multiple of the rt extent size on a directory with both rtinherit
+ * and extszinherit flags set. Don't let sysadmins misconfigure
+ * directories.
*/
if ((new_diflags & XFS_DIFLAG_RTINHERIT) &&
(new_diflags & XFS_DIFLAG_EXTSZINHERIT)) {
diff --git a/fs/xfs/xfs_rtalloc.c b/fs/xfs/xfs_rtalloc.c
index 4e7be6b4ca8e..699066fb9052 100644
--- a/fs/xfs/xfs_rtalloc.c
+++ b/fs/xfs/xfs_rtalloc.c
@@ -923,16 +923,41 @@ xfs_growfs_rt(
uint8_t *rsum_cache; /* old summary cache */
sbp = &mp->m_sb;
- /*
- * Initial error checking.
- */
+
if (!capable(CAP_SYS_ADMIN))
return -EPERM;
- if (mp->m_rtdev_targp == NULL || mp->m_rbmip == NULL ||
- (nrblocks = in->newblocks) <= sbp->sb_rblocks ||
- (sbp->sb_rblocks && (in->extsize != sbp->sb_rextsize)))
+
+ /* Needs to have been mounted with an rt device. */
+ if (!XFS_IS_REALTIME_MOUNT(mp))
+ return -EINVAL;
+ /*
+ * Mount should fail if the rt bitmap/summary files don't load, but
+ * we'll check anyway.
+ */
+ if (!mp->m_rbmip || !mp->m_rsumip)
+ return -EINVAL;
+
+ /* Shrink not supported. */
+ if (in->newblocks <= sbp->sb_rblocks)
+ return -EINVAL;
+
+ /* Can only change rt extent size when adding rt volume. */
+ if (sbp->sb_rblocks > 0 && in->extsize != sbp->sb_rextsize)
+ return -EINVAL;
+
+ /* Range check the extent size. */
+ if (XFS_FSB_TO_B(mp, in->extsize) > XFS_MAX_RTEXTSIZE ||
+ XFS_FSB_TO_B(mp, in->extsize) < XFS_MIN_RTEXTSIZE)
return -EINVAL;
- if ((error = xfs_sb_validate_fsb_count(sbp, nrblocks)))
+
+ /* Unsupported realtime features. */
+ if (xfs_sb_version_hasrmapbt(&mp->m_sb) ||
+ xfs_sb_version_hasreflink(&mp->m_sb))
+ return -EOPNOTSUPP;
+
+ nrblocks = in->newblocks;
+ error = xfs_sb_validate_fsb_count(sbp, nrblocks);
+ if (error)
return error;
/*
* Read in the last block of the device, make sure it exists.
@@ -996,7 +1021,8 @@ xfs_growfs_rt(
((sbp->sb_rextents & ((1 << mp->m_blkbit_log) - 1)) != 0);
bmbno < nrbmblocks;
bmbno++) {
- xfs_trans_t *tp;
+ struct xfs_trans *tp;
+ xfs_rfsblock_t nrblocks_step;
*nmp = *mp;
nsbp = &nmp->m_sb;
@@ -1005,10 +1031,9 @@ xfs_growfs_rt(
*/
nsbp->sb_rextsize = in->extsize;
nsbp->sb_rbmblocks = bmbno + 1;
- nsbp->sb_rblocks =
- XFS_RTMIN(nrblocks,
- nsbp->sb_rbmblocks * NBBY *
- nsbp->sb_blocksize * nsbp->sb_rextsize);
+ nrblocks_step = (bmbno + 1) * NBBY * nsbp->sb_blocksize *
+ nsbp->sb_rextsize;
+ nsbp->sb_rblocks = min(nrblocks, nrblocks_step);
nsbp->sb_rextents = nsbp->sb_rblocks;
do_div(nsbp->sb_rextents, nsbp->sb_rextsize);
ASSERT(nsbp->sb_rextents != 0);
diff --git a/fs/zonefs/super.c b/fs/zonefs/super.c
index dbf03635869c..70055d486bf7 100644
--- a/fs/zonefs/super.c
+++ b/fs/zonefs/super.c
@@ -705,9 +705,6 @@ static ssize_t zonefs_file_dio_append(struct kiocb *iocb, struct iov_iter *from)
return 0;
bio = bio_alloc(GFP_NOFS, nr_pages);
- if (!bio)
- return -ENOMEM;
-
bio_set_dev(bio, bdev);
bio->bi_iter.bi_sector = zi->i_zsector;
bio->bi_write_hint = iocb->ki_hint;