diff options
| -rw-r--r-- | Documentation/filesystems/9p.rst | 10 | ||||
| -rw-r--r-- | fs/9p/fid.c | 11 | ||||
| -rw-r--r-- | fs/9p/v9fs.c | 27 | ||||
| -rw-r--r-- | fs/9p/v9fs.h | 28 | ||||
| -rw-r--r-- | fs/9p/v9fs_vfs.h | 15 | ||||
| -rw-r--r-- | fs/9p/vfs_addr.c | 38 | ||||
| -rw-r--r-- | fs/9p/vfs_dentry.c | 105 | ||||
| -rw-r--r-- | fs/9p/vfs_dir.c | 14 | ||||
| -rw-r--r-- | fs/9p/vfs_file.c | 2 | ||||
| -rw-r--r-- | fs/9p/vfs_inode.c | 46 | ||||
| -rw-r--r-- | fs/9p/vfs_inode_dotl.c | 58 | ||||
| -rw-r--r-- | fs/9p/vfs_super.c | 1 | ||||
| -rw-r--r-- | include/net/9p/client.h | 2 | ||||
| -rw-r--r-- | net/9p/client.c | 7 | ||||
| -rw-r--r-- | net/9p/trans_fd.c | 6 | ||||
| -rw-r--r-- | net/9p/trans_rdma.c | 19 | ||||
| -rw-r--r-- | net/9p/trans_usbg.c | 2 | ||||
| -rw-r--r-- | net/9p/trans_virtio.c | 5 |
18 files changed, 321 insertions, 75 deletions
diff --git a/Documentation/filesystems/9p.rst b/Documentation/filesystems/9p.rst index be3504ca034a..3f65db648db0 100644 --- a/Documentation/filesystems/9p.rst +++ b/Documentation/filesystems/9p.rst @@ -23,13 +23,10 @@ the 9p client is available in the form of a USENIX paper: Other applications are described in the following papers: * XCPU & Clustering - http://xcpu.org/papers/xcpu-talk.pdf * KVMFS: control file system for KVM - http://xcpu.org/papers/kvmfs.pdf * CellFS: A New Programming Model for the Cell BE - http://xcpu.org/papers/cellfs-talk.pdf * PROSE I/O: Using 9p to enable Application Partitions - http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf + http://web.archive.org/web/20110101152020/http://plan9.escet.urjc.es/iwp9/cready/PROSE_iwp9_2006.pdf * VirtFS: A Virtualization Aware File System pass-through https://kernel.org/doc/ols/2010/ols2010-pages-109-120.pdf @@ -238,6 +235,11 @@ Options cachetag cache tag to use the specified persistent cache. cache tags for existing cache sessions can be listed at /sys/fs/9p/caches. (applies only to cache=fscache) + + negtimeout the duration (in milliseconds) that negative dentries (paths + that do not actually exist) are retained in the cache. If + set to a negative value, those entries are kept indefinitely + until evicted by the buffer cache management system ============= =============================================================== Behavior diff --git a/fs/9p/fid.c b/fs/9p/fid.c index f84412290a30..76242d450aa7 100644 --- a/fs/9p/fid.c +++ b/fs/9p/fid.c @@ -20,7 +20,9 @@ static inline void __add_fid(struct dentry *dentry, struct p9_fid *fid) { - hlist_add_head(&fid->dlist, (struct hlist_head *)&dentry->d_fsdata); + struct v9fs_dentry *v9fs_dentry = to_v9fs_dentry(dentry); + + hlist_add_head(&fid->dlist, &v9fs_dentry->head); } @@ -112,6 +114,7 @@ void v9fs_open_fid_add(struct inode *inode, struct p9_fid **pfid) static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) { + struct v9fs_dentry *v9fs_dentry = to_v9fs_dentry(dentry); struct p9_fid *fid, *ret; p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p) uid %d any %d\n", @@ -119,11 +122,9 @@ static struct p9_fid *v9fs_fid_find(struct dentry *dentry, kuid_t uid, int any) any); ret = NULL; /* we'll recheck under lock if there's anything to look in */ - if (dentry->d_fsdata) { - struct hlist_head *h = (struct hlist_head *)&dentry->d_fsdata; - + if (!hlist_empty(&v9fs_dentry->head)) { spin_lock(&dentry->d_lock); - hlist_for_each_entry(fid, h, dlist) { + hlist_for_each_entry(fid, &v9fs_dentry->head, dlist) { if (any || uid_eq(fid->uid, uid)) { ret = fid; p9_fid_get(ret); diff --git a/fs/9p/v9fs.c b/fs/9p/v9fs.c index acda42499ca9..274c5157135d 100644 --- a/fs/9p/v9fs.c +++ b/fs/9p/v9fs.c @@ -24,6 +24,9 @@ #include "v9fs_vfs.h" #include "cache.h" +/* cache=loose default negative dentry retention time is 24hours */ +#define CACHE_LOOSE_NDENTRY_TIMEOUT_DEFAULT (24 * 60 * 60 * 1000) + static DEFINE_SPINLOCK(v9fs_sessionlist_lock); static LIST_HEAD(v9fs_sessionlist); struct kmem_cache *v9fs_inode_cache; @@ -39,7 +42,7 @@ enum { * source if we rejected it as EINVAL */ Opt_source, /* Options that take integer arguments */ - Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, + Opt_debug, Opt_dfltuid, Opt_dfltgid, Opt_afid, Opt_negtimeout, /* String options */ Opt_uname, Opt_remotename, Opt_cache, Opt_cachetag, /* Options that take no arguments */ @@ -93,6 +96,7 @@ const struct fs_parameter_spec v9fs_param_spec[] = { fsparam_string ("access", Opt_access), fsparam_flag ("posixacl", Opt_posixacl), fsparam_u32 ("locktimeout", Opt_locktimeout), + fsparam_s32 ("negtimeout", Opt_negtimeout), /* client options */ fsparam_u32 ("msize", Opt_msize), @@ -159,6 +163,9 @@ int v9fs_show_options(struct seq_file *m, struct dentry *root) from_kgid_munged(&init_user_ns, v9ses->dfltgid)); if (v9ses->afid != ~0) seq_printf(m, ",afid=%u", v9ses->afid); + if (v9ses->flags & V9FS_NDENTRY_TIMEOUT_SET) + seq_printf(m, ",negtimeout=%d", + (int)v9ses->ndentry_timeout_ms); if (strcmp(v9ses->uname, V9FS_DEFUSER) != 0) seq_printf(m, ",uname=%s", v9ses->uname); if (strcmp(v9ses->aname, V9FS_DEFANAME) != 0) @@ -337,6 +344,16 @@ int v9fs_parse_param(struct fs_context *fc, struct fs_parameter *param) session_opts->session_lock_timeout = (long)result.uint_32 * HZ; break; + case Opt_negtimeout: + session_opts->flags |= V9FS_NDENTRY_TIMEOUT_SET; + if (result.int_32 < 0) { + session_opts->ndentry_timeout_ms = + NDENTRY_TIMEOUT_NEVER; + } else { + session_opts->ndentry_timeout_ms = result.int_32; + } + break; + /* Options for client */ case Opt_msize: if (result.uint_32 < 4096) { @@ -426,6 +443,14 @@ static void v9fs_apply_options(struct v9fs_session_info *v9ses, v9ses->cache = ctx->session_opts.cache; v9ses->uid = ctx->session_opts.uid; v9ses->session_lock_timeout = ctx->session_opts.session_lock_timeout; + v9ses->ndentry_timeout_ms = ctx->session_opts.ndentry_timeout_ms; + + /* If negative dentry timeout has not been overridden set default for + * cache=loose + */ + if (!(v9ses->flags & V9FS_NDENTRY_TIMEOUT_SET) && + (v9ses->cache & CACHE_LOOSE)) + v9ses->ndentry_timeout_ms = CACHE_LOOSE_NDENTRY_TIMEOUT_DEFAULT; } /** diff --git a/fs/9p/v9fs.h b/fs/9p/v9fs.h index 6a12445d3858..a462bcbfc7da 100644 --- a/fs/9p/v9fs.h +++ b/fs/9p/v9fs.h @@ -24,6 +24,8 @@ * @V9FS_ACCESS_ANY: use a single attach for all users * @V9FS_ACCESS_MASK: bit mask of different ACCESS options * @V9FS_POSIX_ACL: POSIX ACLs are enforced + * @V9FS_NDENTRY_TIMEOUT_SET: Has negative dentry timeout retention time been + * overridden by negtimeout mount option * * Session flags reflect options selected by users at mount time */ @@ -34,16 +36,17 @@ #define V9FS_ACL_MASK V9FS_POSIX_ACL enum p9_session_flags { - V9FS_PROTO_2000U = 0x01, - V9FS_PROTO_2000L = 0x02, - V9FS_ACCESS_SINGLE = 0x04, - V9FS_ACCESS_USER = 0x08, - V9FS_ACCESS_CLIENT = 0x10, - V9FS_POSIX_ACL = 0x20, - V9FS_NO_XATTR = 0x40, - V9FS_IGNORE_QV = 0x80, /* ignore qid.version for cache hints */ - V9FS_DIRECT_IO = 0x100, - V9FS_SYNC = 0x200 + V9FS_PROTO_2000U = 0x01, + V9FS_PROTO_2000L = 0x02, + V9FS_ACCESS_SINGLE = 0x04, + V9FS_ACCESS_USER = 0x08, + V9FS_ACCESS_CLIENT = 0x10, + V9FS_POSIX_ACL = 0x20, + V9FS_NO_XATTR = 0x40, + V9FS_IGNORE_QV = 0x80, /* ignore qid.version for cache hints */ + V9FS_DIRECT_IO = 0x100, + V9FS_SYNC = 0x200, + V9FS_NDENTRY_TIMEOUT_SET = 0x400, }; /** @@ -91,6 +94,7 @@ enum p9_cache_bits { * @debug: debug level * @afid: authentication handle * @cache: cache mode of type &p9_cache_bits + * @ndentry_timeout: Negative dentry lookup cache retention time in ms * @cachetag: the tag of the cache associated with this session * @fscache: session cookie associated with FS-Cache * @uname: string user name to mount hierarchy as @@ -101,6 +105,7 @@ enum p9_cache_bits { * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @clnt: reference to 9P network client instantiated for this session * @slist: reference to list of registered 9p sessions + * @ndentry_timeout_ms: Negative dentry caching retention time * * This structure holds state for each session instance established during * a sys_mount() . @@ -116,6 +121,7 @@ struct v9fs_session_info { unsigned short debug; unsigned int afid; unsigned int cache; + unsigned int ndentry_timeout_ms; #ifdef CONFIG_9P_FSCACHE char *cachetag; struct fscache_volume *fscache; @@ -133,6 +139,8 @@ struct v9fs_session_info { long session_lock_timeout; /* retry interval for blocking locks */ }; +#define NDENTRY_TIMEOUT_NEVER (-1U) + /* cache_validity flags */ #define V9FS_INO_INVALID_ATTR 0x01 diff --git a/fs/9p/v9fs_vfs.h b/fs/9p/v9fs_vfs.h index 34c115d7c250..1856d91f8703 100644 --- a/fs/9p/v9fs_vfs.h +++ b/fs/9p/v9fs_vfs.h @@ -28,6 +28,19 @@ /* flags for v9fs_stat2inode() & v9fs_stat2inode_dotl() */ #define V9FS_STAT2INODE_KEEP_ISIZE 1 +/** + * struct v9fs_dentry - v9fs specific dentry data + * @head: List of fid associated with this dentry + * @expire_time: Lookup cache expiration time for negative dentries + * @rcu: used by kfree_rcu to schedule clean up job + */ +struct v9fs_dentry { + struct hlist_head head; + u64 expire_time; + struct rcu_head rcu; +}; +#define to_v9fs_dentry(d) ((struct v9fs_dentry *)((d)->d_fsdata)) + extern struct file_system_type v9fs_fs_type; extern const struct address_space_operations v9fs_addr_operations; extern const struct file_operations v9fs_file_operations; @@ -35,6 +48,8 @@ extern const struct file_operations v9fs_file_operations_dotl; extern const struct file_operations v9fs_dir_operations; extern const struct file_operations v9fs_dir_operations_dotl; extern const struct dentry_operations v9fs_dentry_operations; +extern void v9fs_ndentry_refresh_timeout(struct dentry *dentry); +extern void v9fs_dentry_fid_remove(struct dentry *dentry); extern const struct dentry_operations v9fs_cached_dentry_operations; extern struct kmem_cache *v9fs_inode_cache; diff --git a/fs/9p/vfs_addr.c b/fs/9p/vfs_addr.c index c21d33830f5f..1ac0b3dcc077 100644 --- a/fs/9p/vfs_addr.c +++ b/fs/9p/vfs_addr.c @@ -70,11 +70,34 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) { struct netfs_io_request *rreq = subreq->rreq; struct p9_fid *fid = rreq->netfs_priv; + char *target; unsigned long long pos = subreq->start + subreq->transferred; - int total, err; - - total = p9_client_read(fid, pos, &subreq->io_iter, &err); + int total = 0, err, len, n; + + if (S_ISLNK(rreq->inode->i_mode)) { + /* p9_client_readlink() must not be called for legacy protocols + * 9p2000 or 9p2000.u. + */ + BUG_ON(!p9_is_proto_dotl(fid->clnt)); + if (WARN_ON_ONCE(pos)) { + /* reading a link at a non null offset should + * not happen + */ + err = -EIO; + goto fill_subreq; + } + err = p9_client_readlink(fid, &target); + if (err != 0) + goto fill_subreq; + len = strlen(target); + n = copy_to_iter(target, len, &subreq->io_iter); + kfree(target); + total = n; + } else { + total = p9_client_read(fid, pos, &subreq->io_iter, &err); + } +fill_subreq: /* if we just extended the file size, any portion not in * cache won't be on server and is zeroes */ if (subreq->rreq->origin != NETFS_UNBUFFERED_READ && @@ -99,6 +122,7 @@ static void v9fs_issue_read(struct netfs_io_subrequest *subreq) static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file) { struct p9_fid *fid; + struct dentry *dentry; bool writing = (rreq->origin == NETFS_READ_FOR_WRITE || rreq->origin == NETFS_WRITETHROUGH || rreq->origin == NETFS_UNBUFFERED_WRITE || @@ -115,6 +139,14 @@ static int v9fs_init_request(struct netfs_io_request *rreq, struct file *file) if (!fid) goto no_fid; p9_fid_get(fid); + } else if (S_ISLNK(rreq->inode->i_mode)) { + dentry = d_find_any_alias(rreq->inode); + if (!dentry) + goto no_fid; + fid = v9fs_fid_lookup(dentry); + dput(dentry); + if (IS_ERR(fid)) + goto no_fid; } else { fid = v9fs_fid_find_inode(rreq->inode, writing, INVALID_UID, true); if (!fid) diff --git a/fs/9p/vfs_dentry.c b/fs/9p/vfs_dentry.c index c5bf74d547e8..e549e222602e 100644 --- a/fs/9p/vfs_dentry.c +++ b/fs/9p/vfs_dentry.c @@ -24,6 +24,46 @@ #include "fid.h" /** + * v9fs_ndentry_is_expired - Check if negative dentry lookup has expired + * + * This should be called to know if a negative dentry should be removed from + * cache. + * + * @dentry: dentry in question + * + */ +static bool v9fs_ndentry_is_expired(struct dentry const *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + struct v9fs_dentry *v9fs_dentry = to_v9fs_dentry(dentry); + + if (v9ses->ndentry_timeout_ms == NDENTRY_TIMEOUT_NEVER) + return false; + + return time_before_eq64(v9fs_dentry->expire_time, get_jiffies_64()); +} + +/** + * v9fs_ndentry_refresh_timeout - Refresh negative dentry lookup cache timeout + * + * This should be called when a look up yields a negative entry. + * + * @dentry: dentry in question + * + */ +void v9fs_ndentry_refresh_timeout(struct dentry *dentry) +{ + struct v9fs_session_info *v9ses = v9fs_dentry2v9ses(dentry); + struct v9fs_dentry *v9fs_dentry = to_v9fs_dentry(dentry); + + if (v9ses->ndentry_timeout_ms == NDENTRY_TIMEOUT_NEVER) + return; + + v9fs_dentry->expire_time = get_jiffies_64() + + msecs_to_jiffies(v9ses->ndentry_timeout_ms); +} + +/** * v9fs_cached_dentry_delete - called when dentry refcount equals 0 * @dentry: dentry in question * @@ -33,20 +73,15 @@ static int v9fs_cached_dentry_delete(const struct dentry *dentry) p9_debug(P9_DEBUG_VFS, " dentry: %pd (%p)\n", dentry, dentry); - /* Don't cache negative dentries */ - if (d_really_is_negative(dentry)) - return 1; - return 0; -} + if (!d_really_is_negative(dentry)) + return 0; -/** - * v9fs_dentry_release - called when dentry is going to be freed - * @dentry: dentry that is being release - * - */ + return v9fs_ndentry_is_expired(dentry); +} -static void v9fs_dentry_release(struct dentry *dentry) +static void __v9fs_dentry_fid_remove(struct dentry *dentry) { + struct v9fs_dentry *v9fs_dentry = to_v9fs_dentry(dentry); struct hlist_node *p, *n; struct hlist_head head; @@ -54,13 +89,54 @@ static void v9fs_dentry_release(struct dentry *dentry) dentry, dentry); spin_lock(&dentry->d_lock); - hlist_move_list((struct hlist_head *)&dentry->d_fsdata, &head); + hlist_move_list(&v9fs_dentry->head, &head); spin_unlock(&dentry->d_lock); hlist_for_each_safe(p, n, &head) p9_fid_put(hlist_entry(p, struct p9_fid, dlist)); } +/** + * v9fs_dentry_fid_remove - Release all dentry's fids + * @dentry: dentry in question + * + */ +void v9fs_dentry_fid_remove(struct dentry *dentry) +{ + __v9fs_dentry_fid_remove(dentry); +} + +/** + * v9fs_dentry_init - Initialize v9fs dentry data + * @dentry: dentry in question + * + */ +static int v9fs_dentry_init(struct dentry *dentry) +{ + struct v9fs_dentry *v9fs_dentry = kzalloc(sizeof(*v9fs_dentry), + GFP_KERNEL); + + if (!v9fs_dentry) + return -ENOMEM; + + INIT_HLIST_HEAD(&v9fs_dentry->head); + dentry->d_fsdata = (void *)v9fs_dentry; + return 0; +} + +/** + * v9fs_dentry_release - called when dentry is going to be freed + * @dentry: dentry that is being released + * + */ +static void v9fs_dentry_release(struct dentry *dentry) +{ + struct v9fs_dentry *v9fs_dentry = to_v9fs_dentry(dentry); + + __v9fs_dentry_fid_remove(dentry); + kfree_rcu(v9fs_dentry, rcu); +} + static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) { struct p9_fid *fid; @@ -72,7 +148,7 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) inode = d_inode(dentry); if (!inode) - goto out_valid; + return !v9fs_ndentry_is_expired(dentry); v9inode = V9FS_I(inode); if (v9inode->cache_validity & V9FS_INO_INVALID_ATTR) { @@ -112,7 +188,6 @@ static int __v9fs_lookup_revalidate(struct dentry *dentry, unsigned int flags) return retval; } } -out_valid: p9_debug(P9_DEBUG_VFS, "dentry: %pd (%p) is valid\n", dentry, dentry); return 1; } @@ -139,12 +214,14 @@ const struct dentry_operations v9fs_cached_dentry_operations = { .d_revalidate = v9fs_lookup_revalidate, .d_weak_revalidate = __v9fs_lookup_revalidate, .d_delete = v9fs_cached_dentry_delete, + .d_init = v9fs_dentry_init, .d_release = v9fs_dentry_release, .d_unalias_trylock = v9fs_dentry_unalias_trylock, .d_unalias_unlock = v9fs_dentry_unalias_unlock, }; const struct dentry_operations v9fs_dentry_operations = { + .d_init = v9fs_dentry_init, .d_release = v9fs_dentry_release, .d_unalias_trylock = v9fs_dentry_unalias_trylock, .d_unalias_unlock = v9fs_dentry_unalias_unlock, diff --git a/fs/9p/vfs_dir.c b/fs/9p/vfs_dir.c index e0d34e4e9076..323f85352f6a 100644 --- a/fs/9p/vfs_dir.c +++ b/fs/9p/vfs_dir.c @@ -27,6 +27,7 @@ * struct p9_rdir - readdir accounting * @head: start offset of current dirread buffer * @tail: end offset of current dirread buffer + * @offset: file position the data at @head corresponds to * @buf: dirread buffer * * private structure for keeping track of readdir @@ -36,6 +37,7 @@ struct p9_rdir { int head; int tail; + loff_t offset; uint8_t buf[]; }; @@ -70,7 +72,7 @@ static struct p9_rdir *v9fs_alloc_rdir_buf(struct file *filp, int buflen) struct p9_fid *fid = filp->private_data; if (!fid->rdir) - fid->rdir = kzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); + fid->rdir = kvzalloc(sizeof(struct p9_rdir) + buflen, GFP_KERNEL); return fid->rdir; } @@ -102,6 +104,9 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) kvec.iov_base = rdir->buf; kvec.iov_len = buflen; + if (rdir->head < rdir->tail && rdir->offset != ctx->pos) + rdir->head = rdir->tail = 0; + while (1) { if (rdir->tail == rdir->head) { struct iov_iter to; @@ -117,6 +122,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) rdir->head = 0; rdir->tail = n; + rdir->offset = ctx->pos; } while (rdir->head < rdir->tail) { err = p9stat_read(fid->clnt, rdir->buf + rdir->head, @@ -134,6 +140,7 @@ static int v9fs_dir_readdir(struct file *file, struct dir_context *ctx) rdir->head += err; ctx->pos += err; + rdir->offset = ctx->pos; } } } @@ -161,6 +168,9 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) if (!rdir) return -ENOMEM; + if (rdir->head < rdir->tail && rdir->offset != ctx->pos) + rdir->head = rdir->tail = 0; + while (1) { if (rdir->tail == rdir->head) { err = p9_client_readdir(fid, rdir->buf, buflen, @@ -170,6 +180,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) rdir->head = 0; rdir->tail = err; + rdir->offset = ctx->pos; } while (rdir->head < rdir->tail) { @@ -190,6 +201,7 @@ static int v9fs_dir_readdir_dotl(struct file *file, struct dir_context *ctx) ctx->pos = curdirent.d_off; rdir->head += err; + rdir->offset = ctx->pos; } } } diff --git a/fs/9p/vfs_file.c b/fs/9p/vfs_file.c index c5e73c37baea..cddfb4b7ce4e 100644 --- a/fs/9p/vfs_file.c +++ b/fs/9p/vfs_file.c @@ -198,7 +198,7 @@ static int v9fs_file_do_lock(struct file *filp, int cmd, struct file_lock *fl) res = -EAGAIN; break; default: - WARN_ONCE(1, "unknown lock status code: %d\n", status); + p9_debug(P9_DEBUG_ERROR, "unknown lock status code: %d\n", status); fallthrough; case P9_LOCK_ERROR: case P9_LOCK_GRACE: diff --git a/fs/9p/vfs_inode.c b/fs/9p/vfs_inode.c index f468acb8ee7d..5783d0336f96 100644 --- a/fs/9p/vfs_inode.c +++ b/fs/9p/vfs_inode.c @@ -302,10 +302,12 @@ int v9fs_init_inode(struct v9fs_session_info *v9ses, goto error; } - if (v9fs_proto_dotl(v9ses)) + if (v9fs_proto_dotl(v9ses)) { inode->i_op = &v9fs_symlink_inode_operations_dotl; - else + inode_nohighmem(inode); + } else { inode->i_op = &v9fs_symlink_inode_operations; + } break; case S_IFDIR: @@ -488,10 +490,19 @@ static int v9fs_at_to_dotl_flags(int flags) * - ext4 (with dir_nlink feature enabled) sets nlink to 1 if a dir has more * than EXT4_LINK_MAX (65000) links. * + * In cacheless mode the server is the source of truth for nlink and the + * inode is going away immediately, so locally adjusting i_nlink buys + * nothing and races with concurrent metadata fetches that may already + * have observed the post-unlink value (nlink == 0). + * * @inode: inode whose nlink is being dropped */ static void v9fs_dec_count(struct inode *inode) { + struct v9fs_session_info *v9ses = v9fs_inode2v9ses(inode); + + if (!(v9ses->cache & (CACHE_META | CACHE_LOOSE))) + return; if (!S_ISDIR(inode->i_mode) || inode->i_nlink > 2) drop_nlink(inode); } @@ -549,7 +560,7 @@ static int v9fs_remove(struct inode *dir, struct dentry *dentry, int flags) /* invalidate all fids associated with dentry */ /* NOTE: This will not include open fids */ - dentry->d_op->d_release(dentry); + v9fs_dentry_fid_remove(dentry); } return retval; } @@ -672,27 +683,20 @@ v9fs_vfs_create(struct mnt_idmap *idmap, struct inode *dir, static struct dentry *v9fs_vfs_mkdir(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t mode) { - int err; u32 perm; struct p9_fid *fid; struct v9fs_session_info *v9ses; p9_debug(P9_DEBUG_VFS, "name %pd\n", dentry); - err = 0; v9ses = v9fs_inode2v9ses(dir); perm = unixmode2p9mode(v9ses, mode | S_IFDIR); fid = v9fs_create(v9ses, dir, dentry, NULL, perm, P9_OREAD); - if (IS_ERR(fid)) { - err = PTR_ERR(fid); - fid = NULL; - } else { - inc_nlink(dir); - v9fs_invalidate_inode_attr(dir); - } - - if (fid) - p9_fid_put(fid); - return ERR_PTR(err); + if (IS_ERR(fid)) + return ERR_CAST(fid); + inc_nlink(dir); + v9fs_invalidate_inode_attr(dir); + p9_fid_put(fid); + return NULL; } /** @@ -732,14 +736,16 @@ struct dentry *v9fs_vfs_lookup(struct inode *dir, struct dentry *dentry, name = dentry->d_name.name; fid = p9_client_walk(dfid, 1, &name, 1); p9_fid_put(dfid); - if (fid == ERR_PTR(-ENOENT)) + if (fid == ERR_PTR(-ENOENT)) { inode = NULL; - else if (IS_ERR(fid)) + v9fs_ndentry_refresh_timeout(dentry); + } else if (IS_ERR(fid)) { inode = ERR_CAST(fid); - else if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) + } else if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) { inode = v9fs_get_inode_from_fid(v9ses, fid, dir->i_sb); - else + } else { inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); + } /* * If we had a rename on the server and a parallel lookup * for the new name, then make sure we instantiate with diff --git a/fs/9p/vfs_inode_dotl.c b/fs/9p/vfs_inode_dotl.c index 141fb54db65d..f7396d20cb6c 100644 --- a/fs/9p/vfs_inode_dotl.c +++ b/fs/9p/vfs_inode_dotl.c @@ -349,7 +349,7 @@ static struct dentry *v9fs_vfs_mkdir_dotl(struct mnt_idmap *idmap, struct inode *dir, struct dentry *dentry, umode_t omode) { - int err; + int err = 0; struct v9fs_session_info *v9ses; struct p9_fid *fid = NULL, *dfid = NULL; kgid_t gid; @@ -412,7 +412,7 @@ error: p9_fid_put(fid); v9fs_put_acl(dacl, pacl); p9_fid_put(dfid); - return ERR_PTR(err); + return err ? ERR_PTR(err) : NULL; } static int @@ -690,9 +690,11 @@ v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir, int err; kgid_t gid; const unsigned char *name; + struct v9fs_session_info *v9ses; struct p9_qid qid; struct p9_fid *dfid; struct p9_fid *fid = NULL; + struct inode *inode; name = dentry->d_name.name; p9_debug(P9_DEBUG_VFS, "%llu,%s,%s\n", dir->i_ino, name, symname); @@ -716,6 +718,26 @@ v9fs_vfs_symlink_dotl(struct mnt_idmap *idmap, struct inode *dir, v9fs_invalidate_inode_attr(dir); + /* instantiate inode and assign the unopened fid to the dentry */ + fid = p9_client_walk(dfid, 1, &name, 1); + if (IS_ERR(fid)) { + err = PTR_ERR(fid); + p9_debug(P9_DEBUG_VFS, "p9_client_walk failed %d\n", + err); + goto error; + } + + v9ses = v9fs_inode2v9ses(dir); + inode = v9fs_get_new_inode_from_fid(v9ses, fid, dir->i_sb); + if (IS_ERR(inode)) { + err = PTR_ERR(inode); + p9_debug(P9_DEBUG_VFS, "inode creation failed %d\n", + err); + goto error; + } + v9fs_fid_add(dentry, &fid); + d_instantiate(dentry, inode); + err = 0; error: p9_fid_put(fid); p9_fid_put(dfid); @@ -857,16 +879,18 @@ error: } /** - * v9fs_vfs_get_link_dotl - follow a symlink path + * v9fs_vfs_get_link_nocache_dotl - Resolve a symlink directly. + * + * To be used when symlink caching is not enabled. + * * @dentry: dentry for symlink * @inode: inode for symlink * @done: destructor for return value */ - static const char * -v9fs_vfs_get_link_dotl(struct dentry *dentry, - struct inode *inode, - struct delayed_call *done) +v9fs_vfs_get_link_nocache_dotl(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) { struct p9_fid *fid; char *target; @@ -888,6 +912,26 @@ v9fs_vfs_get_link_dotl(struct dentry *dentry, return target; } +/** + * v9fs_vfs_get_link_dotl - follow a symlink path + * @dentry: dentry for symlink + * @inode: inode for symlink + * @done: destructor for return value + */ +static const char * +v9fs_vfs_get_link_dotl(struct dentry *dentry, + struct inode *inode, + struct delayed_call *done) +{ + struct v9fs_session_info *v9ses; + + v9ses = v9fs_inode2v9ses(inode); + if (v9ses->cache & (CACHE_META|CACHE_LOOSE)) + return page_get_link(dentry, inode, done); + + return v9fs_vfs_get_link_nocache_dotl(dentry, inode, done); +} + int v9fs_refresh_inode_dotl(struct p9_fid *fid, struct inode *inode) { struct p9_stat_dotl *st; diff --git a/fs/9p/vfs_super.c b/fs/9p/vfs_super.c index 431f24938a1d..94d6b02c221b 100644 --- a/fs/9p/vfs_super.c +++ b/fs/9p/vfs_super.c @@ -330,6 +330,7 @@ static int v9fs_init_fs_context(struct fs_context *fc) ctx->session_opts.uid = INVALID_UID; ctx->session_opts.dfltuid = V9FS_DEFUID; ctx->session_opts.dfltgid = V9FS_DEFGID; + ctx->session_opts.ndentry_timeout_ms = 0; /* initialize client options */ ctx->client_opts.proto_version = p9_proto_2000L; diff --git a/include/net/9p/client.h b/include/net/9p/client.h index 838a94218b59..55c6cb54bd25 100644 --- a/include/net/9p/client.h +++ b/include/net/9p/client.h @@ -192,6 +192,7 @@ struct p9_rdma_opts { * @dfltgid: default numeric groupid to mount hierarchy as * @uid: if %V9FS_ACCESS_SINGLE, the numeric uid which mounted the hierarchy * @session_lock_timeout: retry interval for blocking locks + * @ndentry_timeout_ms: Negative dentry lookup cache retention time in ms * * This strucure holds options which are parsed and will be transferred * to the v9fs_session_info structure when mounted, and therefore largely @@ -203,6 +204,7 @@ struct p9_session_opts { unsigned short debug; unsigned int afid; unsigned int cache; + unsigned int ndentry_timeout_ms; #ifdef CONFIG_9P_FSCACHE char *cachetag; #endif diff --git a/net/9p/client.c b/net/9p/client.c index f0dcf252af7e..ef64546c6d52 100644 --- a/net/9p/client.c +++ b/net/9p/client.c @@ -600,6 +600,8 @@ again: if (err == -ERESTARTSYS && c->status == Connected && type == P9_TFLUSH) { + if (fatal_signal_pending(current)) + goto recalc_sigpending; sigpending = 1; clear_thread_flag(TIF_SIGPENDING); goto again; @@ -765,7 +767,7 @@ static void p9_fid_destroy(struct p9_fid *fid) spin_lock_irqsave(&clnt->lock, flags); idr_remove(&clnt->fids, fid->fid); spin_unlock_irqrestore(&clnt->lock, flags); - kfree(fid->rdir); + kvfree(fid->rdir); kfree(fid); } @@ -1092,7 +1094,8 @@ struct p9_fid *p9_client_walk(struct p9_fid *oldfid, uint16_t nwname, clunk_fid: kfree(wqids); - p9_fid_put(fid); + if (fid != oldfid) + p9_fid_put(fid); fid = NULL; error: diff --git a/net/9p/trans_fd.c b/net/9p/trans_fd.c index dbad3213ba84..eb685b52aeb2 100644 --- a/net/9p/trans_fd.c +++ b/net/9p/trans_fd.c @@ -940,14 +940,12 @@ p9_fd_create_unix(struct p9_client *client, struct fs_context *fc) if (!addr || !strlen(addr)) return -EINVAL; - if (strlen(addr) >= UNIX_PATH_MAX) { + sun_server.sun_family = PF_UNIX; + if (strscpy(sun_server.sun_path, addr) < 0) { pr_err("%s (%d): address too long: %s\n", __func__, task_pid_nr(current), addr); return -ENAMETOOLONG; } - - sun_server.sun_family = PF_UNIX; - strcpy(sun_server.sun_path, addr); err = __sock_create(current->nsproxy->net_ns, PF_UNIX, SOCK_STREAM, 0, &csocket, 1); if (err < 0) { diff --git a/net/9p/trans_rdma.c b/net/9p/trans_rdma.c index aa5bd74d333f..b4274f10fa44 100644 --- a/net/9p/trans_rdma.c +++ b/net/9p/trans_rdma.c @@ -128,25 +128,36 @@ p9_cm_event_handler(struct rdma_cm_id *id, struct rdma_cm_event *event) { struct p9_client *c = id->context; struct p9_trans_rdma *rdma = c->trans; + unsigned long flags; + switch (event->event) { case RDMA_CM_EVENT_ADDR_RESOLVED: + spin_lock_irqsave(&rdma->req_lock, flags); BUG_ON(rdma->state != P9_RDMA_INIT); rdma->state = P9_RDMA_ADDR_RESOLVED; + spin_unlock_irqrestore(&rdma->req_lock, flags); break; case RDMA_CM_EVENT_ROUTE_RESOLVED: + spin_lock_irqsave(&rdma->req_lock, flags); BUG_ON(rdma->state != P9_RDMA_ADDR_RESOLVED); rdma->state = P9_RDMA_ROUTE_RESOLVED; + spin_unlock_irqrestore(&rdma->req_lock, flags); break; case RDMA_CM_EVENT_ESTABLISHED: + spin_lock_irqsave(&rdma->req_lock, flags); BUG_ON(rdma->state != P9_RDMA_ROUTE_RESOLVED); rdma->state = P9_RDMA_CONNECTED; + spin_unlock_irqrestore(&rdma->req_lock, flags); break; case RDMA_CM_EVENT_DISCONNECTED: - if (rdma) + if (rdma) { + spin_lock_irqsave(&rdma->req_lock, flags); rdma->state = P9_RDMA_CLOSED; + spin_unlock_irqrestore(&rdma->req_lock, flags); + } c->status = Disconnected; break; @@ -184,6 +195,7 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc) struct p9_req_t *req; int err = 0; int16_t tag; + unsigned long flags; req = NULL; ib_dma_unmap_single(rdma->cm_id->device, c->busa, client->msize, @@ -220,7 +232,10 @@ recv_done(struct ib_cq *cq, struct ib_wc *wc) err_out: p9_debug(P9_DEBUG_ERROR, "req %p err %d status %d\n", req, err, wc->status); - rdma->state = P9_RDMA_FLUSHING; + spin_lock_irqsave(&rdma->req_lock, flags); + if (rdma->state < P9_RDMA_FLUSHING) + rdma->state = P9_RDMA_FLUSHING; + spin_unlock_irqrestore(&rdma->req_lock, flags); client->status = Disconnected; goto out; } diff --git a/net/9p/trans_usbg.c b/net/9p/trans_usbg.c index 1ce70338999c..419cda13a7b5 100644 --- a/net/9p/trans_usbg.c +++ b/net/9p/trans_usbg.c @@ -804,7 +804,7 @@ static void usb9pfs_attr_release(struct config_item *item) usb_put_function_instance(&usb9pfs_opts->func_inst); } -static struct configfs_item_operations usb9pfs_item_ops = { +static const struct configfs_item_operations usb9pfs_item_ops = { .release = usb9pfs_attr_release, }; diff --git a/net/9p/trans_virtio.c b/net/9p/trans_virtio.c index 4cdab7094b27..b0d0094ec8e2 100644 --- a/net/9p/trans_virtio.c +++ b/net/9p/trans_virtio.c @@ -532,6 +532,11 @@ req_retry_pinned: p9_debug(P9_DEBUG_TRANS, "virtio request kicked\n"); err = io_wait_event_killable(req->wq, READ_ONCE(req->status) >= REQ_STATUS_RCVD); + /* + * Make sure our req is coherent with regard to updates in other + * threads - echoes to wmb() in the callback + */ + smp_rmb(); // RERROR needs reply (== error string) in static data if (READ_ONCE(req->status) == REQ_STATUS_RCVD && unlikely(req->rc.sdata[4] == P9_RERROR)) |
