diff options
author | Anand V. Avati <avati@redhat.com> | 2012-08-19 08:53:23 -0400 |
---|---|---|
committer | Miklos Szeredi <mszeredi@suse.cz> | 2013-01-24 16:21:25 +0100 |
commit | 0b05b18381eea98c9c9ada95629bf659a88c9374 (patch) | |
tree | a6389eaffda03a2e28cb05be242e03ef839fcb91 /fs/fuse/dir.c | |
parent | ff7532ca2c631e7e96dcd305a967b610259dc0ea (diff) |
fuse: implement NFS-like readdirplus support
This patch implements readdirplus support in FUSE, similar to NFS.
The payload returned in the readdirplus call contains
'fuse_entry_out' structure thereby providing all the necessary inputs
for 'faking' a lookup() operation on the spot.
If the dentry and inode already existed (for e.g. in a re-run of ls -l)
then just the inode attributes timeout and dentry timeout are refreshed.
With a simple client->network->server implementation of a FUSE based
filesystem, the following performance observations were made:
Test: Performing a filesystem crawl over 20,000 files with
sh# time ls -lR /mnt
Without readdirplus:
Run 1: 18.1s
Run 2: 16.0s
Run 3: 16.2s
With readdirplus:
Run 1: 4.1s
Run 2: 3.8s
Run 3: 3.8s
The performance improvement is significant as it avoided 20,000 upcalls
calls (lookup). Cache consistency is no worse than what already is.
Signed-off-by: Anand V. Avati <avati@redhat.com>
Signed-off-by: Miklos Szeredi <mszeredi@suse.cz>
Diffstat (limited to 'fs/fuse/dir.c')
-rw-r--r-- | fs/fuse/dir.c | 160 |
1 files changed, 156 insertions, 4 deletions
diff --git a/fs/fuse/dir.c b/fs/fuse/dir.c index b7c09f9eb40c..dcc1e522c7d4 100644 --- a/fs/fuse/dir.c +++ b/fs/fuse/dir.c @@ -1155,6 +1155,143 @@ static int parse_dirfile(char *buf, size_t nbytes, struct file *file, return 0; } +static int fuse_direntplus_link(struct file *file, + struct fuse_direntplus *direntplus, + u64 attr_version) +{ + int err; + struct fuse_entry_out *o = &direntplus->entry_out; + struct fuse_dirent *dirent = &direntplus->dirent; + struct dentry *parent = file->f_path.dentry; + struct qstr name = QSTR_INIT(dirent->name, dirent->namelen); + struct dentry *dentry; + struct dentry *alias; + struct inode *dir = parent->d_inode; + struct fuse_conn *fc; + struct inode *inode; + + if (!o->nodeid) { + /* + * Unlike in the case of fuse_lookup, zero nodeid does not mean + * ENOENT. Instead, it only means the userspace filesystem did + * not want to return attributes/handle for this entry. + * + * So do nothing. + */ + return 0; + } + + if (name.name[0] == '.') { + /* + * We could potentially refresh the attributes of the directory + * and its parent? + */ + if (name.len == 1) + return 0; + if (name.name[1] == '.' && name.len == 2) + return 0; + } + fc = get_fuse_conn(dir); + + name.hash = full_name_hash(name.name, name.len); + dentry = d_lookup(parent, &name); + if (dentry && dentry->d_inode) { + inode = dentry->d_inode; + if (get_node_id(inode) == o->nodeid) { + struct fuse_inode *fi; + fi = get_fuse_inode(inode); + spin_lock(&fc->lock); + fi->nlookup++; + spin_unlock(&fc->lock); + + /* + * The other branch to 'found' comes via fuse_iget() + * which bumps nlookup inside + */ + goto found; + } + err = d_invalidate(dentry); + if (err) + goto out; + dput(dentry); + dentry = NULL; + } + + dentry = d_alloc(parent, &name); + err = -ENOMEM; + if (!dentry) + goto out; + + inode = fuse_iget(dir->i_sb, o->nodeid, o->generation, + &o->attr, entry_attr_timeout(o), attr_version); + if (!inode) + goto out; + + alias = d_materialise_unique(dentry, inode); + err = PTR_ERR(alias); + if (IS_ERR(alias)) + goto out; + if (alias) { + dput(dentry); + dentry = alias; + } + +found: + fuse_change_attributes(inode, &o->attr, entry_attr_timeout(o), + attr_version); + + fuse_change_entry_timeout(dentry, o); + + err = 0; +out: + if (dentry) + dput(dentry); + return err; +} + +static int parse_dirplusfile(char *buf, size_t nbytes, struct file *file, + void *dstbuf, filldir_t filldir, u64 attr_version) +{ + struct fuse_direntplus *direntplus; + struct fuse_dirent *dirent; + size_t reclen; + int over = 0; + int ret; + + while (nbytes >= FUSE_NAME_OFFSET_DIRENTPLUS) { + direntplus = (struct fuse_direntplus *) buf; + dirent = &direntplus->dirent; + reclen = FUSE_DIRENTPLUS_SIZE(direntplus); + + if (!dirent->namelen || dirent->namelen > FUSE_NAME_MAX) + return -EIO; + if (reclen > nbytes) + break; + + if (!over) { + /* We fill entries into dstbuf only as much as + it can hold. But we still continue iterating + over remaining entries to link them. If not, + we need to send a FORGET for each of those + which we did not link. + */ + over = filldir(dstbuf, dirent->name, dirent->namelen, + file->f_pos, dirent->ino, + dirent->type); + file->f_pos = dirent->off; + } + + buf += reclen; + nbytes -= reclen; + + ret = fuse_direntplus_link(file, direntplus, attr_version); + if (ret) + fuse_force_forget(file, direntplus->entry_out.nodeid); + } + + return 0; +} + static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) { int err; @@ -1163,6 +1300,7 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) struct inode *inode = file->f_path.dentry->d_inode; struct fuse_conn *fc = get_fuse_conn(inode); struct fuse_req *req; + u64 attr_version = 0; if (is_bad_inode(inode)) return -EIO; @@ -1179,14 +1317,28 @@ static int fuse_readdir(struct file *file, void *dstbuf, filldir_t filldir) req->out.argpages = 1; req->num_pages = 1; req->pages[0] = page; - fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, FUSE_READDIR); + if (fc->do_readdirplus) { + attr_version = fuse_get_attr_version(fc); + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIRPLUS); + } else { + fuse_read_fill(req, file, file->f_pos, PAGE_SIZE, + FUSE_READDIR); + } fuse_request_send(fc, req); nbytes = req->out.args[0].size; err = req->out.h.error; fuse_put_request(fc, req); - if (!err) - err = parse_dirfile(page_address(page), nbytes, file, dstbuf, - filldir); + if (!err) { + if (fc->do_readdirplus) { + err = parse_dirplusfile(page_address(page), nbytes, + file, dstbuf, filldir, + attr_version); + } else { + err = parse_dirfile(page_address(page), nbytes, file, + dstbuf, filldir); + } + } __free_page(page); fuse_invalidate_attr(inode); /* atime changed */ |