summaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--Documentation/filesystems/Locking4
-rw-r--r--Documentation/filesystems/vfs.txt11
-rw-r--r--fs/internal.h5
-rw-r--r--fs/namei.c203
-rw-r--r--fs/open.c42
-rw-r--r--include/linux/fs.h7
6 files changed, 270 insertions, 2 deletions
diff --git a/Documentation/filesystems/Locking b/Documentation/filesystems/Locking
index 8e2da1e06e3b..8157488c3463 100644
--- a/Documentation/filesystems/Locking
+++ b/Documentation/filesystems/Locking
@@ -62,6 +62,9 @@ ata *);
int (*removexattr) (struct dentry *, const char *);
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start, u64 len);
void (*update_time)(struct inode *, struct timespec *, int);
+ struct file * (*atomic_open)(struct inode *, struct dentry *,
+ struct opendata *, unsigned open_flag,
+ umode_t create_mode, bool *created);
locking rules:
all may block
@@ -89,6 +92,7 @@ listxattr: no
removexattr: yes
fiemap: no
update_time: no
+atomic_open: yes
Additionally, ->rmdir(), ->unlink() and ->rename() have ->i_mutex on
victim.
diff --git a/Documentation/filesystems/vfs.txt b/Documentation/filesystems/vfs.txt
index efd23f481704..beb6e691f70a 100644
--- a/Documentation/filesystems/vfs.txt
+++ b/Documentation/filesystems/vfs.txt
@@ -364,6 +364,9 @@ struct inode_operations {
ssize_t (*listxattr) (struct dentry *, char *, size_t);
int (*removexattr) (struct dentry *, const char *);
void (*update_time)(struct inode *, struct timespec *, int);
+ struct file * (*atomic_open)(struct inode *, struct dentry *,
+ struct opendata *, unsigned open_flag,
+ umode_t create_mode, bool *created);
};
Again, all methods are called without any locks being held, unless
@@ -476,6 +479,14 @@ otherwise noted.
an inode. If this is not defined the VFS will update the inode itself
and call mark_inode_dirty_sync.
+ atomic_open: called on the last component of an open. Using this optional
+ method the filesystem can look up, possibly create and open the file in
+ one atomic operation. If it cannot perform this (e.g. the file type
+ turned out to be wrong) it may signal this by returning NULL instead of
+ an open struct file pointer. This method is only called if the last
+ component is negative or needs lookup. Cached positive dentries are
+ still handled by f_op->open().
+
The Address Space Object
========================
diff --git a/fs/internal.h b/fs/internal.h
index d2a23ff61b40..70067775df2e 100644
--- a/fs/internal.h
+++ b/fs/internal.h
@@ -85,6 +85,11 @@ extern struct super_block *user_get_super(dev_t);
struct nameidata;
extern struct file *nameidata_to_filp(struct nameidata *);
extern void release_open_intent(struct nameidata *);
+struct opendata {
+ struct dentry *dentry;
+ struct vfsmount *mnt;
+ struct file **filp;
+};
struct open_flags {
int open_flag;
umode_t mode;
diff --git a/fs/namei.c b/fs/namei.c
index ccb0eb17f528..9e11ae83bff6 100644
--- a/fs/namei.c
+++ b/fs/namei.c
@@ -2196,6 +2196,176 @@ static inline int open_to_namei_flags(int flag)
return flag;
}
+static int may_o_create(struct path *dir, struct dentry *dentry, umode_t mode)
+{
+ int error = security_path_mknod(dir, dentry, mode, 0);
+ if (error)
+ return error;
+
+ error = inode_permission(dir->dentry->d_inode, MAY_WRITE | MAY_EXEC);
+ if (error)
+ return error;
+
+ return security_inode_create(dir->dentry->d_inode, dentry, mode);
+}
+
+static struct file *atomic_open(struct nameidata *nd, struct dentry *dentry,
+ struct path *path, const struct open_flags *op,
+ int *want_write, bool need_lookup,
+ bool *created)
+{
+ struct inode *dir = nd->path.dentry->d_inode;
+ unsigned open_flag = open_to_namei_flags(op->open_flag);
+ umode_t mode;
+ int error;
+ int acc_mode;
+ struct opendata od;
+ struct file *filp;
+ int create_error = 0;
+ struct dentry *const DENTRY_NOT_SET = (void *) -1UL;
+
+ BUG_ON(dentry->d_inode);
+
+ /* Don't create child dentry for a dead directory. */
+ if (unlikely(IS_DEADDIR(dir))) {
+ filp = ERR_PTR(-ENOENT);
+ goto out;
+ }
+
+ mode = op->mode & S_IALLUGO;
+ if ((open_flag & O_CREAT) && !IS_POSIXACL(dir))
+ mode &= ~current_umask();
+
+ if (open_flag & O_EXCL) {
+ open_flag &= ~O_TRUNC;
+ *created = true;
+ }
+
+ /*
+ * Checking write permission is tricky, bacuse we don't know if we are
+ * going to actually need it: O_CREAT opens should work as long as the
+ * file exists. But checking existence breaks atomicity. The trick is
+ * to check access and if not granted clear O_CREAT from the flags.
+ *
+ * Another problem is returing the "right" error value (e.g. for an
+ * O_EXCL open we want to return EEXIST not EROFS).
+ */
+ if ((open_flag & (O_CREAT | O_TRUNC)) ||
+ (open_flag & O_ACCMODE) != O_RDONLY) {
+ error = mnt_want_write(nd->path.mnt);
+ if (!error) {
+ *want_write = 1;
+ } else if (!(open_flag & O_CREAT)) {
+ /*
+ * No O_CREATE -> atomicity not a requirement -> fall
+ * back to lookup + open
+ */
+ goto no_open;
+ } else if (open_flag & (O_EXCL | O_TRUNC)) {
+ /* Fall back and fail with the right error */
+ create_error = error;
+ goto no_open;
+ } else {
+ /* No side effects, safe to clear O_CREAT */
+ create_error = error;
+ open_flag &= ~O_CREAT;
+ }
+ }
+
+ if (open_flag & O_CREAT) {
+ error = may_o_create(&nd->path, dentry, op->mode);
+ if (error) {
+ create_error = error;
+ if (open_flag & O_EXCL)
+ goto no_open;
+ open_flag &= ~O_CREAT;
+ }
+ }
+
+ if (nd->flags & LOOKUP_DIRECTORY)
+ open_flag |= O_DIRECTORY;
+
+ od.dentry = DENTRY_NOT_SET;
+ od.mnt = nd->path.mnt;
+ od.filp = &nd->intent.open.file;
+ filp = dir->i_op->atomic_open(dir, dentry, &od, open_flag, mode,
+ created);
+ if (IS_ERR(filp)) {
+ if (WARN_ON(od.dentry != DENTRY_NOT_SET))
+ dput(od.dentry);
+
+ if (create_error && PTR_ERR(filp) == -ENOENT)
+ filp = ERR_PTR(create_error);
+ goto out;
+ }
+
+ acc_mode = op->acc_mode;
+ if (*created) {
+ fsnotify_create(dir, dentry);
+ acc_mode = MAY_OPEN;
+ }
+
+ if (!filp) {
+ if (WARN_ON(od.dentry == DENTRY_NOT_SET)) {
+ filp = ERR_PTR(-EIO);
+ goto out;
+ }
+ if (od.dentry) {
+ dput(dentry);
+ dentry = od.dentry;
+ }
+ goto looked_up;
+ }
+
+ /*
+ * We didn't have the inode before the open, so check open permission
+ * here.
+ */
+ error = may_open(&filp->f_path, acc_mode, open_flag);
+ if (error)
+ goto out_fput;
+
+ error = open_check_o_direct(filp);
+ if (error)
+ goto out_fput;
+
+out:
+ dput(dentry);
+ return filp;
+
+out_fput:
+ fput(filp);
+ filp = ERR_PTR(error);
+ goto out;
+
+no_open:
+ if (need_lookup) {
+ dentry = lookup_real(dir, dentry, nd);
+ if (IS_ERR(dentry))
+ return ERR_CAST(dentry);
+
+ if (create_error) {
+ int open_flag = op->open_flag;
+
+ filp = ERR_PTR(create_error);
+ if ((open_flag & O_EXCL)) {
+ if (!dentry->d_inode)
+ goto out;
+ } else if (!dentry->d_inode) {
+ goto out;
+ } else if ((open_flag & O_TRUNC) &&
+ S_ISREG(dentry->d_inode->i_mode)) {
+ goto out;
+ }
+ /* will fail later, go on to get the right error */
+ }
+ }
+looked_up:
+ path->dentry = dentry;
+ path->mnt = nd->path.mnt;
+ return NULL;
+}
+
/*
* Lookup, maybe create and open the last component
*
@@ -2219,6 +2389,15 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
if (IS_ERR(dentry))
return ERR_CAST(dentry);
+ /* Cached positive dentry: will open in f_op->open */
+ if (!need_lookup && dentry->d_inode)
+ goto out_no_open;
+
+ if ((nd->flags & LOOKUP_OPEN) && dir_inode->i_op->atomic_open) {
+ return atomic_open(nd, dentry, path, op, want_write,
+ need_lookup, created);
+ }
+
if (need_lookup) {
BUG_ON(dentry->d_inode);
@@ -2251,6 +2430,7 @@ static struct file *lookup_open(struct nameidata *nd, struct path *path,
if (error)
goto out_dput;
}
+out_no_open:
path->dentry = dentry;
path->mnt = nd->path.mnt;
return NULL;
@@ -2344,8 +2524,16 @@ retry_lookup:
filp = lookup_open(nd, path, op, &want_write, &created);
mutex_unlock(&dir->d_inode->i_mutex);
- if (IS_ERR(filp))
- goto out;
+ if (filp) {
+ if (IS_ERR(filp))
+ goto out;
+
+ if (created || !S_ISREG(filp->f_path.dentry->d_inode->i_mode))
+ will_truncate = 0;
+
+ audit_inode(pathname, filp->f_path.dentry);
+ goto opened;
+ }
if (created) {
/* Don't check for write permission, don't truncate */
@@ -2361,6 +2549,16 @@ retry_lookup:
*/
audit_inode(pathname, path->dentry);
+ /*
+ * If atomic_open() acquired write access it is dropped now due to
+ * possible mount and symlink following (this might be optimized away if
+ * necessary...)
+ */
+ if (want_write) {
+ mnt_drop_write(nd->path.mnt);
+ want_write = 0;
+ }
+
error = -EEXIST;
if (open_flag & O_EXCL)
goto exit_dput;
@@ -2444,6 +2642,7 @@ common:
retried = true;
goto retry_lookup;
}
+opened:
if (!IS_ERR(filp)) {
error = ima_file_check(filp, op->acc_mode);
if (error) {
diff --git a/fs/open.c b/fs/open.c
index 1540632d8387..13bece4f36a4 100644
--- a/fs/open.c
+++ b/fs/open.c
@@ -811,6 +811,48 @@ out_err:
EXPORT_SYMBOL_GPL(lookup_instantiate_filp);
/**
+ * finish_open - finish opening a file
+ * @od: opaque open data
+ * @dentry: pointer to dentry
+ * @open: open callback
+ *
+ * This can be used to finish opening a file passed to i_op->atomic_open().
+ *
+ * If the open callback is set to NULL, then the standard f_op->open()
+ * filesystem callback is substituted.
+ */
+struct file *finish_open(struct opendata *od, struct dentry *dentry,
+ int (*open)(struct inode *, struct file *))
+{
+ struct file *res;
+
+ mntget(od->mnt);
+ dget(dentry);
+
+ res = do_dentry_open(dentry, od->mnt, *od->filp, open, current_cred());
+ if (!IS_ERR(res))
+ *od->filp = NULL;
+
+ return res;
+}
+EXPORT_SYMBOL(finish_open);
+
+/**
+ * finish_no_open - finish ->atomic_open() without opening the file
+ *
+ * @od: opaque open data
+ * @dentry: dentry or NULL (as returned from ->lookup())
+ *
+ * This can be used to set the result of a successful lookup in ->atomic_open().
+ * The filesystem's atomic_open() method shall return NULL after calling this.
+ */
+void finish_no_open(struct opendata *od, struct dentry *dentry)
+{
+ od->dentry = dentry;
+}
+EXPORT_SYMBOL(finish_no_open);
+
+/**
* nameidata_to_filp - convert a nameidata to an open filp.
* @nd: pointer to nameidata
* @flags: open flags
diff --git a/include/linux/fs.h b/include/linux/fs.h
index f06db6bd5a74..0314635cf833 100644
--- a/include/linux/fs.h
+++ b/include/linux/fs.h
@@ -427,6 +427,7 @@ struct kstatfs;
struct vm_area_struct;
struct vfsmount;
struct cred;
+struct opendata;
extern void __init inode_init(void);
extern void __init inode_init_early(void);
@@ -1693,6 +1694,9 @@ struct inode_operations {
int (*fiemap)(struct inode *, struct fiemap_extent_info *, u64 start,
u64 len);
int (*update_time)(struct inode *, struct timespec *, int);
+ struct file * (*atomic_open)(struct inode *, struct dentry *,
+ struct opendata *, unsigned open_flag,
+ umode_t create_mode, bool *created);
} ____cacheline_aligned;
struct seq_file;
@@ -2061,6 +2065,9 @@ extern struct file * dentry_open(struct dentry *, struct vfsmount *, int,
const struct cred *);
extern int filp_close(struct file *, fl_owner_t id);
extern char * getname(const char __user *);
+extern struct file *finish_open(struct opendata *od, struct dentry *dentry,
+ int (*open)(struct inode *, struct file *));
+extern void finish_no_open(struct opendata *od, struct dentry *dentry);
/* fs/ioctl.c */