diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-22 09:56:06 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2012-08-22 09:56:06 -0700 |
commit | 467e9e51d07d43d32a1dd8b6ead2351e28fff084 (patch) | |
tree | d925caa3c94036ba2457f0de64b6e6c40236ca46 | |
parent | 23dcfa61bac244e1200ff9ad19c6e9144dcb6bb5 (diff) | |
parent | 88ec2789d856056344161aa20420dd37e893b0fe (diff) |
Merge branch 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs
Pull assorted fixes - mostly vfs - from Al Viro:
"Assorted fixes, with an unexpected detour into vfio refcounting logics
(fell out when digging in an analog of eventpoll race in there)."
* 'for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/viro/vfs:
task_work: add a scheduling point in task_work_run()
fs: fix fs/namei.c kernel-doc warnings
eventpoll: use-after-possible-free in epoll_create1()
vfio: grab vfio_device reference *before* exposing the sucker via fd_install()
vfio: get rid of vfio_device_put()/vfio_group_get_device* races
vfio: get rid of open-coding kref_put_mutex
introduce kref_put_mutex()
vfio: don't dereference after kfree...
mqueue: lift mnt_want_write() outside ->i_mutex, clean up a bit
-rw-r--r-- | drivers/vfio/vfio.c | 19 | ||||
-rw-r--r-- | fs/eventpoll.c | 2 | ||||
-rw-r--r-- | fs/namei.c | 2 | ||||
-rw-r--r-- | include/linux/kref.h | 18 | ||||
-rw-r--r-- | ipc/mqueue.c | 61 |
5 files changed, 56 insertions, 46 deletions
diff --git a/drivers/vfio/vfio.c b/drivers/vfio/vfio.c index 9591e2b509d7..17830c9c7cc6 100644 --- a/drivers/vfio/vfio.c +++ b/drivers/vfio/vfio.c @@ -264,6 +264,7 @@ static struct vfio_group *vfio_create_group(struct iommu_group *iommu_group) return group; } +/* called with vfio.group_lock held */ static void vfio_group_release(struct kref *kref) { struct vfio_group *group = container_of(kref, struct vfio_group, kref); @@ -287,13 +288,7 @@ static void vfio_group_release(struct kref *kref) static void vfio_group_put(struct vfio_group *group) { - mutex_lock(&vfio.group_lock); - /* - * Release needs to unlock to unregister the notifier, so only - * unlock if not released. - */ - if (!kref_put(&group->kref, vfio_group_release)) - mutex_unlock(&vfio.group_lock); + kref_put_mutex(&group->kref, vfio_group_release, &vfio.group_lock); } /* Assume group_lock or group reference is held */ @@ -401,7 +396,6 @@ static void vfio_device_release(struct kref *kref) struct vfio_device, kref); struct vfio_group *group = device->group; - mutex_lock(&group->device_lock); list_del(&device->group_next); mutex_unlock(&group->device_lock); @@ -416,8 +410,9 @@ static void vfio_device_release(struct kref *kref) /* Device reference always implies a group reference */ static void vfio_device_put(struct vfio_device *device) { - kref_put(&device->kref, vfio_device_release); - vfio_group_put(device->group); + struct vfio_group *group = device->group; + kref_put_mutex(&device->kref, vfio_device_release, &group->device_lock); + vfio_group_put(group); } static void vfio_device_get(struct vfio_device *device) @@ -1116,10 +1111,10 @@ static int vfio_group_get_device_fd(struct vfio_group *group, char *buf) */ filep->f_mode |= (FMODE_LSEEK | FMODE_PREAD | FMODE_PWRITE); - fd_install(ret, filep); - vfio_device_get(device); atomic_inc(&group->container_users); + + fd_install(ret, filep); break; } mutex_unlock(&group->device_lock); diff --git a/fs/eventpoll.c b/fs/eventpoll.c index 1c8b55670804..eedec84c1809 100644 --- a/fs/eventpoll.c +++ b/fs/eventpoll.c @@ -1654,8 +1654,8 @@ SYSCALL_DEFINE1(epoll_create1, int, flags) error = PTR_ERR(file); goto out_free_fd; } - fd_install(fd, file); ep->file = file; + fd_install(fd, file); return fd; out_free_fd: diff --git a/fs/namei.c b/fs/namei.c index db76b866a097..dd1ed1b8e98e 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -352,6 +352,7 @@ int __inode_permission(struct inode *inode, int mask) /** * sb_permission - Check superblock-level permissions * @sb: Superblock of inode to check permission on + * @inode: Inode to check permission on * @mask: Right to check for (%MAY_READ, %MAY_WRITE, %MAY_EXEC) * * Separate out file-system wide checks from inode-specific permission checks. @@ -656,6 +657,7 @@ int sysctl_protected_hardlinks __read_mostly = 1; /** * may_follow_link - Check symlink following for unsafe situations * @link: The path of the symlink + * @nd: nameidata pathwalk data * * In the case of the sysctl_protected_symlinks sysctl being enabled, * CAP_DAC_OVERRIDE needs to be specifically ignored if the symlink is diff --git a/include/linux/kref.h b/include/linux/kref.h index 9c07dcebded7..65af6887872f 100644 --- a/include/linux/kref.h +++ b/include/linux/kref.h @@ -18,6 +18,7 @@ #include <linux/bug.h> #include <linux/atomic.h> #include <linux/kernel.h> +#include <linux/mutex.h> struct kref { atomic_t refcount; @@ -93,4 +94,21 @@ static inline int kref_put(struct kref *kref, void (*release)(struct kref *kref) { return kref_sub(kref, 1, release); } + +static inline int kref_put_mutex(struct kref *kref, + void (*release)(struct kref *kref), + struct mutex *lock) +{ + WARN_ON(release == NULL); + if (unlikely(!atomic_add_unless(&kref->refcount, -1, 1))) { + mutex_lock(lock); + if (unlikely(!atomic_dec_and_test(&kref->refcount))) { + mutex_unlock(lock); + return 0; + } + release(kref); + return 1; + } + return 0; +} #endif /* _KREF_H_ */ diff --git a/ipc/mqueue.c b/ipc/mqueue.c index f8e54f5b9080..9a08acc9e649 100644 --- a/ipc/mqueue.c +++ b/ipc/mqueue.c @@ -726,7 +726,6 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, struct mq_attr *attr) { const struct cred *cred = current_cred(); - struct file *result; int ret; if (attr) { @@ -748,21 +747,11 @@ static struct file *do_create(struct ipc_namespace *ipc_ns, struct inode *dir, } mode &= ~current_umask(); - ret = mnt_want_write(path->mnt); - if (ret) - return ERR_PTR(ret); ret = vfs_create(dir, path->dentry, mode, true); path->dentry->d_fsdata = NULL; - if (!ret) - result = dentry_open(path, oflag, cred); - else - result = ERR_PTR(ret); - /* - * dentry_open() took a persistent mnt_want_write(), - * so we can now drop this one. - */ - mnt_drop_write(path->mnt); - return result; + if (ret) + return ERR_PTR(ret); + return dentry_open(path, oflag, cred); } /* Opens existing queue */ @@ -788,7 +777,9 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, struct mq_attr attr; int fd, error; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; - struct dentry *root = ipc_ns->mq_mnt->mnt_root; + struct vfsmount *mnt = ipc_ns->mq_mnt; + struct dentry *root = mnt->mnt_root; + int ro; if (u_attr && copy_from_user(&attr, u_attr, sizeof(struct mq_attr))) return -EFAULT; @@ -802,6 +793,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, if (fd < 0) goto out_putname; + ro = mnt_want_write(mnt); /* we'll drop it in any case */ error = 0; mutex_lock(&root->d_inode->i_mutex); path.dentry = lookup_one_len(name, root, strlen(name)); @@ -809,7 +801,7 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, error = PTR_ERR(path.dentry); goto out_putfd; } - path.mnt = mntget(ipc_ns->mq_mnt); + path.mnt = mntget(mnt); if (oflag & O_CREAT) { if (path.dentry->d_inode) { /* entry already exists */ @@ -820,6 +812,10 @@ SYSCALL_DEFINE4(mq_open, const char __user *, u_name, int, oflag, umode_t, mode, } filp = do_open(&path, oflag); } else { + if (ro) { + error = ro; + goto out; + } filp = do_create(ipc_ns, root->d_inode, &path, oflag, mode, u_attr ? &attr : NULL); @@ -845,6 +841,7 @@ out_putfd: fd = error; } mutex_unlock(&root->d_inode->i_mutex); + mnt_drop_write(mnt); out_putname: putname(name); return fd; @@ -857,40 +854,38 @@ SYSCALL_DEFINE1(mq_unlink, const char __user *, u_name) struct dentry *dentry; struct inode *inode = NULL; struct ipc_namespace *ipc_ns = current->nsproxy->ipc_ns; + struct vfsmount *mnt = ipc_ns->mq_mnt; name = getname(u_name); if (IS_ERR(name)) return PTR_ERR(name); - mutex_lock_nested(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex, - I_MUTEX_PARENT); - dentry = lookup_one_len(name, ipc_ns->mq_mnt->mnt_root, strlen(name)); + err = mnt_want_write(mnt); + if (err) + goto out_name; + mutex_lock_nested(&mnt->mnt_root->d_inode->i_mutex, I_MUTEX_PARENT); + dentry = lookup_one_len(name, mnt->mnt_root, strlen(name)); if (IS_ERR(dentry)) { err = PTR_ERR(dentry); goto out_unlock; } - if (!dentry->d_inode) { - err = -ENOENT; - goto out_err; - } - inode = dentry->d_inode; - if (inode) + if (!inode) { + err = -ENOENT; + } else { ihold(inode); - err = mnt_want_write(ipc_ns->mq_mnt); - if (err) - goto out_err; - err = vfs_unlink(dentry->d_parent->d_inode, dentry); - mnt_drop_write(ipc_ns->mq_mnt); -out_err: + err = vfs_unlink(dentry->d_parent->d_inode, dentry); + } dput(dentry); out_unlock: - mutex_unlock(&ipc_ns->mq_mnt->mnt_root->d_inode->i_mutex); - putname(name); + mutex_unlock(&mnt->mnt_root->d_inode->i_mutex); if (inode) iput(inode); + mnt_drop_write(mnt); +out_name: + putname(name); return err; } |