diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 11:39:13 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2010-08-10 11:39:13 -0700 |
commit | 8c8946f509a494769a8c602b5ed189df01917d39 (patch) | |
tree | dfd96bd6ca5ea6803c6d77f65ba37e04f78b2d3b /fs/notify/inotify/inotify_user.c | |
parent | 5f248c9c251c60af3403902b26e08de43964ea0b (diff) | |
parent | 1968f5eed54ce47bde488fd9a450912e4a2d7138 (diff) |
Merge branch 'for-linus' of git://git.infradead.org/users/eparis/notify
* 'for-linus' of git://git.infradead.org/users/eparis/notify: (132 commits)
fanotify: use both marks when possible
fsnotify: pass both the vfsmount mark and inode mark
fsnotify: walk the inode and vfsmount lists simultaneously
fsnotify: rework ignored mark flushing
fsnotify: remove global fsnotify groups lists
fsnotify: remove group->mask
fsnotify: remove the global masks
fsnotify: cleanup should_send_event
fanotify: use the mark in handler functions
audit: use the mark in handler functions
dnotify: use the mark in handler functions
inotify: use the mark in handler functions
fsnotify: send fsnotify_mark to groups in event handling functions
fsnotify: Exchange list heads instead of moving elements
fsnotify: srcu to protect read side of inode and vfsmount locks
fsnotify: use an explicit flag to indicate fsnotify_destroy_mark has been called
fsnotify: use _rcu functions for mark list traversal
fsnotify: place marks on object in order of group memory address
vfs/fsnotify: fsnotify_close can delay the final work in fput
fsnotify: store struct file not struct path
...
Fix up trivial delete/modify conflict in fs/notify/inotify/inotify.c.
Diffstat (limited to 'fs/notify/inotify/inotify_user.c')
-rw-r--r-- | fs/notify/inotify/inotify_user.c | 369 |
1 files changed, 234 insertions, 135 deletions
diff --git a/fs/notify/inotify/inotify_user.c b/fs/notify/inotify/inotify_user.c index e46ca685b9be..bf7f6d776c31 100644 --- a/fs/notify/inotify/inotify_user.c +++ b/fs/notify/inotify/inotify_user.c @@ -46,17 +46,11 @@ /* these are configurable via /proc/sys/fs/inotify/ */ static int inotify_max_user_instances __read_mostly; static int inotify_max_queued_events __read_mostly; -int inotify_max_user_watches __read_mostly; +static int inotify_max_user_watches __read_mostly; static struct kmem_cache *inotify_inode_mark_cachep __read_mostly; struct kmem_cache *event_priv_cachep __read_mostly; -/* - * When inotify registers a new group it increments this and uses that - * value as an offset to set the fsnotify group "name" and priority. - */ -static atomic_t inotify_grp_num; - #ifdef CONFIG_SYSCTL #include <linux/sysctl.h> @@ -96,11 +90,14 @@ static inline __u32 inotify_arg_to_mask(u32 arg) { __u32 mask; - /* everything should accept their own ignored and cares about children */ - mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD); + /* + * everything should accept their own ignored, cares about children, + * and should receive events when the inode is unmounted + */ + mask = (FS_IN_IGNORED | FS_EVENT_ON_CHILD | FS_UNMOUNT); /* mask off the flags used to open the fd */ - mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT)); + mask |= (arg & (IN_ALL_EVENTS | IN_ONESHOT | IN_EXCL_UNLINK)); return mask; } @@ -144,6 +141,8 @@ static struct fsnotify_event *get_one_event(struct fsnotify_group *group, event = fsnotify_peek_notify_event(group); + pr_debug("%s: group=%p event=%p\n", __func__, group, event); + if (event->name_len) event_size += roundup(event->name_len + 1, event_size); @@ -173,6 +172,8 @@ static ssize_t copy_event_to_user(struct fsnotify_group *group, size_t event_size = sizeof(struct inotify_event); size_t name_len = 0; + pr_debug("%s: group=%p event=%p\n", __func__, group, event); + /* we get the inotify watch descriptor from the event private data */ spin_lock(&event->lock); fsn_priv = fsnotify_remove_priv_from_event(group, event); @@ -245,6 +246,8 @@ static ssize_t inotify_read(struct file *file, char __user *buf, kevent = get_one_event(group, count); mutex_unlock(&group->notification_mutex); + pr_debug("%s: group=%p kevent=%p\n", __func__, group, kevent); + if (kevent) { ret = PTR_ERR(kevent); if (IS_ERR(kevent)) @@ -289,6 +292,8 @@ static int inotify_release(struct inode *ignored, struct file *file) struct fsnotify_group *group = file->private_data; struct user_struct *user = group->inotify_data.user; + pr_debug("%s: group=%p\n", __func__, group); + fsnotify_clear_marks_by_group(group); /* free this group, matching get was inotify_init->fsnotify_obtain_group */ @@ -312,6 +317,8 @@ static long inotify_ioctl(struct file *file, unsigned int cmd, group = file->private_data; p = (void __user *) arg; + pr_debug("%s: group=%p cmd=%u\n", __func__, group, cmd); + switch (cmd) { case FIONREAD: mutex_lock(&group->notification_mutex); @@ -357,59 +364,159 @@ static int inotify_find_inode(const char __user *dirname, struct path *path, uns return error; } +static int inotify_add_to_idr(struct idr *idr, spinlock_t *idr_lock, + int *last_wd, + struct inotify_inode_mark *i_mark) +{ + int ret; + + do { + if (unlikely(!idr_pre_get(idr, GFP_KERNEL))) + return -ENOMEM; + + spin_lock(idr_lock); + ret = idr_get_new_above(idr, i_mark, *last_wd + 1, + &i_mark->wd); + /* we added the mark to the idr, take a reference */ + if (!ret) { + *last_wd = i_mark->wd; + fsnotify_get_mark(&i_mark->fsn_mark); + } + spin_unlock(idr_lock); + } while (ret == -EAGAIN); + + return ret; +} + +static struct inotify_inode_mark *inotify_idr_find_locked(struct fsnotify_group *group, + int wd) +{ + struct idr *idr = &group->inotify_data.idr; + spinlock_t *idr_lock = &group->inotify_data.idr_lock; + struct inotify_inode_mark *i_mark; + + assert_spin_locked(idr_lock); + + i_mark = idr_find(idr, wd); + if (i_mark) { + struct fsnotify_mark *fsn_mark = &i_mark->fsn_mark; + + fsnotify_get_mark(fsn_mark); + /* One ref for being in the idr, one ref we just took */ + BUG_ON(atomic_read(&fsn_mark->refcnt) < 2); + } + + return i_mark; +} + +static struct inotify_inode_mark *inotify_idr_find(struct fsnotify_group *group, + int wd) +{ + struct inotify_inode_mark *i_mark; + spinlock_t *idr_lock = &group->inotify_data.idr_lock; + + spin_lock(idr_lock); + i_mark = inotify_idr_find_locked(group, wd); + spin_unlock(idr_lock); + + return i_mark; +} + +static void do_inotify_remove_from_idr(struct fsnotify_group *group, + struct inotify_inode_mark *i_mark) +{ + struct idr *idr = &group->inotify_data.idr; + spinlock_t *idr_lock = &group->inotify_data.idr_lock; + int wd = i_mark->wd; + + assert_spin_locked(idr_lock); + + idr_remove(idr, wd); + + /* removed from the idr, drop that ref */ + fsnotify_put_mark(&i_mark->fsn_mark); +} + /* * Remove the mark from the idr (if present) and drop the reference * on the mark because it was in the idr. */ static void inotify_remove_from_idr(struct fsnotify_group *group, - struct inotify_inode_mark_entry *ientry) + struct inotify_inode_mark *i_mark) { - struct idr *idr; - struct fsnotify_mark_entry *entry; - struct inotify_inode_mark_entry *found_ientry; + spinlock_t *idr_lock = &group->inotify_data.idr_lock; + struct inotify_inode_mark *found_i_mark = NULL; int wd; - spin_lock(&group->inotify_data.idr_lock); - idr = &group->inotify_data.idr; - wd = ientry->wd; + spin_lock(idr_lock); + wd = i_mark->wd; - if (wd == -1) + /* + * does this i_mark think it is in the idr? we shouldn't get called + * if it wasn't.... + */ + if (wd == -1) { + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" + " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, + i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); goto out; + } - entry = idr_find(&group->inotify_data.idr, wd); - if (unlikely(!entry)) + /* Lets look in the idr to see if we find it */ + found_i_mark = inotify_idr_find_locked(group, wd); + if (unlikely(!found_i_mark)) { + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" + " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, + i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); goto out; + } - found_ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); - if (unlikely(found_ientry != ientry)) { - /* We found an entry in the idr with the right wd, but it's - * not the entry we were told to remove. eparis seriously - * fucked up somewhere. */ - WARN_ON(1); - ientry->wd = -1; + /* + * We found an mark in the idr at the right wd, but it's + * not the mark we were told to remove. eparis seriously + * fucked up somewhere. + */ + if (unlikely(found_i_mark != i_mark)) { + WARN_ONCE(1, "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p " + "mark->inode=%p found_i_mark=%p found_i_mark->wd=%d " + "found_i_mark->group=%p found_i_mark->inode=%p\n", + __func__, i_mark, i_mark->wd, i_mark->fsn_mark.group, + i_mark->fsn_mark.i.inode, found_i_mark, found_i_mark->wd, + found_i_mark->fsn_mark.group, + found_i_mark->fsn_mark.i.inode); goto out; } - /* One ref for being in the idr, one ref held by the caller */ - BUG_ON(atomic_read(&entry->refcnt) < 2); - - idr_remove(idr, wd); - ientry->wd = -1; + /* + * One ref for being in the idr + * one ref held by the caller trying to kill us + * one ref grabbed by inotify_idr_find + */ + if (unlikely(atomic_read(&i_mark->fsn_mark.refcnt) < 3)) { + printk(KERN_ERR "%s: i_mark=%p i_mark->wd=%d i_mark->group=%p" + " i_mark->inode=%p\n", __func__, i_mark, i_mark->wd, + i_mark->fsn_mark.group, i_mark->fsn_mark.i.inode); + /* we can't really recover with bad ref cnting.. */ + BUG(); + } - /* removed from the idr, drop that ref */ - fsnotify_put_mark(entry); + do_inotify_remove_from_idr(group, i_mark); out: - spin_unlock(&group->inotify_data.idr_lock); + /* match the ref taken by inotify_idr_find_locked() */ + if (found_i_mark) + fsnotify_put_mark(&found_i_mark->fsn_mark); + i_mark->wd = -1; + spin_unlock(idr_lock); } /* * Send IN_IGNORED for this wd, remove this wd from the idr. */ -void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, +void inotify_ignored_and_remove_idr(struct fsnotify_mark *fsn_mark, struct fsnotify_group *group) { - struct inotify_inode_mark_entry *ientry; - struct fsnotify_event *ignored_event; + struct inotify_inode_mark *i_mark; + struct fsnotify_event *ignored_event, *notify_event; struct inotify_event_private_data *event_priv; struct fsnotify_event_private_data *fsn_event_priv; int ret; @@ -420,7 +527,7 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, if (!ignored_event) return; - ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); event_priv = kmem_cache_alloc(event_priv_cachep, GFP_NOFS); if (unlikely(!event_priv)) @@ -429,37 +536,44 @@ void inotify_ignored_and_remove_idr(struct fsnotify_mark_entry *entry, fsn_event_priv = &event_priv->fsnotify_event_priv_data; fsn_event_priv->group = group; - event_priv->wd = ientry->wd; - - ret = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv); - if (ret) + event_priv->wd = i_mark->wd; + + notify_event = fsnotify_add_notify_event(group, ignored_event, fsn_event_priv, NULL); + if (notify_event) { + if (IS_ERR(notify_event)) + ret = PTR_ERR(notify_event); + else + fsnotify_put_event(notify_event); inotify_free_event_priv(fsn_event_priv); + } skip_send_ignore: /* matches the reference taken when the event was created */ fsnotify_put_event(ignored_event); - /* remove this entry from the idr */ - inotify_remove_from_idr(group, ientry); + /* remove this mark from the idr */ + inotify_remove_from_idr(group, i_mark); atomic_dec(&group->inotify_data.user->inotify_watches); } /* ding dong the mark is dead */ -static void inotify_free_mark(struct fsnotify_mark_entry *entry) +static void inotify_free_mark(struct fsnotify_mark *fsn_mark) { - struct inotify_inode_mark_entry *ientry = (struct inotify_inode_mark_entry *)entry; + struct inotify_inode_mark *i_mark; + + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); - kmem_cache_free(inotify_inode_mark_cachep, ientry); + kmem_cache_free(inotify_inode_mark_cachep, i_mark); } static int inotify_update_existing_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) { - struct fsnotify_mark_entry *entry; - struct inotify_inode_mark_entry *ientry; + struct fsnotify_mark *fsn_mark; + struct inotify_inode_mark *i_mark; __u32 old_mask, new_mask; __u32 mask; int add = (arg & IN_MASK_ADD); @@ -467,52 +581,43 @@ static int inotify_update_existing_watch(struct fsnotify_group *group, /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!mask)) + if (unlikely(!(mask & IN_ALL_EVENTS))) return -EINVAL; - spin_lock(&inode->i_lock); - entry = fsnotify_find_mark_entry(group, inode); - spin_unlock(&inode->i_lock); - if (!entry) + fsn_mark = fsnotify_find_inode_mark(group, inode); + if (!fsn_mark) return -ENOENT; - ientry = container_of(entry, struct inotify_inode_mark_entry, fsn_entry); + i_mark = container_of(fsn_mark, struct inotify_inode_mark, fsn_mark); - spin_lock(&entry->lock); + spin_lock(&fsn_mark->lock); - old_mask = entry->mask; - if (add) { - entry->mask |= mask; - new_mask = entry->mask; - } else { - entry->mask = mask; - new_mask = entry->mask; - } + old_mask = fsn_mark->mask; + if (add) + fsnotify_set_mark_mask_locked(fsn_mark, (fsn_mark->mask | mask)); + else + fsnotify_set_mark_mask_locked(fsn_mark, mask); + new_mask = fsn_mark->mask; - spin_unlock(&entry->lock); + spin_unlock(&fsn_mark->lock); if (old_mask != new_mask) { /* more bits in old than in new? */ int dropped = (old_mask & ~new_mask); - /* more bits in this entry than the inode's mask? */ + /* more bits in this fsn_mark than the inode's mask? */ int do_inode = (new_mask & ~inode->i_fsnotify_mask); - /* more bits in this entry than the group? */ - int do_group = (new_mask & ~group->mask); - /* update the inode with this new entry */ + /* update the inode with this new fsn_mark */ if (dropped || do_inode) fsnotify_recalc_inode_mask(inode); - /* update the group mask with the new mask */ - if (dropped || do_group) - fsnotify_recalc_group_mask(group); } /* return the wd */ - ret = ientry->wd; + ret = i_mark->wd; - /* match the get from fsnotify_find_mark_entry() */ - fsnotify_put_mark(entry); + /* match the get from fsnotify_find_mark() */ + fsnotify_put_mark(fsn_mark); return ret; } @@ -521,73 +626,51 @@ static int inotify_new_watch(struct fsnotify_group *group, struct inode *inode, u32 arg) { - struct inotify_inode_mark_entry *tmp_ientry; + struct inotify_inode_mark *tmp_i_mark; __u32 mask; int ret; + struct idr *idr = &group->inotify_data.idr; + spinlock_t *idr_lock = &group->inotify_data.idr_lock; /* don't allow invalid bits: we don't want flags set */ mask = inotify_arg_to_mask(arg); - if (unlikely(!mask)) + if (unlikely(!(mask & IN_ALL_EVENTS))) return -EINVAL; - tmp_ientry = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); - if (unlikely(!tmp_ientry)) + tmp_i_mark = kmem_cache_alloc(inotify_inode_mark_cachep, GFP_KERNEL); + if (unlikely(!tmp_i_mark)) return -ENOMEM; - fsnotify_init_mark(&tmp_ientry->fsn_entry, inotify_free_mark); - tmp_ientry->fsn_entry.mask = mask; - tmp_ientry->wd = -1; + fsnotify_init_mark(&tmp_i_mark->fsn_mark, inotify_free_mark); + tmp_i_mark->fsn_mark.mask = mask; + tmp_i_mark->wd = -1; ret = -ENOSPC; if (atomic_read(&group->inotify_data.user->inotify_watches) >= inotify_max_user_watches) goto out_err; -retry: - ret = -ENOMEM; - if (unlikely(!idr_pre_get(&group->inotify_data.idr, GFP_KERNEL))) - goto out_err; - /* we are putting the mark on the idr, take a reference */ - fsnotify_get_mark(&tmp_ientry->fsn_entry); - - spin_lock(&group->inotify_data.idr_lock); - ret = idr_get_new_above(&group->inotify_data.idr, &tmp_ientry->fsn_entry, - group->inotify_data.last_wd+1, - &tmp_ientry->wd); - spin_unlock(&group->inotify_data.idr_lock); - if (ret) { - /* we didn't get on the idr, drop the idr reference */ - fsnotify_put_mark(&tmp_ientry->fsn_entry); - - /* idr was out of memory allocate and try again */ - if (ret == -EAGAIN) - goto retry; + ret = inotify_add_to_idr(idr, idr_lock, &group->inotify_data.last_wd, + tmp_i_mark); + if (ret) goto out_err; - } /* we are on the idr, now get on the inode */ - ret = fsnotify_add_mark(&tmp_ientry->fsn_entry, group, inode); + ret = fsnotify_add_mark(&tmp_i_mark->fsn_mark, group, inode, NULL, 0); if (ret) { /* we failed to get on the inode, get off the idr */ - inotify_remove_from_idr(group, tmp_ientry); + inotify_remove_from_idr(group, tmp_i_mark); goto out_err; } - /* update the idr hint, who cares about races, it's just a hint */ - group->inotify_data.last_wd = tmp_ientry->wd; - /* increment the number of watches the user has */ atomic_inc(&group->inotify_data.user->inotify_watches); - /* return the watch descriptor for this new entry */ - ret = tmp_ientry->wd; - - /* if this mark added a new event update the group mask */ - if (mask & ~group->mask) - fsnotify_recalc_group_mask(group); + /* return the watch descriptor for this new mark */ + ret = tmp_i_mark->wd; out_err: - /* match the ref from fsnotify_init_markentry() */ - fsnotify_put_mark(&tmp_ientry->fsn_entry); + /* match the ref from fsnotify_init_mark() */ + fsnotify_put_mark(&tmp_i_mark->fsn_mark); return ret; } @@ -616,11 +699,8 @@ retry: static struct fsnotify_group *inotify_new_group(struct user_struct *user, unsigned int max_events) { struct fsnotify_group *group; - unsigned int grp_num; - /* fsnotify_obtain_group took a reference to group, we put this when we kill the file in the end */ - grp_num = (INOTIFY_GROUP_NUM - atomic_inc_return(&inotify_grp_num)); - group = fsnotify_obtain_group(grp_num, 0, &inotify_fsnotify_ops); + group = fsnotify_alloc_group(&inotify_fsnotify_ops); if (IS_ERR(group)) return group; @@ -726,7 +806,7 @@ fput_and_out: SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) { struct fsnotify_group *group; - struct fsnotify_mark_entry *entry; + struct inotify_inode_mark *i_mark; struct file *filp; int ret = 0, fput_needed; @@ -735,25 +815,23 @@ SYSCALL_DEFINE2(inotify_rm_watch, int, fd, __s32, wd) return -EBADF; /* verify that this is indeed an inotify instance */ - if (unlikely(filp->f_op != &inotify_fops)) { - ret = -EINVAL; + ret = -EINVAL; + if (unlikely(filp->f_op != &inotify_fops)) goto out; - } group = filp->private_data; - spin_lock(&group->inotify_data.idr_lock); - entry = idr_find(&group->inotify_data.idr, wd); - if (unlikely(!entry)) { - spin_unlock(&group->inotify_data.idr_lock); - ret = -EINVAL; + ret = -EINVAL; + i_mark = inotify_idr_find(group, wd); + if (unlikely(!i_mark)) goto out; - } - fsnotify_get_mark(entry); - spin_unlock(&group->inotify_data.idr_lock); - fsnotify_destroy_mark_by_entry(entry); - fsnotify_put_mark(entry); + ret = 0; + + fsnotify_destroy_mark(&i_mark->fsn_mark); + + /* match ref taken by inotify_idr_find */ + fsnotify_put_mark(&i_mark->fsn_mark); out: fput_light(filp, fput_needed); @@ -767,7 +845,28 @@ out: */ static int __init inotify_user_setup(void) { - inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark_entry, SLAB_PANIC); + BUILD_BUG_ON(IN_ACCESS != FS_ACCESS); + BUILD_BUG_ON(IN_MODIFY != FS_MODIFY); + BUILD_BUG_ON(IN_ATTRIB != FS_ATTRIB); + BUILD_BUG_ON(IN_CLOSE_WRITE != FS_CLOSE_WRITE); + BUILD_BUG_ON(IN_CLOSE_NOWRITE != FS_CLOSE_NOWRITE); + BUILD_BUG_ON(IN_OPEN != FS_OPEN); + BUILD_BUG_ON(IN_MOVED_FROM != FS_MOVED_FROM); + BUILD_BUG_ON(IN_MOVED_TO != FS_MOVED_TO); + BUILD_BUG_ON(IN_CREATE != FS_CREATE); + BUILD_BUG_ON(IN_DELETE != FS_DELETE); + BUILD_BUG_ON(IN_DELETE_SELF != FS_DELETE_SELF); + BUILD_BUG_ON(IN_MOVE_SELF != FS_MOVE_SELF); + BUILD_BUG_ON(IN_UNMOUNT != FS_UNMOUNT); + BUILD_BUG_ON(IN_Q_OVERFLOW != FS_Q_OVERFLOW); + BUILD_BUG_ON(IN_IGNORED != FS_IN_IGNORED); + BUILD_BUG_ON(IN_EXCL_UNLINK != FS_EXCL_UNLINK); + BUILD_BUG_ON(IN_ISDIR != FS_IN_ISDIR); + BUILD_BUG_ON(IN_ONESHOT != FS_IN_ONESHOT); + + BUG_ON(hweight32(ALL_INOTIFY_BITS) != 21); + + inotify_inode_mark_cachep = KMEM_CACHE(inotify_inode_mark, SLAB_PANIC); event_priv_cachep = KMEM_CACHE(inotify_event_private_data, SLAB_PANIC); inotify_max_queued_events = 16384; |