diff options
author | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 14:21:20 -0700 |
---|---|---|
committer | Linus Torvalds <torvalds@linux-foundation.org> | 2014-04-04 14:21:20 -0700 |
commit | f7789dc0d476e597b0fba52871e777f97d8e3f6e (patch) | |
tree | a2f77a68f4339c62ef3db227268db2dd56f83a64 | |
parent | 7df934526c0b3775613502dcd19ab6d2da8cce1e (diff) | |
parent | 29723adee11804b548903ddb1db666cf4a60f60e (diff) |
Merge branch 'locks-3.15' of git://git.samba.org/jlayton/linux
Pull file locking updates from Jeff Layton:
"Highlights:
- maintainership change for fs/locks.c. Willy's not interested in
maintaining it these days, and is OK with Bruce and I taking it.
- fix for open vs setlease race that Al ID'ed
- cleanup and consolidation of file locking code
- eliminate unneeded BUG() call
- merge of file-private lock implementation"
* 'locks-3.15' of git://git.samba.org/jlayton/linux:
locks: make locks_mandatory_area check for file-private locks
locks: fix locks_mandatory_locked to respect file-private locks
locks: require that flock->l_pid be set to 0 for file-private locks
locks: add new fcntl cmd values for handling file private locks
locks: skip deadlock detection on FL_FILE_PVT locks
locks: pass the cmd value to fcntl_getlk/getlk64
locks: report l_pid as -1 for FL_FILE_PVT locks
locks: make /proc/locks show IS_FILE_PVT locks as type "FLPVT"
locks: rename locks_remove_flock to locks_remove_file
locks: consolidate checks for compatible filp->f_mode values in setlk handlers
locks: fix posix lock range overflow handling
locks: eliminate BUG() call when there's an unexpected lock on file close
locks: add __acquires and __releases annotations to locks_start and locks_stop
locks: remove "inline" qualifier from fl_link manipulation functions
locks: clean up comment typo
locks: close potential race between setlease and open
MAINTAINERS: update entry for fs/locks.c
-rw-r--r-- | MAINTAINERS | 3 | ||||
-rw-r--r-- | arch/arm/kernel/sys_oabi-compat.c | 3 | ||||
-rw-r--r-- | fs/compat.c | 35 | ||||
-rw-r--r-- | fs/fcntl.c | 37 | ||||
-rw-r--r-- | fs/file_table.c | 2 | ||||
-rw-r--r-- | fs/locks.c | 389 | ||||
-rw-r--r-- | fs/namei.c | 2 | ||||
-rw-r--r-- | include/linux/fs.h | 43 | ||||
-rw-r--r-- | include/uapi/asm-generic/fcntl.h | 19 | ||||
-rw-r--r-- | mm/mmap.c | 2 | ||||
-rw-r--r-- | mm/nommu.c | 2 | ||||
-rw-r--r-- | security/selinux/hooks.c | 3 |
12 files changed, 356 insertions, 184 deletions
diff --git a/MAINTAINERS b/MAINTAINERS index 8774f7974d69..f728ac2b298a 100644 --- a/MAINTAINERS +++ b/MAINTAINERS @@ -3521,7 +3521,8 @@ F: include/scsi/libfcoe.h F: include/uapi/scsi/fc/ FILE LOCKING (flock() and fcntl()/lockf()) -M: Matthew Wilcox <matthew@wil.cx> +M: Jeff Layton <jlayton@redhat.com> +M: J. Bruce Fields <bfields@fieldses.org> L: linux-fsdevel@vger.kernel.org S: Maintained F: include/linux/fcntl.h diff --git a/arch/arm/kernel/sys_oabi-compat.c b/arch/arm/kernel/sys_oabi-compat.c index 3e94811690ce..702bd329d9d0 100644 --- a/arch/arm/kernel/sys_oabi-compat.c +++ b/arch/arm/kernel/sys_oabi-compat.c @@ -203,6 +203,9 @@ asmlinkage long sys_oabi_fcntl64(unsigned int fd, unsigned int cmd, int ret; switch (cmd) { + case F_GETLKP: + case F_SETLKP: + case F_SETLKPW: case F_GETLK64: case F_SETLK64: case F_SETLKW64: diff --git a/fs/compat.c b/fs/compat.c index f86df85dff61..ca926ad0430c 100644 --- a/fs/compat.c +++ b/fs/compat.c @@ -399,12 +399,28 @@ static int put_compat_flock64(struct flock *kfl, struct compat_flock64 __user *u } #endif +static unsigned int +convert_fcntl_cmd(unsigned int cmd) +{ + switch (cmd) { + case F_GETLK64: + return F_GETLK; + case F_SETLK64: + return F_SETLK; + case F_SETLKW64: + return F_SETLKW; + } + + return cmd; +} + COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { mm_segment_t old_fs; struct flock f; long ret; + unsigned int conv_cmd; switch (cmd) { case F_GETLK: @@ -441,16 +457,18 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, case F_GETLK64: case F_SETLK64: case F_SETLKW64: + case F_GETLKP: + case F_SETLKP: + case F_SETLKPW: ret = get_compat_flock64(&f, compat_ptr(arg)); if (ret != 0) break; old_fs = get_fs(); set_fs(KERNEL_DS); - ret = sys_fcntl(fd, (cmd == F_GETLK64) ? F_GETLK : - ((cmd == F_SETLK64) ? F_SETLK : F_SETLKW), - (unsigned long)&f); + conv_cmd = convert_fcntl_cmd(cmd); + ret = sys_fcntl(fd, conv_cmd, (unsigned long)&f); set_fs(old_fs); - if (cmd == F_GETLK64 && ret == 0) { + if ((conv_cmd == F_GETLK || conv_cmd == F_GETLKP) && ret == 0) { /* need to return lock information - see above for commentary */ if (f.l_start > COMPAT_LOFF_T_MAX) ret = -EOVERFLOW; @@ -471,8 +489,15 @@ COMPAT_SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, COMPAT_SYSCALL_DEFINE3(fcntl, unsigned int, fd, unsigned int, cmd, compat_ulong_t, arg) { - if ((cmd == F_GETLK64) || (cmd == F_SETLK64) || (cmd == F_SETLKW64)) + switch (cmd) { + case F_GETLK64: + case F_SETLK64: + case F_SETLKW64: + case F_GETLKP: + case F_SETLKP: + case F_SETLKPW: return -EINVAL; + } return compat_sys_fcntl64(fd, cmd, arg); } diff --git a/fs/fcntl.c b/fs/fcntl.c index ef6866592a0f..9ead1596399a 100644 --- a/fs/fcntl.c +++ b/fs/fcntl.c @@ -272,9 +272,19 @@ static long do_fcntl(int fd, unsigned int cmd, unsigned long arg, case F_SETFL: err = setfl(fd, filp, arg); break; +#if BITS_PER_LONG != 32 + /* 32-bit arches must use fcntl64() */ + case F_GETLKP: +#endif case F_GETLK: - err = fcntl_getlk(filp, (struct flock __user *) arg); + err = fcntl_getlk(filp, cmd, (struct flock __user *) arg); break; +#if BITS_PER_LONG != 32 + /* 32-bit arches must use fcntl64() */ + case F_SETLKP: + case F_SETLKPW: +#endif + /* Fallthrough */ case F_SETLK: case F_SETLKW: err = fcntl_setlk(fd, filp, cmd, (struct flock __user *) arg); @@ -388,17 +398,20 @@ SYSCALL_DEFINE3(fcntl64, unsigned int, fd, unsigned int, cmd, goto out1; switch (cmd) { - case F_GETLK64: - err = fcntl_getlk64(f.file, (struct flock64 __user *) arg); - break; - case F_SETLK64: - case F_SETLKW64: - err = fcntl_setlk64(fd, f.file, cmd, - (struct flock64 __user *) arg); - break; - default: - err = do_fcntl(fd, cmd, arg, f.file); - break; + case F_GETLK64: + case F_GETLKP: + err = fcntl_getlk64(f.file, cmd, (struct flock64 __user *) arg); + break; + case F_SETLK64: + case F_SETLKW64: + case F_SETLKP: + case F_SETLKPW: + err = fcntl_setlk64(fd, f.file, cmd, + (struct flock64 __user *) arg); + break; + default: + err = do_fcntl(fd, cmd, arg, f.file); + break; } out1: fdput(f); diff --git a/fs/file_table.c b/fs/file_table.c index 5b24008ea4f6..01071c4d752e 100644 --- a/fs/file_table.c +++ b/fs/file_table.c @@ -235,7 +235,7 @@ static void __fput(struct file *file) * in the file cleanup chain. */ eventpoll_release(file); - locks_remove_flock(file); + locks_remove_file(file); if (unlikely(file->f_flags & FASYNC)) { if (file->f_op->fasync) diff --git a/fs/locks.c b/fs/locks.c index 92a0f0a52b06..13fc7a6d380a 100644 --- a/fs/locks.c +++ b/fs/locks.c @@ -135,6 +135,7 @@ #define IS_POSIX(fl) (fl->fl_flags & FL_POSIX) #define IS_FLOCK(fl) (fl->fl_flags & FL_FLOCK) #define IS_LEASE(fl) (fl->fl_flags & (FL_LEASE|FL_DELEG)) +#define IS_FILE_PVT(fl) (fl->fl_flags & FL_FILE_PVT) static bool lease_breaking(struct file_lock *fl) { @@ -344,48 +345,43 @@ static int assign_type(struct file_lock *fl, long type) return 0; } -/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX - * style lock. - */ -static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, - struct flock *l) +static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, + struct flock64 *l) { - off_t start, end; - switch (l->l_whence) { case SEEK_SET: - start = 0; + fl->fl_start = 0; break; case SEEK_CUR: - start = filp->f_pos; + fl->fl_start = filp->f_pos; break; case SEEK_END: - start = i_size_read(file_inode(filp)); + fl->fl_start = i_size_read(file_inode(filp)); break; default: return -EINVAL; } + if (l->l_start > OFFSET_MAX - fl->fl_start) + return -EOVERFLOW; + fl->fl_start += l->l_start; + if (fl->fl_start < 0) + return -EINVAL; /* POSIX-1996 leaves the case l->l_len < 0 undefined; POSIX-2001 defines it. */ - start += l->l_start; - if (start < 0) - return -EINVAL; - fl->fl_end = OFFSET_MAX; if (l->l_len > 0) { - end = start + l->l_len - 1; - fl->fl_end = end; + if (l->l_len - 1 > OFFSET_MAX - fl->fl_start) + return -EOVERFLOW; + fl->fl_end = fl->fl_start + l->l_len - 1; + } else if (l->l_len < 0) { - end = start - 1; - fl->fl_end = end; - start += l->l_len; - if (start < 0) + if (fl->fl_start + l->l_len < 0) return -EINVAL; - } - fl->fl_start = start; /* we record the absolute position */ - if (fl->fl_end < fl->fl_start) - return -EOVERFLOW; - + fl->fl_end = fl->fl_start - 1; + fl->fl_start += l->l_len; + } else + fl->fl_end = OFFSET_MAX; + fl->fl_owner = current->files; fl->fl_pid = current->tgid; fl->fl_file = filp; @@ -393,55 +389,36 @@ static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, fl->fl_ops = NULL; fl->fl_lmops = NULL; - return assign_type(fl, l->l_type); -} - -#if BITS_PER_LONG == 32 -static int flock64_to_posix_lock(struct file *filp, struct file_lock *fl, - struct flock64 *l) -{ - loff_t start; - - switch (l->l_whence) { - case SEEK_SET: - start = 0; - break; - case SEEK_CUR: - start = filp->f_pos; + /* Ensure that fl->fl_filp has compatible f_mode */ + switch (l->l_type) { + case F_RDLCK: + if (!(filp->f_mode & FMODE_READ)) + return -EBADF; break; - case SEEK_END: - start = i_size_read(file_inode(filp)); + case F_WRLCK: + if (!(filp->f_mode & FMODE_WRITE)) + return -EBADF; break; - default: - return -EINVAL; } - start += l->l_start; - if (start < 0) - return -EINVAL; - fl->fl_end = OFFSET_MAX; - if (l->l_len > 0) { - fl->fl_end = start + l->l_len - 1; - } else if (l->l_len < 0) { - fl->fl_end = start - 1; - start += l->l_len; - if (start < 0) - return -EINVAL; - } - fl->fl_start = start; /* we record the absolute position */ - if (fl->fl_end < fl->fl_start) - return -EOVERFLOW; - - fl->fl_owner = current->files; - fl->fl_pid = current->tgid; - fl->fl_file = filp; - fl->fl_flags = FL_POSIX; - fl->fl_ops = NULL; - fl->fl_lmops = NULL; - return assign_type(fl, l->l_type); } -#endif + +/* Verify a "struct flock" and copy it to a "struct file_lock" as a POSIX + * style lock. + */ +static int flock_to_posix_lock(struct file *filp, struct file_lock *fl, + struct flock *l) +{ + struct flock64 ll = { + .l_type = l->l_type, + .l_whence = l->l_whence, + .l_start = l->l_start, + .l_len = l->l_len, + }; + + return flock64_to_posix_lock(filp, fl, &ll); +} /* default lease lock manager operations */ static void lease_break_callback(struct file_lock *fl) @@ -511,8 +488,7 @@ static int posix_same_owner(struct file_lock *fl1, struct file_lock *fl2) } /* Must be called with the i_lock held! */ -static inline void -locks_insert_global_locks(struct file_lock *fl) +static void locks_insert_global_locks(struct file_lock *fl) { lg_local_lock(&file_lock_lglock); fl->fl_link_cpu = smp_processor_id(); @@ -521,8 +497,7 @@ locks_insert_global_locks(struct file_lock *fl) } /* Must be called with the i_lock held! */ -static inline void -locks_delete_global_locks(struct file_lock *fl) +static void locks_delete_global_locks(struct file_lock *fl) { /* * Avoid taking lock if already unhashed. This is safe since this check @@ -544,14 +519,12 @@ posix_owner_key(struct file_lock *fl) return (unsigned long)fl->fl_owner; } -static inline void -locks_insert_global_blocked(struct file_lock *waiter) +static void locks_insert_global_blocked(struct file_lock *waiter) { hash_add(blocked_hash, &waiter->fl_link, posix_owner_key(waiter)); } -static inline void -locks_delete_global_blocked(struct file_lock *waiter) +static void locks_delete_global_blocked(struct file_lock *waiter) { hash_del(&waiter->fl_link); } @@ -581,7 +554,7 @@ static void locks_delete_block(struct file_lock *waiter) * it seems like the reasonable thing to do. * * Must be called with both the i_lock and blocked_lock_lock held. The fl_block - * list itself is protected by the file_lock_list, but by ensuring that the + * list itself is protected by the blocked_lock_lock, but by ensuring that the * i_lock is also held on insertions we can avoid taking the blocked_lock_lock * in some cases when we see that the fl_block list is empty. */ @@ -591,7 +564,7 @@ static void __locks_insert_block(struct file_lock *blocker, BUG_ON(!list_empty(&waiter->fl_block)); waiter->fl_next = blocker; list_add_tail(&waiter->fl_block, &blocker->fl_block); - if (IS_POSIX(blocker)) + if (IS_POSIX(blocker) && !IS_FILE_PVT(blocker)) locks_insert_global_blocked(waiter); } @@ -652,15 +625,18 @@ static void locks_insert_lock(struct file_lock **pos, struct file_lock *fl) locks_insert_global_locks(fl); } -/* - * Delete a lock and then free it. - * Wake up processes that are blocked waiting for this lock, - * notify the FS that the lock has been cleared and - * finally free the lock. +/** + * locks_delete_lock - Delete a lock and then free it. + * @thisfl_p: pointer that points to the fl_next field of the previous + * inode->i_flock list entry + * + * Unlink a lock from all lists and free the namespace reference, but don't + * free it yet. Wake up processes that are blocked waiting for this lock and + * notify the FS that the lock has been cleared. * * Must be called with the i_lock held! */ -static void locks_delete_lock(struct file_lock **thisfl_p) +static void locks_unlink_lock(struct file_lock **thisfl_p) { struct file_lock *fl = *thisfl_p; @@ -675,6 +651,18 @@ static void locks_delete_lock(struct file_lock **thisfl_p) } locks_wake_up_blocks(fl); +} + +/* + * Unlink a lock from all lists and free it. + * + * Must be called with i_lock held! + */ +static void locks_delete_lock(struct file_lock **thisfl_p) +{ + struct file_lock *fl = *thisfl_p; + + locks_unlink_lock(thisfl_p); locks_free_lock(fl); } @@ -769,8 +757,16 @@ EXPORT_SYMBOL(posix_test_lock); * Note: the above assumption may not be true when handling lock * requests from a broken NFS client. It may also fail in the presence * of tasks (such as posix threads) sharing the same open file table. - * * To handle those cases, we just bail out after a few iterations. + * + * For FL_FILE_PVT locks, the owner is the filp, not the files_struct. + * Because the owner is not even nominally tied to a thread of + * execution, the deadlock detection below can't reasonably work well. Just + * skip it for those. + * + * In principle, we could do a more limited deadlock detection on FL_FILE_PVT + * locks that just checks for the case where two tasks are attempting to + * upgrade from read to write locks on the same inode. */ #define MAX_DEADLK_ITERATIONS 10 @@ -793,6 +789,13 @@ static int posix_locks_deadlock(struct file_lock *caller_fl, { int i = 0; + /* + * This deadlock detector can't reasonably detect deadlocks with + * FL_FILE_PVT locks, since they aren't owned by a process, per-se. + */ + if (IS_FILE_PVT(caller_fl)) + return 0; + while ((block_fl = what_owner_is_waiting_for(block_fl))) { if (i++ > MAX_DEADLK_ITERATIONS) return 0; @@ -1152,13 +1155,14 @@ EXPORT_SYMBOL(posix_lock_file_wait); /** * locks_mandatory_locked - Check for an active lock - * @inode: the file to check + * @file: the file to check * * Searches the inode's list of locks to find any POSIX locks which conflict. * This function is called from locks_verify_locked() only. */ -int locks_mandatory_locked(struct inode *inode) +int locks_mandatory_locked(struct file *file) { + struct inode *inode = file_inode(file); fl_owner_t owner = current->files; struct file_lock *fl; @@ -1169,7 +1173,7 @@ int locks_mandatory_locked(struct inode *inode) for (fl = inode->i_flock; fl != NULL; fl = fl->fl_next) { if (!IS_POSIX(fl)) continue; - if (fl->fl_owner != owner) + if (fl->fl_owner != owner && fl->fl_owner != (fl_owner_t)file) break; } spin_unlock(&inode->i_lock); @@ -1195,19 +1199,30 @@ int locks_mandatory_area(int read_write, struct inode *inode, { struct file_lock fl; int error; + bool sleep = false; locks_init_lock(&fl); - fl.fl_owner = current->files; fl.fl_pid = current->tgid; fl.fl_file = filp; fl.fl_flags = FL_POSIX | FL_ACCESS; if (filp && !(filp->f_flags & O_NONBLOCK)) - fl.fl_flags |= FL_SLEEP; + sleep = true; fl.fl_type = (read_write == FLOCK_VERIFY_WRITE) ? F_WRLCK : F_RDLCK; fl.fl_start = offset; fl.fl_end = offset + count - 1; for (;;) { + if (filp) { + fl.fl_owner = (fl_owner_t)filp; + fl.fl_flags &= ~FL_SLEEP; + error = __posix_lock_file(inode, &fl, NULL); + if (!error) + break; + } + + if (sleep) + fl.fl_flags |= FL_SLEEP; + fl.fl_owner = current->files; error = __posix_lock_file(inode, &fl, NULL); if (error != FILE_LOCK_DEFERRED) break; @@ -1472,6 +1487,32 @@ int fcntl_getlease(struct file *filp) return type; } +/** + * check_conflicting_open - see if the given dentry points to a file that has + * an existing open that would conflict with the + * desired lease. + * @dentry: dentry to check + * @arg: type of lease that we're trying to acquire + * + * Check to see if there's an existing open fd on this file that would + * conflict with the lease we're trying to set. + */ +static int +check_conflicting_open(const struct dentry *dentry, const long arg) +{ + int ret = 0; + struct inode *inode = dentry->d_inode; + + if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) + return -EAGAIN; + + if ((arg == F_WRLCK) && ((d_count(dentry) > 1) || + (atomic_read(&inode->i_count) > 1))) + ret = -EAGAIN; + + return ret; +} + static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp) { struct file_lock *fl, **before, **my_before = NULL, *lease; @@ -1499,12 +1540,8 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp return -EINVAL; } - error = -EAGAIN; - if ((arg == F_RDLCK) && (atomic_read(&inode->i_writecount) > 0)) - goto out; - if ((arg == F_WRLCK) - && ((d_count(dentry) > 1) - || (atomic_read(&inode->i_count) > 1))) + error = check_conflicting_open(dentry, arg); + if (error) goto out; /* @@ -1549,7 +1586,19 @@ static int generic_add_lease(struct file *filp, long arg, struct file_lock **flp goto out; locks_insert_lock(before, lease); - error = 0; + /* + * The check in break_lease() is lockless. It's possible for another + * open to race in after we did the earlier check for a conflicting + * open but before the lease was inserted. Check again for a + * conflicting open and cancel the lease if there is one. + * + * We also add a barrier here to ensure that the insertion of the lock + * precedes these checks. + */ + smp_mb(); + error = check_conflicting_open(dentry, arg); + if (error) + locks_unlink_lock(flp); out: if (is_deleg) mutex_unlock(&inode->i_mutex); @@ -1842,7 +1891,7 @@ EXPORT_SYMBOL_GPL(vfs_test_lock); static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) { - flock->l_pid = fl->fl_pid; + flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; #if BITS_PER_LONG == 32 /* * Make sure we can represent the posix lock via @@ -1864,7 +1913,7 @@ static int posix_lock_to_flock(struct flock *flock, struct file_lock *fl) #if BITS_PER_LONG == 32 static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) { - flock->l_pid = fl->fl_pid; + flock->l_pid = IS_FILE_PVT(fl) ? -1 : fl->fl_pid; flock->l_start = fl->fl_start; flock->l_len = fl->fl_end == OFFSET_MAX ? 0 : fl->fl_end - fl->fl_start + 1; @@ -1876,7 +1925,7 @@ static void posix_lock_to_flock64(struct flock64 *flock, struct file_lock *fl) /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk(struct file *filp, struct flock __user *l) +int fcntl_getlk(struct file *filp, unsigned int cmd, struct flock __user *l) { struct file_lock file_lock; struct flock flock; @@ -1893,6 +1942,16 @@ int fcntl_getlk(struct file *filp, struct flock __user *l) if (error) goto out; + if (cmd == F_GETLKP) { + error = -EINVAL; + if (flock.l_pid != 0) + goto out; + + cmd = F_GETLK; + file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_owner = (fl_owner_t)filp; + } + error = vfs_test_lock(filp, &file_lock); if (error) goto out; @@ -2012,25 +2071,32 @@ again: error = flock_to_posix_lock(filp, file_lock, &flock); if (error) goto out; - if (cmd == F_SETLKW) { - file_lock->fl_flags |= FL_SLEEP; - } - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) + + /* + * If the cmd is requesting file-private locks, then set the + * FL_FILE_PVT flag and override the owner. + */ + switch (cmd) { + case F_SETLKP: + error = -EINVAL; + if (flock.l_pid != 0) goto out; + + cmd = F_SETLK; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; break; - case F_UNLCK: - break; - default: + case F_SETLKPW: error = -EINVAL; - goto out; + if (flock.l_pid != 0) + goto out; + + cmd = F_SETLKW; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; + /* Fallthrough */ + case F_SETLKW: + file_lock->fl_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); @@ -2061,7 +2127,7 @@ out: /* Report the first existing lock that would conflict with l. * This implements the F_GETLK command of fcntl(). */ -int fcntl_getlk64(struct file *filp, struct flock64 __user *l) +int fcntl_getlk64(struct file *filp, unsigned int cmd, struct flock64 __user *l) { struct file_lock file_lock; struct flock64 flock; @@ -2078,6 +2144,16 @@ int fcntl_getlk64(struct file *filp, struct flock64 __user *l) if (error) goto out; + if (cmd == F_GETLKP) { + error = -EINVAL; + if (flock.l_pid != 0) + goto out; + + cmd = F_GETLK64; + file_lock.fl_flags |= FL_FILE_PVT; + file_lock.fl_owner = (fl_owner_t)filp; + } + error = vfs_test_lock(filp, &file_lock); if (error) goto out; @@ -2130,25 +2206,32 @@ again: error = flock64_to_posix_lock(filp, file_lock, &flock); if (error) goto out; - if (cmd == F_SETLKW64) { - file_lock->fl_flags |= FL_SLEEP; - } - - error = -EBADF; - switch (flock.l_type) { - case F_RDLCK: - if (!(filp->f_mode & FMODE_READ)) - goto out; - break; - case F_WRLCK: - if (!(filp->f_mode & FMODE_WRITE)) + + /* + * If the cmd is requesting file-private locks, then set the + * FL_FILE_PVT flag and override the owner. + */ + switch (cmd) { + case F_SETLKP: + error = -EINVAL; + if (flock.l_pid != 0) goto out; + + cmd = F_SETLK64; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; break; - case F_UNLCK: - break; - default: + case F_SETLKPW: error = -EINVAL; - goto out; + if (flock.l_pid != 0) + goto out; + + cmd = F_SETLKW64; + file_lock->fl_flags |= FL_FILE_PVT; + file_lock->fl_owner = (fl_owner_t)filp; + /* Fallthrough */ + case F_SETLKW64: + file_lock->fl_flags |= FL_SLEEP; } error = do_lock_file_wait(filp, cmd, file_lock); @@ -2209,7 +2292,7 @@ EXPORT_SYMBOL(locks_remove_posix); /* * This function is called on the last close of an open file. */ -void locks_remove_flock(struct file *filp) +void locks_remove_file(struct file *filp) { struct inode * inode = file_inode(filp); struct file_lock *fl; @@ -2218,6 +2301,8 @@ void locks_remove_flock(struct file *filp) if (!inode->i_flock) return; + locks_remove_posix(filp, (fl_owner_t)filp); + if (filp->f_op->flock) { struct file_lock fl = { .fl_pid = current->tgid, @@ -2236,16 +2321,28 @@ void locks_remove_flock(struct file *filp) while ((fl = *before) != NULL) { if (fl->fl_file == filp) { - if (IS_FLOCK(fl)) { - locks_delete_lock(before); - continue; - } if (IS_LEASE(fl)) { lease_modify(before, F_UNLCK); continue; } - /* What? */ - BUG(); + + /* + * There's a leftover lock on the list of a type that + * we didn't expect to see. Most likely a classic + * POSIX lock that ended up not getting released + * properly, or that raced onto the list somehow. Log + * some info about it and then just remove it from + * the list. + */ + WARN(!IS_FLOCK(fl), + "leftover lock: dev=%u:%u ino=%lu type=%hhd flags=0x%x start=%lld end=%lld\n", + MAJOR(inode->i_sb->s_dev), + MINOR(inode->i_sb->s_dev), inode->i_ino, + fl->fl_type, fl->fl_flags, + fl->fl_start, fl->fl_end); + + locks_delete_lock(before); + continue; } before = &fl->fl_next; } @@ -2314,8 +2411,14 @@ static void lock_get_status(struct seq_file *f, struct file_lock *fl, seq_printf(f, "%lld:%s ", id, pfx); if (IS_POSIX(fl)) { - seq_printf(f, "%6s %s ", - (fl->fl_flags & FL_ACCESS) ? "ACCESS" : "POSIX ", + if (fl->fl_flags & FL_ACCESS) + seq_printf(f, "ACCESS"); + else if (IS_FILE_PVT(fl)) + seq_printf(f, "FLPVT "); + else + seq_printf(f, "POSIX "); + + seq_printf(f, " %s ", (inode == NULL) ? "*NOINODE*" : mandatory_lock(inode) ? "MANDATORY" : "ADVISORY "); } else if (IS_FLOCK(fl)) { @@ -2385,6 +2488,7 @@ static int locks_show(struct seq_file *f, void *v) } static void *locks_start(struct seq_file *f, loff_t *pos) + __acquires(&blocked_lock_lock) { struct locks_iterator *iter = f->private; @@ -2403,6 +2507,7 @@ static void *locks_next(struct seq_file *f, void *v, loff_t *pos) } static void locks_stop(struct seq_file *f, void *v) + __releases(&blocked_lock_lock) { spin_unlock(&blocked_lock_lock); lg_global_unlock(&file_lock_lglock); diff --git a/fs/namei.c b/fs/namei.c index c1178880f23c..88339f59efb5 100644 --- a/fs/namei.c +++ b/fs/namei.c @@ -2569,7 +2569,7 @@ static int handle_truncate(struct file *filp) /* * Refuse to truncate files with mandatory locks held on them. */ - error = locks_verify_locked(inode); + error = locks_verify_locked(filp); if (!error) error = security_path_truncate(path); if (!error) { diff --git a/include/linux/fs.h b/include/linux/fs.h index 215cb1b09f47..a877ed3f389f 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -893,6 +893,7 @@ static inline int file_check_writeable(struct file *filp) #define FL_SLEEP 128 /* A blocking lock */ #define FL_DOWNGRADE_PENDING 256 /* Lease is being downgraded */ #define FL_UNLOCK_PENDING 512 /* Lease is being broken */ +#define FL_FILE_PVT 1024 /* lock is private to the file */ /* * Special return value from posix_lock_file() and vfs_lock_file() for @@ -997,12 +998,12 @@ struct file_lock { extern void send_sigio(struct fown_struct *fown, int fd, int band); #ifdef CONFIG_FILE_LOCKING -extern int fcntl_getlk(struct file *, struct flock __user *); +extern int fcntl_getlk(struct file *, unsigned int, struct flock __user *); extern int fcntl_setlk(unsigned int, struct file *, unsigned int, struct flock __user *); #if BITS_PER_LONG == 32 -extern int fcntl_getlk64(struct file *, struct flock64 __user *); +extern int fcntl_getlk64(struct file *, unsigned int, struct flock64 __user *); extern int fcntl_setlk64(unsigned int, struct file *, unsigned int, struct flock64 __user *); #endif @@ -1017,7 +1018,7 @@ extern struct file_lock * locks_alloc_lock(void); extern void locks_copy_lock(struct file_lock *, struct file_lock *); extern void __locks_copy_lock(struct file_lock *, const struct file_lock *); extern void locks_remove_posix(struct file *, fl_owner_t); -extern void locks_remove_flock(struct file *); +extern void locks_remove_file(struct file *); extern void locks_release_private(struct file_lock *); extern void posix_test_lock(struct file *, struct file_lock *); extern int posix_lock_file(struct file *, struct file_lock *, struct file_lock *); @@ -1035,7 +1036,8 @@ extern int lease_modify(struct file_lock **, int); extern int lock_may_read(struct inode *, loff_t start, unsigned long count); extern int lock_may_write(struct inode *, loff_t start, unsigned long count); #else /* !CONFIG_FILE_LOCKING */ -static inline int fcntl_getlk(struct file *file, struct flock __user *user) +static inline int fcntl_getlk(struct file *file, unsigned int cmd, + struct flock __user *user) { return -EINVAL; } @@ -1047,7 +1049,8 @@ static inline int fcntl_setlk(unsigned int fd, struct file *file, } #if BITS_PER_LONG == 32 -static inline int fcntl_getlk64(struct file *file, struct flock64 __user *user) +static inline int fcntl_getlk64(struct file *file, unsigned int cmd, + struct flock64 __user *user) { return -EINVAL; } @@ -1088,7 +1091,7 @@ static inline void locks_remove_posix(struct file *filp, fl_owner_t owner) return; } -static inline void locks_remove_flock(struct file *filp) +static inline void locks_remove_file(struct file *filp) { return; } @@ -1916,6 +1919,11 @@ extern int current_umask(void); extern void ihold(struct inode * inode); extern void iput(struct inode *); +static inline struct inode *file_inode(struct file *f) +{ + return f->f_inode; +} + /* /sys/fs */ extern struct kobject *fs_kobj; @@ -1925,7 +1933,7 @@ extern struct kobject *fs_kobj; #define FLOCK_VERIFY_WRITE 2 #ifdef CONFIG_FILE_LOCKING -extern int locks_mandatory_locked(struct inode *); +extern int locks_mandatory_locked(struct file *); extern int locks_mandatory_area(int, struct inode *, struct file *, loff_t, size_t); /* @@ -1948,10 +1956,10 @@ static inline int mandatory_lock(struct inode *ino) return IS_MANDLOCK(ino) && __mandatory_lock(ino); } -static inline int locks_verify_locked(struct inode *inode) +static inline int locks_verify_locked(struct file *file) { - if (mandatory_lock(inode)) - return locks_mandatory_locked(inode); + if (mandatory_lock(file_inode(file))) + return locks_mandatory_locked(file); return 0; } @@ -1971,6 +1979,12 @@ static inline int locks_verify_truncate(struct inode *inode, static inline int break_lease(struct inode *inode, unsigned int mode) { + /* + * Since this check is lockless, we must ensure that any refcounts + * taken are done before checking inode->i_flock. Otherwise, we could + * end up racing with tasks trying to set a new lease on this file. + */ + smp_mb(); if (inode->i_flock) return __break_lease(inode, mode, FL_LEASE); return 0; @@ -2006,7 +2020,7 @@ static inline int break_deleg_wait(struct inode **delegated_inode) } #else /* !CONFIG_FILE_LOCKING */ -static inline int locks_mandatory_locked(struct inode *inode) +static inline int locks_mandatory_locked(struct file *file) { return 0; } @@ -2028,7 +2042,7 @@ static inline int mandatory_lock(struct inode *inode) return 0; } -static inline int locks_verify_locked(struct inode *inode) +static inline int locks_verify_locked(struct file *file) { return 0; } @@ -2302,11 +2316,6 @@ static inline bool execute_ok(struct inode *inode) return (inode->i_mode & S_IXUGO) || S_ISDIR(inode->i_mode); } -static inline struct inode *file_inode(struct file *f) -{ - return f->f_inode; -} - static inline void file_start_write(struct file *file) { if (!S_ISREG(file_inode(file)->i_mode)) diff --git a/include/uapi/asm-generic/fcntl.h b/include/uapi/asm-generic/fcntl.h index 95e46c8e05f9..a9b13f8b3595 100644 --- a/include/uapi/asm-generic/fcntl.h +++ b/include/uapi/asm-generic/fcntl.h @@ -132,6 +132,22 @@ #define F_GETOWNER_UIDS 17 #endif +/* + * fd "private" POSIX locks. + * + * Usually POSIX locks held by a process are released on *any* close and are + * not inherited across a fork(). + * + * These cmd values will set locks that conflict with normal POSIX locks, but + * are "owned" by the opened file, not the process. This means that they are + * inherited across fork() like BSD (flock) locks, and they are only released + * automatically when the last reference to the the open file against which + * they were acquired is put. + */ +#define F_GETLKP 36 +#define F_SETLKP 37 +#define F_SETLKPW 38 + #define F_OWNER_TID 0 #define F_OWNER_PID 1 #define F_OWNER_PGRP 2 @@ -186,8 +202,6 @@ struct flock { }; #endif -#ifndef CONFIG_64BIT - #ifndef HAVE_ARCH_STRUCT_FLOCK64 #ifndef __ARCH_FLOCK64_PAD #define __ARCH_FLOCK64_PAD @@ -202,6 +216,5 @@ struct flock64 { __ARCH_FLOCK64_PAD }; #endif -#endif /* !CONFIG_64BIT */ #endif /* _ASM_GENERIC_FCNTL_H */ diff --git a/mm/mmap.c b/mm/mmap.c index ac1d6671b1ed..46433e137abc 100644 --- a/mm/mmap.c +++ b/mm/mmap.c @@ -1299,7 +1299,7 @@ unsigned long do_mmap_pgoff(struct file *file, unsigned long addr, /* * Make sure there are no mandatory locks on the file. */ - if (locks_verify_locked(inode)) + if (locks_verify_locked(file)) return -EAGAIN; vm_flags |= VM_SHARED | VM_MAYSHARE; diff --git a/mm/nommu.c b/mm/nommu.c index 8740213b1647..a554e5a451cd 100644 --- a/mm/nommu.c +++ b/mm/nommu.c @@ -995,7 +995,7 @@ static int validate_mmap_request(struct file *file, (file->f_mode & FMODE_WRITE)) return -EACCES; - if (locks_verify_locked(file_inode(file))) + if (locks_verify_locked(file)) return -EAGAIN; if (!(capabilities & BDI_CAP_MAP_DIRECT)) diff --git a/security/selinux/hooks.c b/security/selinux/hooks.c index 869c2f1e0da1..b4beb77967b1 100644 --- a/security/selinux/hooks.c +++ b/security/selinux/hooks.c @@ -3317,6 +3317,9 @@ static int selinux_file_fcntl(struct file *file, unsigned int cmd, case F_GETLK: case F_SETLK: case F_SETLKW: + case F_GETLKP: + case F_SETLKP: + case F_SETLKPW: #if BITS_PER_LONG == 32 case F_GETLK64: case F_SETLK64: |