From 00b0c9b82663ac42e5a09f58ce960f81f29d64ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Thu, 14 Dec 2017 21:27:55 -0500 Subject: Add primitives for manipulating bitfields both in host- and fixed-endian. The following primitives are defined in linux/bitfield.h: * u32 le32_get_bits(__le32 val, u32 field) extracts the contents of the bitfield specified by @field in little-endian 32bit object @val and converts it to host-endian. * void le32p_replace_bits(__le32 *p, u32 v, u32 field) replaces the contents of the bitfield specified by @field in little-endian 32bit object pointed to by @p with the value of @v. New value is given in host-endian and stored as little-endian. * __le32 le32_replace_bits(__le32 old, u32 v, u32 field) is equivalent to ({__le32 tmp = old; le32p_replace_bits(&tmp, v, field); tmp;}) In other words, instead of modifying an object in memory, it takes the initial value and returns the modified one. * __le32 le32_encode_bits(u32 v, u32 field) is equivalent to le32_replace_bits(0, v, field). In other words, it returns a little-endian 32bit object with the bitfield specified by @field containing the value of @v and all bits outside that bitfield being zero. Such set of helpers is defined for each of little-, big- and host-endian types; e.g. u64_get_bits(val, field) will return the contents of the bitfield specified by @field in host-endian 64bit object @val, etc. Of course, for host-endian no conversion is involved. Fields to access are specified as GENMASK() values - an N-bit field starting at bit #M is encoded as GENMASK(M + N - 1, M). Note that bit numbers refer to endianness of the object we are working with - e.g. GENMASK(11, 0) in __be16 refers to the second byte and the lower 4 bits of the first byte. In __le16 it would refer to the first byte and the lower 4 bits of the second byte, etc. Field specification must be a constant; __builtin_constant_p() doesn't have to be true for it, but compiler must be able to evaluate it at build time. If it cannot or if the value does not encode any bitfield, the build will fail. If the value being stored in a bitfield is a constant that does not fit into that bitfield, a warning will be generated at compile time. Signed-off-by: Al Viro --- include/linux/bitfield.h | 46 ++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 46 insertions(+) (limited to 'include') diff --git a/include/linux/bitfield.h b/include/linux/bitfield.h index 1030651f8309..cf2588d81148 100644 --- a/include/linux/bitfield.h +++ b/include/linux/bitfield.h @@ -16,6 +16,7 @@ #define _LINUX_BITFIELD_H #include +#include /* * Bitfield access macros @@ -103,4 +104,49 @@ (typeof(_mask))(((_reg) & (_mask)) >> __bf_shf(_mask)); \ }) +extern void __compiletime_warning("value doesn't fit into mask") +__field_overflow(void); +extern void __compiletime_error("bad bitfield mask") +__bad_mask(void); +static __always_inline u64 field_multiplier(u64 field) +{ + if ((field | (field - 1)) & ((field | (field - 1)) + 1)) + __bad_mask(); + return field & -field; +} +static __always_inline u64 field_mask(u64 field) +{ + return field / field_multiplier(field); +} +#define ____MAKE_OP(type,base,to,from) \ +static __always_inline __##type type##_encode_bits(base v, base field) \ +{ \ + if (__builtin_constant_p(v) && (v & ~field_multiplier(field))) \ + __field_overflow(); \ + return to((v & field_mask(field)) * field_multiplier(field)); \ +} \ +static __always_inline __##type type##_replace_bits(__##type old, \ + base val, base field) \ +{ \ + return (old & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline void type##p_replace_bits(__##type *p, \ + base val, base field) \ +{ \ + *p = (*p & ~to(field)) | type##_encode_bits(val, field); \ +} \ +static __always_inline base type##_get_bits(__##type v, base field) \ +{ \ + return (from(v) & field)/field_multiplier(field); \ +} +#define __MAKE_OP(size) \ + ____MAKE_OP(le##size,u##size,cpu_to_le##size,le##size##_to_cpu) \ + ____MAKE_OP(be##size,u##size,cpu_to_be##size,be##size##_to_cpu) \ + ____MAKE_OP(u##size,u##size,,) +__MAKE_OP(16) +__MAKE_OP(32) +__MAKE_OP(64) +#undef __MAKE_OP +#undef ____MAKE_OP + #endif -- cgit v1.2.3 From f1ee616214cb22410e939d963bbb2349c2570f02 Mon Sep 17 00:00:00 2001 From: NeilBrown Date: Thu, 21 Dec 2017 09:45:40 +1100 Subject: VFS: don't keep disconnected dentries on d_anon The original purpose of the per-superblock d_anon list was to keep disconnected dentries in the cache between consecutive requests to the NFS server. Dentries can be disconnected if a client holds a file open and repeatedly performs IO on it, and if the server drops the dentry, whether due to memory pressure, server restart, or "echo 3 > /proc/sys/vm/drop_caches". This purpose was thwarted by commit 75a6f82a0d10 ("freeing unlinked file indefinitely delayed") which caused disconnected dentries to be freed as soon as their refcount reached zero. This means that, when a dentry being used by nfsd gets disconnected, a new one needs to be allocated for every request (unless requests overlap). As the dentry has no name, no parent, and no children, there is little of value to cache. As small memory allocations are typically fast (from per-cpu free lists) this likely has little cost. This means that the original purpose of s_anon is no longer relevant: there is no longer any need to keep disconnected dentries on a list so they appear to be hashed. However, s_anon now has a new use. When you mount an NFS filesystem, the dentry stored in s_root is just a placebo. The "real" root dentry is allocated using d_obtain_root() and so it kept on the s_anon list. I don't know the reason for this, but suspect it related to NFSv4 where a mount of "server:/some/path" require NFS to look up the root filehandle on the server, then walk down "/some" and "/path" to get the filehandle to mount. Whatever the reason, NFS depends on the s_anon list and on shrink_dcache_for_umount() pruning all dentries on this list. So we cannot simply remove s_anon. We could just leave the code unchanged, but apart from that being potentially confusing, the (unfair) bit-spin-lock which protects s_anon can become a bottle neck when lots of disconnected dentries are being created. So this patch renames s_anon to s_roots, and stops storing disconnected dentries on the list. Only dentries obtained with d_obtain_root() are now stored on this list. There are many fewer of these (only NFS and NILFS2 use the call, and only during filesystem mount) so contention on the bit-lock will not be a problem. Possibly an alternate solution should be found for NFS and NILFS2, but that would require understanding their needs first. Signed-off-by: NeilBrown Signed-off-by: Al Viro --- include/linux/fs.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 2995a271ec46..6276f8315e5b 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1359,7 +1359,7 @@ struct super_block { const struct fscrypt_operations *s_cop; - struct hlist_bl_head s_anon; /* anonymous dentries for (nfs) exporting */ + struct hlist_bl_head s_roots; /* alternate root dentries for NFS */ struct list_head s_mounts; /* list of mounts; _not_ for fs use */ struct block_device *s_bdev; struct backing_dev_info *s_bdi; -- cgit v1.2.3 From 7d815165c1a64da9fd1b0f4ac8d97ba938ff1d71 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Jan 2018 09:45:42 -0800 Subject: eventfd: convert to use anon_inode_getfd() Nothing actually calls eventfd_file_create() besides the eventfd2() system call itself. So simplify things by folding it into the system call and using anon_inode_getfd() instead of anon_inode_getfile(). This removes over 40 lines with no change in functionality. (eventfd_file_create() was apparently added years ago for KVM irqfd's, but was never used.) Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/eventfd.h | 5 ----- 1 file changed, 5 deletions(-) (limited to 'include') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 60b2985e8a18..15826192cc23 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -30,7 +30,6 @@ struct file; #ifdef CONFIG_EVENTFD -struct file *eventfd_file_create(unsigned int count, int flags); struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx); void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); @@ -47,10 +46,6 @@ int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *w * Ugly ugly ugly error layer to support modules that uses eventfd but * pretend to work in !CONFIG_EVENTFD configurations. Namely, AIO. */ -static inline struct file *eventfd_file_create(unsigned int count, int flags) -{ - return ERR_PTR(-ENOSYS); -} static inline struct eventfd_ctx *eventfd_ctx_fdget(int fd) { -- cgit v1.2.3 From b6364572d641c8eba9eab9bcc31d8962f96ddf15 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Jan 2018 09:45:43 -0800 Subject: eventfd: fold eventfd_ctx_read() into eventfd_read() eventfd_ctx_read() is not used outside of eventfd.c, so unexport it and fold it into eventfd_read(). This slightly simplifies the code and makes it more analogous to eventfd_write(). (eventfd_ctx_read() was apparently added years ago for KVM irqfd's, but was never used.) Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/eventfd.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 15826192cc23..566fef14d0a6 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -36,7 +36,6 @@ struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); struct eventfd_ctx *eventfd_ctx_fileget(struct file *file); __u64 eventfd_signal(struct eventfd_ctx *ctx, __u64 n); -ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, __u64 *cnt); int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt); @@ -62,12 +61,6 @@ static inline void eventfd_ctx_put(struct eventfd_ctx *ctx) } -static inline ssize_t eventfd_ctx_read(struct eventfd_ctx *ctx, int no_wait, - __u64 *cnt) -{ - return -ENOSYS; -} - static inline int eventfd_ctx_remove_wait_queue(struct eventfd_ctx *ctx, wait_queue_entry_t *wait, __u64 *cnt) { -- cgit v1.2.3 From 105f2b7096075eacb6d2c83a6e00b652c2951063 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Sat, 6 Jan 2018 09:45:44 -0800 Subject: eventfd: fold eventfd_ctx_get() into eventfd_ctx_fileget() eventfd_ctx_get() is not used outside of eventfd.c, so unexport it and fold it into eventfd_ctx_fileget(). (eventfd_ctx_get() was apparently added years ago for KVM irqfd's, but was never used.) Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/eventfd.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/eventfd.h b/include/linux/eventfd.h index 566fef14d0a6..7094718b653b 100644 --- a/include/linux/eventfd.h +++ b/include/linux/eventfd.h @@ -26,11 +26,11 @@ #define EFD_SHARED_FCNTL_FLAGS (O_CLOEXEC | O_NONBLOCK) #define EFD_FLAGS_SET (EFD_SHARED_FCNTL_FLAGS | EFD_SEMAPHORE) +struct eventfd_ctx; struct file; #ifdef CONFIG_EVENTFD -struct eventfd_ctx *eventfd_ctx_get(struct eventfd_ctx *ctx); void eventfd_ctx_put(struct eventfd_ctx *ctx); struct file *eventfd_fget(int fd); struct eventfd_ctx *eventfd_ctx_fdget(int fd); -- cgit v1.2.3 From 50fd2f298bef9d1f69ac755f1fdf70cd98746be2 Mon Sep 17 00:00:00 2001 From: Al Viro Date: Sun, 7 Jan 2018 13:06:15 -0500 Subject: new primitive: vmemdup_user() similar to memdup_user(), but does *not* guarantee that result will be physically contiguous; use only in cases where that's not a requirement and free it with kvfree(). Signed-off-by: Al Viro --- include/linux/string.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/string.h b/include/linux/string.h index 410ecf17de3c..12d5429de0c8 100644 --- a/include/linux/string.h +++ b/include/linux/string.h @@ -11,6 +11,7 @@ extern char *strndup_user(const char __user *, long); extern void *memdup_user(const void __user *, size_t); +extern void *vmemdup_user(const void __user *, size_t); extern void *memdup_user_nul(const void __user *, size_t); /* -- cgit v1.2.3 From e1fc742e14e01d84d9693c4aca4ab23da65811fb Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?J=C3=BCrg=20Billeter?= Date: Fri, 29 Sep 2017 14:07:17 +0200 Subject: fs: add RWF_APPEND MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is the per-I/O equivalent of O_APPEND to support atomic append operations on any open file. If a file is opened with O_APPEND, pwrite() ignores the offset and always appends data to the end of the file. RWF_APPEND enables atomic append and pwrite() with offset on a single file descriptor. Signed-off-by: Jürg Billeter Signed-off-by: Al Viro --- include/linux/fs.h | 2 ++ include/uapi/linux/fs.h | 6 +++++- 2 files changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 6276f8315e5b..85c8ddc55760 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -3224,6 +3224,8 @@ static inline int kiocb_set_rw_flags(struct kiocb *ki, rwf_t flags) ki->ki_flags |= IOCB_DSYNC; if (flags & RWF_SYNC) ki->ki_flags |= (IOCB_DSYNC | IOCB_SYNC); + if (flags & RWF_APPEND) + ki->ki_flags |= IOCB_APPEND; return 0; } diff --git a/include/uapi/linux/fs.h b/include/uapi/linux/fs.h index 4199f8acbce5..d2a8313fabd7 100644 --- a/include/uapi/linux/fs.h +++ b/include/uapi/linux/fs.h @@ -377,7 +377,11 @@ typedef int __bitwise __kernel_rwf_t; /* per-IO, return -EAGAIN if operation would block */ #define RWF_NOWAIT ((__force __kernel_rwf_t)0x00000008) +/* per-IO O_APPEND */ +#define RWF_APPEND ((__force __kernel_rwf_t)0x00000010) + /* mask of flags supported by the kernel */ -#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT) +#define RWF_SUPPORTED (RWF_HIPRI | RWF_DSYNC | RWF_SYNC | RWF_NOWAIT |\ + RWF_APPEND) #endif /* _UAPI_LINUX_FS_H */ -- cgit v1.2.3 From 4bfd054ae11ea061685c4a2a6234fdc8e92fad41 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 16 Jan 2018 21:44:24 -0800 Subject: fs: fold __inode_permission() into inode_permission() Since commit 9c630ebefeee ("ovl: simplify permission checking"), overlayfs doesn't call __inode_permission() anymore, which leaves no users other than inode_permission(). So just fold it back into inode_permission(). Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/fs.h b/include/linux/fs.h index 85c8ddc55760..b49251112add 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2699,7 +2699,6 @@ extern sector_t bmap(struct inode *, sector_t); #endif extern int notify_change(struct dentry *, struct iattr *, struct inode **); extern int inode_permission(struct inode *, int); -extern int __inode_permission(struct inode *, int); extern int generic_permission(struct inode *, int); extern int __check_sticky(struct inode *dir, struct inode *inode); -- cgit v1.2.3 From 01950a349ec254f28bf9ad06e74a166521d213e1 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Tue, 16 Jan 2018 22:25:12 -0800 Subject: fs/buffer.c: fold init_buffer() into init_page_buffers() Since commit e76004093db1 ("fs/buffer.c: remove unnecessary init operation after allocating buffer_head"), there are no callers of init_buffer() outside of init_page_buffers(). So just fold it into init_page_buffers(). Signed-off-by: Eric Biggers Signed-off-by: Al Viro --- include/linux/buffer_head.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index 8b1bf8d3d4a2..58a82f58e44e 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -151,7 +151,6 @@ void buffer_check_dirty_writeback(struct page *page, void mark_buffer_dirty(struct buffer_head *bh); void mark_buffer_write_io_error(struct buffer_head *bh); -void init_buffer(struct buffer_head *, bh_end_io_t *, void *); void touch_buffer(struct buffer_head *bh); void set_bh_page(struct buffer_head *bh, struct page *page, unsigned long offset); -- cgit v1.2.3