From 4cef191d05871fbc8bb3b812880e1997e855a3b9 Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 25 Mar 2016 12:46:54 +0900 Subject: net: phy: bcm7xxx: Add entries for Broadcom BCM7346 and BCM7362 Add PHY entries for the Broadcom BCM7346 and BCM7362 chips, these are 40nm generation Ethernet PHY. Fixes: 815717d1473e ("net: phy: bcm7xxx: Remove wildcard entries") Signed-off-by: Jaedon Shin Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index f0ba9c2ec639..e3354b74286c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -24,6 +24,8 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 +#define PHY_ID_BCM7346 0x600d8650 +#define PHY_ID_BCM7362 0x600d84b0 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 -- cgit v1.2.3 From 96c22a3293512ba684e73a981196430f524689da Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 23 Mar 2016 14:14:48 +0200 Subject: configfs: fix CONFIGFS_BIN_ATTR_[RW]O definitions The type should be struct configfs_bin_attribute and not struct configfs_attribute. Signed-off-by: Octavian Purdila Signed-off-by: Christoph Hellwig --- include/linux/configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 485fe5519448..d9d6a9d77489 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -188,7 +188,7 @@ static struct configfs_bin_attribute _pfx##attr_##_name = { \ } #define CONFIGFS_BIN_ATTR_RO(_pfx, _name, _priv, _maxsz) \ -static struct configfs_attribute _pfx##attr_##_name = { \ +static struct configfs_bin_attribute _pfx##attr_##_name = { \ .cb_attr = { \ .ca_name = __stringify(_name), \ .ca_mode = S_IRUGO, \ @@ -200,7 +200,7 @@ static struct configfs_attribute _pfx##attr_##_name = { \ } #define CONFIGFS_BIN_ATTR_WO(_pfx, _name, _priv, _maxsz) \ -static struct configfs_attribute _pfx##attr_##_name = { \ +static struct configfs_bin_attribute _pfx##attr_##_name = { \ .cb_attr = { \ .ca_name = __stringify(_name), \ .ca_mode = S_IWUSR, \ -- cgit v1.2.3 From d101a125954eae1d397adda94ca6319485a50493 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 26 Mar 2016 16:14:37 -0400 Subject: fs: add file_dentry() This series fixes bugs in nfs and ext4 due to 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay"). Regular files opened on overlayfs will result in the file being opened on the underlying filesystem, while f_path points to the overlayfs mount/dentry. This confuses filesystems which get the dentry from struct file and assume it's theirs. Add a new helper, file_dentry() [*], to get the filesystem's own dentry from the file. This checks file->f_path.dentry->d_flags against DCACHE_OP_REAL, and returns file->f_path.dentry if DCACHE_OP_REAL is not set (this is the common, non-overlayfs case). In the uncommon case it will call into overlayfs's ->d_real() to get the underlying dentry, matching file_inode(file). The reason we need to check against the inode is that if the file is copied up while being open, d_real() would return the upper dentry, while the open file comes from the lower dentry. [*] If possible, it's better simply to use file_inode() instead. Signed-off-by: Miklos Szeredi Signed-off-by: Theodore Ts'o Tested-by: Goldwyn Rodrigues Reviewed-by: Trond Myklebust Cc: # v4.2 Cc: David Howells Cc: Al Viro Cc: Daniel Axtens --- include/linux/dcache.h | 10 ++++++++++ include/linux/fs.h | 10 ++++++++++ 2 files changed, 20 insertions(+) (limited to 'include/linux') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7cb043d8f4e8..4bb4de8d95ea 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,6 +161,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); + struct dentry *(*d_real)(struct dentry *, struct inode *); } ____cacheline_aligned; /* @@ -229,6 +230,7 @@ struct dentry_operations { #define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ #define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ +#define DCACHE_OP_REAL 0x08000000 extern seqlock_t rename_lock; @@ -555,4 +557,12 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } +static inline struct dentry *d_real(struct dentry *dentry) +{ + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, NULL); + else + return dentry; +} + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 35d99266ca9a..b2ed2311b021 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1241,6 +1241,16 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +static inline struct dentry *file_dentry(const struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, file_inode(file)); + else + return dentry; +} + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); -- cgit v1.2.3 From 596cf3fe5854fe2b1703b0466ed6bf9cfb83c91e Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 16 Mar 2016 21:49:00 +0100 Subject: netfilter: ipset: fix race condition in ipset save, swap and delete This fix adds a new reference counter (ref_netlink) for the struct ip_set. The other reference counter (ref) can be swapped out by ip_set_swap and we need a separate counter to keep track of references for netlink events like dump. Using the same ref counter for dump causes a race condition which can be demonstrated by the following script: ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \ counters ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 /* will crash the machine */ Swap will exchange the values of ref so destroy will see ref = 0 instead of ref = 1. With this fix in place swap will not succeed because ipset save still has ref_netlink on the set (ip_set_swap doesn't swap ref_netlink). Both delete and swap will error out if ref_netlink != 0 on the set. Note: The changes to *_head functions is because previously we would increment ref whenever we called these functions, we don't do that anymore. Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0e1f433cc4b7..f48b8a664b0f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -234,6 +234,10 @@ struct ip_set { spinlock_t lock; /* References to the set */ u32 ref; + /* References to the set for netlink events like dump, + * ref can be swapped out by ip_set_swap + */ + u32 ref_netlink; /* The core set type */ struct ip_set_type *type; /* The type variant doing the real job */ -- cgit v1.2.3 From fc0c2028135c7f75fce36b90e44efb8003a9173b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 10:30:19 -0800 Subject: x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem() Update the definition of memcpy_from_pmem() to return 0 or a negative error code. Implement x86/arch_memcpy_from_pmem() with memcpy_mcsafe(). Cc: Borislav Petkov Cc: Tony Luck Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Acked-by: Ingo Molnar Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/pmem.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 3ec5309e29f3..ac6d872ce067 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -42,6 +42,13 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, BUG(); } +static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, + size_t n) +{ + BUG(); + return -EFAULT; +} + static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, struct iov_iter *i) { @@ -66,14 +73,17 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) #endif /* - * Architectures that define ARCH_HAS_PMEM_API must provide - * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), - * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem() - * and arch_has_wmb_pmem(). + * memcpy_from_pmem - read from persistent memory with error handling + * @dst: destination buffer + * @src: source buffer + * @size: transfer length + * + * Returns 0 on success negative error code on failure. */ -static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) +static inline int memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) { - memcpy(dst, (void __force const *) src, size); + return arch_memcpy_from_pmem(dst, src, size); } static inline bool arch_has_pmem_api(void) -- cgit v1.2.3 From 5acba71e18833b9d06686b3751598bfa263a3ac3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:37:59 +0100 Subject: locking/atomic: Introduce atomic_fetch_or() This is deemed to replace the type generic fetch_or() which brings a lot of issues such as macro induced block variable aliasing and sloppy types. Not to mention fetch_or() doesn't refer to any namespace, adding even more confusion. So lets provide an atomic_t version. Current and next users of fetch_or() are thus encouraged to use atomic_t. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index df4f369254c0..3d64c0852164 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -558,6 +558,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +/** + * atomic_fetch_or - perform *p |= mask and return old value of *p + * @p: pointer to atomic_t + * @mask: mask to OR on the atomic_t + */ +#ifndef atomic_fetch_or +static inline int atomic_fetch_or(atomic_t *p, int mask) +{ + int old, val = atomic_read(p); + + for (;;) { + old = atomic_cmpxchg(p, val, val | mask); + if (old == val) + break; + val = old; + } + + return old; +} +#endif + /** * fetch_or - perform *ptr |= mask and return old value of *ptr * @ptr: pointer to value -- cgit v1.2.3 From f009a7a767e792d5ab0b46c08d46236ea5271dd9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:00 +0100 Subject: timers/nohz: Convert tick dependency mask to atomic_t The tick dependency mask was intially unsigned long because this is the type on which clear_bit() operates on and fetch_or() accepts it. But now that we have atomic_fetch_or(), we can instead use atomic_andnot() to clear the bit. This consolidates the type of our tick dependency mask, reduce its size on structures and benefit from possible architecture optimizations on atomic_t operations. Suggested-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..52c4847b05e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -720,7 +720,7 @@ struct signal_struct { struct task_cputime cputime_expires; #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif struct list_head cpu_timers[3]; @@ -1549,7 +1549,7 @@ struct task_struct { #endif #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ -- cgit v1.2.3 From 5529578a27288d11d4d15635c258c6dde0f0fb10 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:01 +0100 Subject: locking/atomic, sched: Unexport fetch_or() This patch functionally reverts: 5fd7a09cfb8c ("atomic: Export fetch_or()") During the merge Linus observed that the generic version of fetch_or() was messy: " This makes the ugly "fetch_or()" macro that the scheduler used internally a new generic helper, and does a bad job at it. " e23604edac2a Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Now that we have introduced atomic_fetch_or(), fetch_or() is only used by the scheduler in order to deal with thread_info flags which type can vary across architectures. Lets confine fetch_or() back to the scheduler so that we encourage future users to use the more robust and well typed atomic_t version instead. While at it, fetch_or() gets robustified, pasting improvements from a previous patch by Ingo Molnar that avoids needless expression re-evaluations in the loop. Reported-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include/linux') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3d64c0852164..506c3531832e 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -579,27 +579,6 @@ static inline int atomic_fetch_or(atomic_t *p, int mask) } #endif -/** - * fetch_or - perform *ptr |= mask and return old value of *ptr - * @ptr: pointer to value - * @mask: mask to OR on the value - * - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#ifndef fetch_or -#define fetch_or(ptr, mask) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (mask)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) -#endif - - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif -- cgit v1.2.3 From 5a5abb1fa3b05dd6aa821525832644c1e7d2905f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Mar 2016 02:13:18 +0200 Subject: tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter Sasha Levin reported a suspicious rcu_dereference_protected() warning found while fuzzing with trinity that is similar to this one: [ 52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage! [ 52.765688] other info that might help us debug this: [ 52.765695] rcu_scheduler_active = 1, debug_locks = 1 [ 52.765701] 1 lock held by a.out/1525: [ 52.765704] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.765721] stack backtrace: [ 52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264 [...] [ 52.765768] Call Trace: [ 52.765775] [] dump_stack+0x85/0xc8 [ 52.765784] [] lockdep_rcu_suspicious+0xd5/0x110 [ 52.765792] [] sk_detach_filter+0x82/0x90 [ 52.765801] [] tun_detach_filter+0x35/0x90 [tun] [ 52.765810] [] __tun_chr_ioctl+0x354/0x1130 [tun] [ 52.765818] [] ? selinux_file_ioctl+0x130/0x210 [ 52.765827] [] tun_chr_ioctl+0x13/0x20 [tun] [ 52.765834] [] do_vfs_ioctl+0x96/0x690 [ 52.765843] [] ? security_file_ioctl+0x43/0x60 [ 52.765850] [] SyS_ioctl+0x79/0x90 [ 52.765858] [] do_syscall_64+0x62/0x140 [ 52.765866] [] entry_SYSCALL64_slow_path+0x25/0x25 Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH, DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices. Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu fixes") sk_attach_filter()/sk_detach_filter() now dereferences the filter with rcu_dereference_protected(), checking whether socket lock is held in control path. Since its introduction in 994051625981 ("tun: socket filter support"), tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the sock_owned_by_user(sk) doesn't apply in this specific case and therefore triggers the false positive. Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair that is used by tap filters and pass in lockdep_rtnl_is_held() for the rcu_dereference_protected() checks instead. Reported-by: Sasha Levin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include/linux') diff --git a/include/linux/filter.h b/include/linux/filter.h index 43aa1f8855c7..a51a5361695f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,10 +465,14 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +int __sk_detach_filter(struct sock *sk, bool locked); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -- cgit v1.2.3 From d7e944c8ddc0983640a9a32868fb217485d12ca2 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:15 +0200 Subject: Revert "stmmac: Fix 'eth0: No PHY found' regression" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 88f8b1bb41c6208f81b6a480244533ded7b59493. due to problems on GeekBox and Banana Pi M1 board when connected to a real transceiver instead of a switch via fixed-link. Signed-off-by: Giuseppe Cavallaro Cc: Gabriel Fernandez Cc: Andreas Färber Cc: Frank Schäfer Cc: Dinh Nguyen Cc: David S. Miller Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 4bcf5a61aada..6e53fa8942a4 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -114,7 +114,6 @@ struct plat_stmmacenet_data { int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From a7657f128c279ae5796ab2ca7d04a7819f4259f0 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:16 +0200 Subject: stmmac: fix MDIO settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially the phy_bus_name was added to manipulate the driver name but it was recently just used to manage the fixed-link and then to take some decision at run-time. So the patch uses the is_pseudo_fixed_link and removes the phy_bus_name variable not necessary anymore. The driver can manage the mdio registration by using phy-handle, dwmac-mdio and own parameter e.g. snps,phy-addr. This patch takes care about all these possible configurations and fixes the mdio registration in case of there is a real transceiver or a switch (that needs to be managed by using fixed-link). Signed-off-by: Giuseppe Cavallaro Reviewed-by: Andreas Färber Tested-by: Frank Schäfer Cc: Gabriel Fernandez Cc: Dinh Nguyen Cc: David S. Miller Cc: Phil Reid Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 6e53fa8942a4..e6bc30a42a74 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -108,12 +108,12 @@ struct stmmac_axi { }; struct plat_stmmacenet_data { - char *phy_bus_name; int bus_id; int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; + struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From 969e8d7e47f93ef693028667480558de8f70523f Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 1 Apr 2016 14:31:17 -0700 Subject: include/linux/huge_mm.h: return NULL instead of false for pmd_trans_huge_lock() The return value of pmd_trans_huge_lock() is a pointer, not a boolean value, so use NULL instead of false as the return value. Signed-off-by: Chen Gang Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 79b0ef6aaa14..7008623e24b1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -127,7 +127,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else - return false; + return NULL; } static inline int hpage_nr_pages(struct page *page) { -- cgit v1.2.3 From 09cbfeaf1a5a67bfb3201e0c83c810cecb2efa5a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:47 +0300 Subject: mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> ; - >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> ; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 2 +- include/linux/buffer_head.h | 4 ++-- include/linux/ceph/libceph.h | 4 ++-- include/linux/f2fs_fs.h | 4 ++-- include/linux/fs.h | 4 ++-- include/linux/nfs_page.h | 2 +- include/linux/pagemap.h | 14 +++++++------- include/linux/swap.h | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include/linux') diff --git a/include/linux/bio.h b/include/linux/bio.h index 88bc64f00bb5..6b7481f62218 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -41,7 +41,7 @@ #endif #define BIO_MAX_PAGES 256 -#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) +#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7e5d7e018bea..669e419d6234 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1372,7 +1372,7 @@ unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); static inline void put_dev_sector(Sector p) { - page_cache_release(p.v); + put_page(p.v); } static inline bool __bvec_gap_to_prev(struct request_queue *q, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c67f052cc5e5..d48daa3f6f20 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -43,7 +43,7 @@ enum bh_state_bits { */ }; -#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) +#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) struct page; struct buffer_head; @@ -263,7 +263,7 @@ void buffer_init(void); static inline void attach_page_buffers(struct page *page, struct buffer_head *head) { - page_cache_get(page); + get_page(page); SetPagePrivate(page); set_page_private(page, (unsigned long)head); } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index e7975e4681e1..db92a8d4926e 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -176,8 +176,8 @@ extern void ceph_put_snap_context(struct ceph_snap_context *sc); */ static inline int calc_pages_for(u64 off, u64 len) { - return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - - (off >> PAGE_CACHE_SHIFT); + return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) - + (off >> PAGE_SHIFT); } extern struct kmem_cache *ceph_inode_cachep; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 9eb215a155e0..b90e9bdbd1dd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -262,7 +262,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -282,7 +282,7 @@ struct f2fs_nat_block { * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) /* * Note that f2fs_sit_entry->vblocks has the following bit-field information. diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..304991a80e23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -929,7 +929,7 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) #endif @@ -2067,7 +2067,7 @@ extern int generic_update_time(struct inode *, struct timespec *, int); /* /sys/fs */ extern struct kobject *fs_kobj; -#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) +#define MAX_RW_COUNT (INT_MAX & PAGE_MASK) #ifdef CONFIG_MANDATORY_FILE_LOCKING extern int locks_mandatory_locked(struct file *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f2f650f136ee..efada239205e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -184,7 +184,7 @@ nfs_list_entry(struct list_head *head) static inline loff_t req_offset(struct nfs_page *req) { - return (((loff_t)req->wb_index) << PAGE_CACHE_SHIFT) + req->wb_offset; + return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset; } #endif /* _LINUX_NFS_PAGE_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1ebd65c91422..f396ccb900cc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -390,13 +390,13 @@ static inline pgoff_t page_to_pgoff(struct page *page) return page->index << compound_order(page); if (likely(!PageTransTail(page))) - return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + return page->index; /* * We don't initialize ->index for tail pages: calculate based on * head page */ - pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = compound_head(page)->index; pgoff += page - compound_head(page); return pgoff; } @@ -406,12 +406,12 @@ static inline pgoff_t page_to_pgoff(struct page *page) */ static inline loff_t page_offset(struct page *page) { - return ((loff_t)page->index) << PAGE_CACHE_SHIFT; + return ((loff_t)page->index) << PAGE_SHIFT; } static inline loff_t page_file_offset(struct page *page) { - return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT; + return ((loff_t)page_file_index(page)) << PAGE_SHIFT; } extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, @@ -425,7 +425,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return linear_hugepage_index(vma, address); pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; - return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); + return pgoff; } extern void __lock_page(struct page *page); @@ -671,8 +671,8 @@ static inline int add_to_page_cache(struct page *page, static inline unsigned long dir_pages(struct inode *inode) { - return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> + PAGE_SHIFT; } #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index d18b65c53dbb..3d980ea1c946 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -435,7 +435,7 @@ struct backing_dev_info; /* only sparc can not include linux/pagemap.h in this file * so leave page_cache_release and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ - page_cache_release(page) + put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr), false); -- cgit v1.2.3 From ea1754a084760e68886f5b725c8eaada9cc57155 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:48 +0300 Subject: mm, fs: remove remaining PAGE_CACHE_* and page_cache_{get,release} usage Mostly direct substitution with occasional adjustment or removing outdated comments. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/backing-dev-defs.h | 2 +- include/linux/mm.h | 2 +- include/linux/mm_types.h | 2 +- include/linux/nfs_page.h | 4 ++-- include/linux/nilfs2_fs.h | 4 ++-- include/linux/pagemap.h | 3 +-- include/linux/sunrpc/svc.h | 2 +- include/linux/swap.h | 2 +- 8 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include/linux') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 1b4d69f68c33..3f103076d0bf 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -135,7 +135,7 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; - unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..ffcff53e3b2b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -623,7 +623,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, * * A page may belong to an inode's memory mapping. In this case, page->mapping * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_CACHE_SIZE. + * in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 944b2b37313b..c2d75b4fa86c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -341,7 +341,7 @@ struct vm_area_struct { /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units, *not* PAGE_CACHE_SIZE */ + units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index efada239205e..957049f72290 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -41,8 +41,8 @@ struct nfs_page { struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ - pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ - unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ + pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ + unsigned int wb_offset, /* Offset & ~PAGE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ struct kref wb_kref; /* reference count */ diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 9abb763e4b86..e9fcf90b270d 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -331,7 +331,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) { unsigned len = le16_to_cpu(dlen); -#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536) +#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == NILFS_MAX_REC_LEN) return 1 << 16; #endif @@ -340,7 +340,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) static inline __le16 nilfs_rec_len_to_disk(unsigned len) { -#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536) +#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == (1 << 16)) return cpu_to_le16(NILFS_MAX_REC_LEN); else if (len > (1 << 16)) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f396ccb900cc..b3fc0370c14f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -535,8 +535,7 @@ extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); /* * Fault a userspace page into pagetables. Return non-zero on a fault. * - * This assumes that two userspace pages are always sufficient. That's - * not true if PAGE_CACHE_SIZE > PAGE_SIZE. + * This assumes that two userspace pages are always sufficient. */ static inline int fault_in_pages_writeable(char __user *uaddr, int size) { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cc0fc712bb82..7ca44fb5b675 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -129,7 +129,7 @@ static inline void svc_get(struct svc_serv *serv) * * These happen to all be powers of 2, which is not strictly * necessary but helps enforce the real limitation, which is - * that they should be multiples of PAGE_CACHE_SIZE. + * that they should be multiples of PAGE_SIZE. * * For UDP transports, a block plus NFS,RPC, and UDP headers * has to fit into the IP datagram limit of 64K. The largest diff --git a/include/linux/swap.h b/include/linux/swap.h index 3d980ea1c946..2b83359c19ca 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -433,7 +433,7 @@ struct backing_dev_info; #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file - * so leave page_cache_release and release_pages undeclared... */ + * so leave put_page and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ -- cgit v1.2.3 From 1fa64f198b9f8d6ec0f7aec7c18dc94684391140 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:49 +0300 Subject: mm: drop PAGE_CACHE_* and page_cache_{get,release} definition All users gone. We can remove these macros. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b3fc0370c14f..7e1ab155c67c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -86,21 +86,6 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) (__force unsigned long)mask; } -/* - * The page cache can be done in larger chunks than - * one page, because it allows for more efficient - * throughput (it can then be mapped into user - * space in smaller chunks for same flexibility). - * - * Or rather, it _will_ be done in larger chunks. - */ -#define PAGE_CACHE_SHIFT PAGE_SHIFT -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_MASK PAGE_MASK -#define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) - -#define page_cache_get(page) get_page(page) -#define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, bool cold); /* -- cgit v1.2.3 From b70bb984489363aadd5ccc94d919629d9e264d36 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Mar 2016 22:37:17 +0100 Subject: iommu: provide of_xlate pointer unconditionally iommu drivers that support the standard DT bindings use a of_xlate callback pointer, but that is only part of struct iommu_ops when CONFIG_OF_IOMMU is enabled, leading to build errors in randconfig builds when that is not provided: drivers/iommu/mtk_iommu.c:497:2: error: unknown field 'of_xlate' specified in initializer .of_xlate = mtk_iommu_of_xlate, ^ drivers/iommu/mtk_iommu.c:497:14: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .of_xlate = mtk_iommu_of_xlate, ^~~~~~~~~~~~~~~~~~ drivers/iommu/mtk_iommu.c:497:14: note: (near initialization for 'mtk_iommu_ops.domain_get_attr') We can work around it by adding more #ifdefs in each driver, but it seems nicer to just allow setting the pointer even if it is unused. This makes the driver code look nicer, and it gives better compile-time coverage when test building on other architectures. Signed-off-by: Arnd Bergmann Fixes: 0df4fabe208d ("iommu/mediatek: Add mt8173 IOMMU driver") Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a5c539fa5d2b..ef7a6ecd8584 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -195,9 +195,7 @@ struct iommu_ops { /* Get the number of windows per domain */ u32 (*domain_get_windows)(struct iommu_domain *domain); -#ifdef CONFIG_OF_IOMMU int (*of_xlate)(struct device *dev, struct of_phandle_args *args); -#endif unsigned long pgsize_bitmap; void *priv; -- cgit v1.2.3 From 95272c29378ee7dc15f43fa2758cb28a5913a06d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Mar 2016 09:38:51 +0200 Subject: compiler-gcc: disable -ftracer for __noclone functions -ftracer can duplicate asm blocks causing compilation to fail in noclone functions. For example, KVM declares a global variable in an asm like asm("2: ... \n .pushsection data \n .global vmx_return \n vmx_return: .long 2b"); and -ftracer causes a double declaration. Cc: Andrew Morton Cc: Michal Marek Cc: stable@vger.kernel.org Cc: kvm@vger.kernel.org Reported-by: Linda Walsh Signed-off-by: Paolo Bonzini --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 22ab246feed3..eeae401a2412 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -199,7 +199,7 @@ #define unreachable() __builtin_unreachable() /* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) +#define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) #endif /* GCC_VERSION >= 40500 */ -- cgit v1.2.3 From c12d2da56d0e07d230968ee2305aaa86b93a6832 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Apr 2016 10:24:58 +0200 Subject: mm/gup: Remove the macro overload API migration helpers from the get_user*() APIs The pkeys changes brought about a truly hideous set of macros in: cde70140fed8 ("mm/gup: Overload get_user_pages() functions") ... which macros are (ab-)using the fact that __VA_ARGS__ can be used to shift parameter positions in macro arguments without breaking the build and so can be used to call separate C functions depending on the number of arguments of the macro. This allowed easy migration of these 3 GUP APIs, as both these variants worked at the C level: old: ret = get_user_pages(current, current->mm, address, 1, 1, 0, &page, NULL); new: ret = get_user_pages(address, 1, 1, 0, &page, NULL); ... while we also generated a (functionally harmless but noticeable) build time warning if the old API was used. As there are over 300 uses of these APIs, this trick eased the migration of the API and avoided excessive migration pain in linux-next. Now, with its work done, get rid of all of that complication and ugliness: 3 files changed, 16 insertions(+), 140 deletions(-) ... where the linecount of the migration hack was further inflated by the fact that there are NOMMU variants of these GUP APIs as well. Much of the conversion was done in linux-next over the past couple of months, and Linus recently removed all remaining old API uses from the upstream tree in the following upstrea commit: cb107161df3c ("Convert straggling drivers to new six-argument get_user_pages()") There was one more old-API usage in mm/gup.c, in the CONFIG_HAVE_GENERIC_RCU_GUP code path that ARM, ARM64 and PowerPC uses. After this commit any old API usage will break the build. [ Also fixed a PowerPC/HAVE_GENERIC_RCU_GUP warning reported by Stephen Rothwell. ] Cc: Andrew Morton Cc: Dave Hansen Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 64 +++--------------------------------------------------- 1 file changed, 3 insertions(+), 61 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..d6508a025a31 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1250,78 +1250,20 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -/* suppress warnings from use in EXPORT_SYMBOL() */ -#ifndef __DISABLE_GUP_DEPRECATED -#define __gup_deprecated __deprecated -#else -#define __gup_deprecated -#endif -/* - * These macros provide backward-compatibility with the old - * get_user_pages() variants which took tsk/mm. These - * functions/macros provide both compile-time __deprecated so we - * can catch old-style use and not break the build. The actual - * functions also have WARN_ON()s to let us know at runtime if - * the get_user_pages() should have been the "remote" variant. - * - * These are hideous, but temporary. - * - * If you run into one of these __deprecated warnings, look - * at how you are calling get_user_pages(). If you are calling - * it with current/current->mm as the first two arguments, - * simply remove those arguments. The behavior will be the same - * as it is now. If you are calling it on another task, use - * get_user_pages_remote() instead. - * - * Any questions? Ask Dave Hansen - */ -long -__gup_deprecated -get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ - get_user_pages -#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ - get_user_pages8, x, \ - get_user_pages6, x, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); -#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ - get_user_pages_locked -#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ - get_user_pages_locked8, x, \ - get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); -#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ - get_user_pages_unlocked -#define get_user_pages_unlocked(...) GUPU_MACRO(__VA_ARGS__, \ - get_user_pages_unlocked7, x, \ - get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) - /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ -- cgit v1.2.3 From a0ca153f98db8cf25298565a09e11fe9d82846ad Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 5 Apr 2016 09:13:39 -0700 Subject: GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU This patch fixes an issue I found in which we were dropping frames if we had enabled checksums on GRE headers that were encapsulated by either FOU or GUE. Without this patch I was barely able to get 1 Gb/s of throughput. With this patch applied I am now at least getting around 6 Gb/s. The issue is due to the fact that with FOU or GUE applied we do not provide a transport offset pointing to the GRE header, nor do we offload it in software as the GRE header is completely skipped by GSO and treated like a VXLAN or GENEVE type header. As such we need to prevent the stack from generating it and also prevent GRE from generating it via any interface we create. Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..8395308a2445 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2120,7 +2120,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* 6 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; -- cgit v1.2.3 From b32e4482aadfd1322357f46d4ed8a990603664d9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Apr 2016 15:51:57 -0700 Subject: fscrypto: don't let data integrity writebacks fail with ENOMEM This patch fixes the issue introduced by the ext4 crypto fix in a same manner. For F2FS, however, we flush the pending IOs and wait for a while to acquire free memory. Fixes: c9af28fdd4492 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM") Cc: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- include/linux/fscrypto.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index cd91f75de49b..6027f6bbb061 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -263,9 +263,9 @@ static inline void fscrypt_set_d_op(struct dentry *dentry) extern struct kmem_cache *fscrypt_info_cachep; int fscrypt_initialize(void); -extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *); +extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(struct inode *, struct page *); +extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t); extern int fscrypt_decrypt_page(struct page *); extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); extern void fscrypt_pullback_bio_page(struct page **, bool); @@ -299,7 +299,8 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, #endif /* crypto.c */ -static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i) +static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i, + gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } @@ -310,7 +311,7 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) } static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i, - struct page *p) + struct page *p, gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } -- cgit v1.2.3 From 1363074667a6b7d0507527742ccd7bbed5e3ceaa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Apr 2016 12:27:09 +0200 Subject: USB: uas: Add a new NO_REPORT_LUNS quirk Add a new NO_REPORT_LUNS quirk and set it for Seagate drives with an usb-id of: 0bc2:331a, as these will fail to respond to a REPORT_LUNS command. Cc: stable@vger.kernel.org Reported-and-tested-by: David Webb Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 7f5f78bd15ad..245f57dbbb61 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -79,6 +79,8 @@ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ US_FLAG(MAX_SECTORS_240, 0x08000000) \ /* Sets max_sectors to 240 */ \ + US_FLAG(NO_REPORT_LUNS, 0x10000000) \ + /* Cannot handle REPORT_LUNS */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From cba2e47abcbd80e3f46f460899290402f98090ec Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 12 Apr 2016 18:10:52 -0600 Subject: pmem: fix BUG() error in pmem.h:48 on X86_32 After 'commit fc0c2028135c ("x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem()")', probing a PMEM device hits the BUG() error below on X86_32 kernel. kernel BUG at include/linux/pmem.h:48! memcpy_from_pmem() calls arch_memcpy_from_pmem(), which is unimplemented since CONFIG_ARCH_HAS_PMEM_API is undefined on X86_32. Fix the BUG() error by adding default_memcpy_from_pmem(). Acked-by: Dan Williams Signed-off-by: Toshi Kani Signed-off-by: Ross Zwisler Cc: Dan Williams Cc: Ross Zwisler --- include/linux/pmem.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index ac6d872ce067..57d146fe44dd 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -72,6 +72,18 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) } #endif +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); +} + +static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) +{ + memcpy(dst, (void __force *) src, size); + return 0; +} + /* * memcpy_from_pmem - read from persistent memory with error handling * @dst: destination buffer @@ -83,12 +95,10 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) static inline int memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { - return arch_memcpy_from_pmem(dst, src, size); -} - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); + if (arch_has_pmem_api()) + return arch_memcpy_from_pmem(dst, src, size); + else + return default_memcpy_from_pmem(dst, src, size); } /** -- cgit v1.2.3 From 34dbbcdbf63360661ff7bda6c5f52f99ac515f92 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Apr 2016 11:22:00 -0700 Subject: Make file credentials available to the seqfile interfaces A lot of seqfile users seem to be using things like %pK that uses the credentials of the current process, but that is actually completely wrong for filesystem interfaces. The unix semantics for permission checking files is to check permissions at _open_ time, not at read or write time, and that is not just a small detail: passing off stdin/stdout/stderr to a suid application and making the actual IO happen in privileged context is a classic exploit technique. So if we want to be able to look at permissions at read time, we need to use the file open credentials, not the current ones. Normal file accesses can just use "f_cred" (or any of the helper functions that do that, like file_ns_capable()), but the seqfile interfaces do not have any such options. It turns out that seq_file _does_ save away the user_ns information of the file, though. Since user_ns is just part of the full credential information, replace that special case with saving off the cred pointer instead, and suddenly seq_file has all the permission information it needs. Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dde00defbaa5..f3d45dd42695 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,13 +7,10 @@ #include #include #include +#include +#include struct seq_operations; -struct file; -struct path; -struct inode; -struct dentry; -struct user_namespace; struct seq_file { char *buf; @@ -27,9 +24,7 @@ struct seq_file { struct mutex lock; const struct seq_operations *op; int poll_event; -#ifdef CONFIG_USER_NS - struct user_namespace *user_ns; -#endif + const struct file *file; void *private; }; @@ -147,7 +142,7 @@ int seq_release_private(struct inode *, struct file *); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS - return seq->user_ns; + return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; -- cgit v1.2.3 From d894ba18d4e449b3a7f6eb491f16c9e02933736e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 12 Apr 2016 13:11:25 -0400 Subject: soreuseport: fix ordering for mixed v4/v6 sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the SO_REUSEPORT socket option, it is possible to create sockets in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address. This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on the AF_INET6 sockets. Prior to the commits referenced below, an incoming IPv4 packet would always be routed to a socket of type AF_INET when this mixed-mode was used. After those changes, the same packet would be routed to the most recently bound socket (if this happened to be an AF_INET6 socket, it would have an IPv4 mapped IPv6 address). The change in behavior occurred because the recent SO_REUSEPORT optimizations short-circuit the socket scoring logic as soon as they find a match. They did not take into account the scoring logic that favors AF_INET sockets over AF_INET6 sockets in the event of a tie. To fix this problem, this patch changes the insertion order of AF_INET and AF_INET6 addresses in the TCP and UDP socket lists when the sockets have SO_REUSEPORT set. AF_INET sockets will be inserted at the head of the list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at the tail of the list. This will force AF_INET sockets to always be considered first. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection") Reported-by: Maciej Żenczykowski Signed-off-by: Craig Gallek Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 39 +++++++++++++++++++++++++++++++++++++++ 1 file changed, 39 insertions(+) (limited to 'include/linux') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 1c33dd7da4a7..4ae95f7e8597 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, if (!is_a_nulls(first)) first->pprev = &n->next; } + +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the end of the specified hlist_nulls, + * while permitting racing traversals. NOTE: tail insertion requires + * list traversal. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); + i = hlist_nulls_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_nulls_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. -- cgit v1.2.3 From cb92148b58a49455f3a7204eba3aee09a8b7683c Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 15 Apr 2016 13:00:11 -0500 Subject: PCI: Add pci_set_vpd_size() to set VPD size After 104daa71b396 ("PCI: Determine actual VPD size on first access"), the PCI core computes the valid VPD size by parsing the VPD starting at offset 0x0. We don't attempt to read past that valid size because that causes some devices to crash. However, some devices do have data past that valid size. For example, Chelsio adapters contain two VPD structures, and the driver needs both of them. Add pci_set_vpd_size(). If a driver knows it is safe to read past the end of the VPD data structure at offset 0, it can use pci_set_vpd_size() to allow access to as much data as it needs. [bhelgaas: changelog, split patches, rename to pci_set_vpd_size() and return int (not ssize_t)] Fixes: 104daa71b396 ("PCI: Determine actual VPD size on first access") Tested-by: Steve Wise Signed-off-by: Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..932ec74909c6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1111,6 +1111,7 @@ void pci_unlock_rescan_remove(void); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_set_vpd_size(struct pci_dev *dev, size_t len); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); -- cgit v1.2.3 From 67245ff332064c01b760afa7a384ccda024bfd24 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2016 15:16:07 -0700 Subject: devpts: clean up interface to pty drivers This gets rid of the horrible notion of having that struct inode *ptmx_inode be the linchpin of the interface between the pty code and devpts. By de-emphasizing the ptmx inode, a lot of things actually get cleaner, and we will have a much saner way forward. In particular, this will allow us to associate with any particular devpts instance at open-time, and not be artificially tied to one particular ptmx inode. The patch itself is actually fairly straightforward, and apart from some locking and return path cleanups it's pretty mechanical: - the interfaces that devpts exposes all take "struct pts_fs_info *" instead of "struct inode *ptmx_inode" now. NOTE! The "struct pts_fs_info" thing is a completely opaque structure as far as the pty driver is concerned: it's still declared entirely internally to devpts. So the pty code can't actually access it in any way, just pass it as a "cookie" to the devpts code. - the "look up the pts fs info" is now a single clear operation, that also does the reference count increment on the pts superblock. So "devpts_add/del_ref()" is gone, and replaced by a "lookup and get ref" operation (devpts_get_ref(inode)), along with a "put ref" op (devpts_put_ref()). - the pty master "tty->driver_data" field now contains the pts_fs_info, not the ptmx inode. - because we don't care about the ptmx inode any more as some kind of base index, the ref counting can now drop the inode games - it just gets the ref on the superblock. - the pts_fs_info now has a back-pointer to the super_block. That's so that we can easily look up the information we actually need. Although quite often, the pts fs info was actually all we wanted, and not having to look it up based on some magical inode makes things more straightforward. In particular, now that "devpts_get_ref(inode)" operation should really be the *only* place we need to look up what devpts instance we're associated with, and we do it exactly once, at ptmx_open() time. The other side of this is that one ptmx node could now be associated with multiple different devpts instances - you could have a single /dev/ptmx node, and then have multiple mount namespaces with their own instances of devpts mounted on /dev/pts/. And that's all perfectly sane in a model where we just look up the pts instance at open time. This will eventually allow us to get rid of our odd single-vs-multiple pts instance model, but this patch in itself changes no semantics, only an internal binding model. Cc: Eric Biederman Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Peter Hurley Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: Alan Cox Cc: Jann Horn Cc: Greg KH Cc: Jiri Slaby Cc: Florian Weimer Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index e0ee0b3000b2..358a4db72a27 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -15,38 +15,24 @@ #include +struct pts_fs_info; + #ifdef CONFIG_UNIX98_PTYS -int devpts_new_index(struct inode *ptmx_inode); -void devpts_kill_index(struct inode *ptmx_inode, int idx); -void devpts_add_ref(struct inode *ptmx_inode); -void devpts_del_ref(struct inode *ptmx_inode); +/* Look up a pts fs info and get a ref to it */ +struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); +void devpts_put_ref(struct pts_fs_info *); + +int devpts_new_index(struct pts_fs_info *); +void devpts_kill_index(struct pts_fs_info *, int); + /* mknod in devpts */ -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, - void *priv); +struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *); /* get private structure */ void *devpts_get_priv(struct inode *pts_inode); /* unlink */ void devpts_pty_kill(struct inode *inode); -#else - -/* Dummy stubs in the no-pty case */ -static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } -static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } -static inline void devpts_add_ref(struct inode *ptmx_inode) { } -static inline void devpts_del_ref(struct inode *ptmx_inode) { } -static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, - dev_t device, int index, void *priv) -{ - return ERR_PTR(-EINVAL); -} -static inline void *devpts_get_priv(struct inode *pts_inode) -{ - return NULL; -} -static inline void devpts_pty_kill(struct inode *inode) { } - #endif -- cgit v1.2.3 From 1d0fd42fa31d18ba0a3e0dd008c9e93e1cebe451 Mon Sep 17 00:00:00 2001 From: Wei Ni Date: Thu, 3 Mar 2016 17:33:46 +0800 Subject: thermal: consistently use int for trip temp The commit 17e8351a7739 consistently use int for temperature, however it missed a few in trip temperature and thermal_core. In current codes, the trip->temperature used "unsigned long" and zone->temperature used"int", if the temperature is negative value, it will get wrong result when compare temperature with trip temperature. This patch can fix it. Signed-off-by: Wei Ni Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a55d0523f75d..1b8a5a7876ce 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -352,8 +352,8 @@ struct thermal_zone_of_device_ops { struct thermal_trip { struct device_node *np; - unsigned long int temperature; - unsigned long int hysteresis; + int temperature; + int hysteresis; enum thermal_trip_type type; }; -- cgit v1.2.3 From 4bfd2e6e53435a214888fd35e230157a38ffc6a0 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 20 Apr 2016 16:01:16 +0300 Subject: net/mlx4_core: Avoid repeated calls to pci enable/disable Maintain the PCI status and provide wrappers for enabling and disabling the PCI device. Performing the actions more than once without doing its opposite results in warning logs. This occurred when EEH hotplugged the device causing a warning for disabling an already disabled device. Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly') Signed-off-by: Daniel Jurgens Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8541a913f6a3..d1f904c8b2cb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -828,6 +828,11 @@ struct mlx4_vf_dev { u8 n_ports; }; +enum mlx4_pci_status { + MLX4_PCI_STATUS_DISABLED, + MLX4_PCI_STATUS_ENABLED, +}; + struct mlx4_dev_persistent { struct pci_dev *pdev; struct mlx4_dev *dev; @@ -841,6 +846,8 @@ struct mlx4_dev_persistent { u8 state; struct mutex interface_state_mutex; /* protect SW state */ u8 interface_state; + struct mutex pci_status_mutex; /* sync pci state */ + enum mlx4_pci_status pci_status; }; struct mlx4_dev { -- cgit v1.2.3 From 75dd602a5198a6e5f75534db52b6e6fbaabb33d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Mar 2016 11:36:59 +0200 Subject: lockdep: Fix lock_chain::base size lock_chain::base is used to store an index into the chain_hlocks[] array, however that array contains more elements than can be indexed using the u16. Change the lock_chain structure to use a bitfield to encode the data it needs and add BUILD_BUG_ON() assertions to check the fields are wide enough. Also, for DEBUG_LOCKDEP, assert that we don't run out of elements of that array; as that would wreck the collision detectoring. Signed-off-by: Peter Zijlstra (Intel) Cc: Alfredo Alvarez Fernandez Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sedat Dilek Cc: Theodore Ts'o Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160330093659.GS3408@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..d10ef06971b5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -196,9 +196,11 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { - u8 irq_context; - u8 depth; - u16 base; + /* see BUILD_BUG_ON()s in lookup_chain_cache() */ + unsigned int irq_context : 2, + depth : 6, + base : 24; + /* 4 byte hole */ struct hlist_node entry; u64 chain_key; }; -- cgit v1.2.3 From 046339eaab26804f52f6604877f5674f70815b26 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:03 +0300 Subject: net/mlx5e: Device's mtu field is u16 and not int For set/query MTU port firmware commands the MTU field is 16 bits, here I changed all the "int mtu" parameters of the functions wrapping those firmware commands to be u16. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a1d145abd4eb..b30250ab7604 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -54,9 +54,9 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, -- cgit v1.2.3 From cd255efff9baadd654d6160e52d17ae7c568c9d3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:05 +0300 Subject: net/mlx5e: Use vport MTU rather than physical port MTU Set and report vport MTU rather than physical MTU, Driver will set both vport and physical port mtu and will rely on the query of vport mtu. SRIOV VFs have to report their MTU to their vport manager (PF), and this will allow them to work with any MTU they need without failing the request. Also for some cases where the PF is not a port owner, PF can work with MTU less than the physical port mtu if set physical port mtu didn't take effect. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/vport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index bd93e6323603..301da4a5e6bf 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); -- cgit v1.2.3 From 5fc7197d3a256d9c5de3134870304b24892a4908 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 22 Apr 2016 00:33:07 +0300 Subject: net/mlx5: Add pci shutdown callback This patch introduces kexec support for mlx5. When switching kernels, kexec() calls shutdown, which unloads the driver and cleans its resources. In addition, remove unregister netdev from shutdown flow. This will allow a clean shutdown, even if some netdev clients did not release their reference from this netdev. Releasing The HW resources only is enough as the kernel is shutting down Signed-off-by: Majd Dibbiny Signed-off-by: Tariq Toukan Signed-off-by: Haggai Abramovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include/linux') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcd5ac8d3b14..369c837d40f5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -519,8 +519,9 @@ enum mlx5_device_state { }; enum mlx5_interface_state { - MLX5_INTERFACE_STATE_DOWN, - MLX5_INTERFACE_STATE_UP, + MLX5_INTERFACE_STATE_DOWN = BIT(0), + MLX5_INTERFACE_STATE_UP = BIT(1), + MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2), }; enum mlx5_pci_status { @@ -544,7 +545,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; - enum mlx5_interface_state interface_state; + unsigned long intf_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -- cgit v1.2.3 From 6c1ea260f89709e0021d2c59f8fd2a104b5b1123 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 11 Apr 2016 19:34:49 +0200 Subject: libceph: make authorizer destruction independent of ceph_auth_client Starting the kernel client with cephx disabled and then enabling cephx and restarting userspace daemons can result in a crash: [262671.478162] BUG: unable to handle kernel paging request at ffffebe000000000 [262671.531460] IP: [] kfree+0x5a/0x130 [262671.584334] PGD 0 [262671.635847] Oops: 0000 [#1] SMP [262672.055841] CPU: 22 PID: 2961272 Comm: kworker/22:2 Not tainted 4.2.0-34-generic #39~14.04.1-Ubuntu [262672.162338] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS 2.4.3 07/09/2014 [262672.268937] Workqueue: ceph-msgr con_work [libceph] [262672.322290] task: ffff88081c2d0dc0 ti: ffff880149ae8000 task.ti: ffff880149ae8000 [262672.428330] RIP: 0010:[] [] kfree+0x5a/0x130 [262672.535880] RSP: 0018:ffff880149aeba58 EFLAGS: 00010286 [262672.589486] RAX: 000001e000000000 RBX: 0000000000000012 RCX: ffff8807e7461018 [262672.695980] RDX: 000077ff80000000 RSI: ffff88081af2be04 RDI: 0000000000000012 [262672.803668] RBP: ffff880149aeba78 R08: 0000000000000000 R09: 0000000000000000 [262672.912299] R10: ffffebe000000000 R11: ffff880819a60e78 R12: ffff8800aec8df40 [262673.021769] R13: ffffffffc035f70f R14: ffff8807e5b138e0 R15: ffff880da9785840 [262673.131722] FS: 0000000000000000(0000) GS:ffff88081fac0000(0000) knlGS:0000000000000000 [262673.245377] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [262673.303281] CR2: ffffebe000000000 CR3: 0000000001c0d000 CR4: 00000000001406e0 [262673.417556] Stack: [262673.472943] ffff880149aeba88 ffff88081af2be04 ffff8800aec8df40 ffff88081af2be04 [262673.583767] ffff880149aeba98 ffffffffc035f70f ffff880149aebac8 ffff8800aec8df00 [262673.694546] ffff880149aebac8 ffffffffc035c89e ffff8807e5b138e0 ffff8805b047f800 [262673.805230] Call Trace: [262673.859116] [] ceph_x_destroy_authorizer+0x1f/0x50 [libceph] [262673.968705] [] ceph_auth_destroy_authorizer+0x3e/0x60 [libceph] [262674.078852] [] put_osd+0x45/0x80 [libceph] [262674.134249] [] remove_osd+0xae/0x140 [libceph] [262674.189124] [] __reset_osd+0x103/0x150 [libceph] [262674.243749] [] kick_requests+0x223/0x460 [libceph] [262674.297485] [] ceph_osdc_handle_map+0x282/0x5e0 [libceph] [262674.350813] [] dispatch+0x4e/0x720 [libceph] [262674.403312] [] try_read+0x3d1/0x1090 [libceph] [262674.454712] [] ? dequeue_entity+0x152/0x690 [262674.505096] [] con_work+0xcb/0x1300 [libceph] [262674.555104] [] process_one_work+0x14e/0x3d0 [262674.604072] [] worker_thread+0x11a/0x470 [262674.652187] [] ? rescuer_thread+0x310/0x310 [262674.699022] [] kthread+0xd2/0xf0 [262674.744494] [] ? kthread_create_on_node+0x1c0/0x1c0 [262674.789543] [] ret_from_fork+0x3f/0x70 [262674.834094] [] ? kthread_create_on_node+0x1c0/0x1c0 What happens is the following: (1) new MON session is established (2) old "none" ac is destroyed (3) new "cephx" ac is constructed ... (4) old OSD session (w/ "none" authorizer) is put ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer) osd->o_auth.authorizer in the "none" case is just a bare pointer into ac, which contains a single static copy for all services. By the time we get to (4), "none" ac, freed in (2), is long gone. On top of that, a new vtable installed in (3) points us at ceph_x_destroy_authorizer(), so we end up trying to destroy a "none" authorizer with a "cephx" destructor operating on invalid memory! To fix this, decouple authorizer destruction from ac and do away with a single static "none" authorizer by making a copy for each OSD or MDS session. Authorizers themselves are independent of ac and so there is no reason for destroy_authorizer() to be an ac op. Make it an op on the authorizer itself by turning ceph_authorizer into a real struct. Fixes: http://tracker.ceph.com/issues/15447 Reported-by: Alan Zhang Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/auth.h | 10 +++++----- include/linux/ceph/osd_client.h | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 260d78b587c4..1563265d2097 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -12,9 +12,12 @@ */ struct ceph_auth_client; -struct ceph_authorizer; struct ceph_msg; +struct ceph_authorizer { + void (*destroy)(struct ceph_authorizer *); +}; + struct ceph_auth_handshake { struct ceph_authorizer *authorizer; void *authorizer_buf; @@ -62,8 +65,6 @@ struct ceph_auth_client_ops { struct ceph_auth_handshake *auth); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len); - void (*destroy_authorizer)(struct ceph_auth_client *ac, - struct ceph_authorizer *a); void (*invalidate_authorizer)(struct ceph_auth_client *ac, int peer_type); @@ -112,8 +113,7 @@ extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); -extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a); +void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *a); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 4343df806710..cbf460927c42 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -16,7 +16,6 @@ struct ceph_msg; struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; -struct ceph_authorizer; /* * completion callback for async writepages -- cgit v1.2.3 From 841645b5f2dfceac69b78fcd0c9050868d41ea61 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Apr 2016 15:33:55 -0400 Subject: ipv6: Revert optional address flusing on ifdown. This reverts the following three commits: 70af921db6f8835f4b11c65731116560adb00c14 799977d9aafbf0ca0b9c39b04cbfb16db71302c9 f1705ec197e705b79ea40fe7a2cc5acfa1d3bfac The feature was ill conceived, has terrible semantics, and has added nothing but regressions to the already fragile ipv6 stack. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7edc14fb66b6..4b2267e1b7c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,7 +62,6 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; - __s32 keep_addr_on_down; void *sysctl; }; -- cgit v1.2.3 From 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2016 19:06:48 -0400 Subject: cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset kicks off asynchronous NUMA node migration if necessary during task migration and flushes it from cpuset_post_attach_flush() which is called at the end of __cgroup_procs_write(). This is to avoid performing migration with cgroup_threadgroup_rwsem write-locked which can lead to deadlock through dependency on kworker creation. memcg has a similar issue with charge moving, so let's convert it to an official callback rather than the current one-off cpuset specific function. This patch adds cgroup_subsys->post_attach callback and makes cpuset register cpuset_post_attach_flush() as its ->post_attach. The conversion is mostly one-to-one except that the new callback is called under cgroup_mutex. This is to guarantee that no other migration operations are started before ->post_attach callbacks are finished. cgroup_mutex is one of the outermost mutex in the system and has never been and shouldn't be a problem. We can add specialized synchronization around __cgroup_procs_write() but I don't think there's any noticeable benefit. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: # 4.4+ prerequisite for the next patch --- include/linux/cgroup-defs.h | 1 + include/linux/cpuset.h | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3e39ae5bc799..5b17de62c962 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -444,6 +444,7 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); + void (*post_attach)(void); int (*can_fork)(struct task_struct *task); void (*cancel_fork)(struct task_struct *task); void (*fork)(struct task_struct *task); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index fea160ee5803..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } -extern void cpuset_post_attach_flush(void); - #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } -static inline void cpuset_post_attach_flush(void) -{ -} - #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ -- cgit v1.2.3 From 6a923934c33c750a595868af6bef5f1a1fa90054 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 Apr 2016 11:47:41 -0400 Subject: Revert "ipv6: Revert optional address flusing on ifdown." This reverts commit 841645b5f2dfceac69b78fcd0c9050868d41ea61. Ok, this puts the feature back. I've decided to apply David A.'s bug fix and run with that rather than make everyone wait another whole release for this feature. Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4b2267e1b7c3..7edc14fb66b6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,6 +62,7 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; + __s32 keep_addr_on_down; void *sysctl; }; -- cgit v1.2.3 From 8ead9dd54716d1e05e129959f702fcc1786f82b4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Apr 2016 20:04:08 -0700 Subject: devpts: more pty driver interface cleanups This is more prep-work for the upcoming pty changes. Still just code cleanup with no actual semantic changes. This removes a bunch pointless complexity by just having the slave pty side remember the dentry associated with the devpts slave rather than the inode. That allows us to remove all the "look up the dentry" code for when we want to remove it again. Together with moving the tty pointer from "inode->i_private" to "dentry->d_fsdata" and getting rid of pointless inode locking, this removes about 30 lines of code. Not only is the end result smaller, it's simpler and easier to understand. The old code, for example, depended on the d_find_alias() to not just find the dentry, but also to check that it is still hashed, which in turn validated the tty pointer in the inode. That is a _very_ roundabout way to say "invalidate the cached tty pointer when the dentry is removed". The new code just does dentry->d_fsdata = NULL; in devpts_pty_kill() instead, invalidating the tty pointer rather more directly and obviously. Don't do something complex and subtle when the obvious straightforward approach will do. The rest of the patch (ie apart from code deletion and the above tty pointer clearing) is just switching the calling convention to pass the dentry or file pointer around instead of the inode. Cc: Eric Biederman Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Peter Hurley Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: Alan Cox Cc: Jann Horn Cc: Greg KH Cc: Jiri Slaby Cc: Florian Weimer Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 6 +++--- include/linux/tty_driver.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 358a4db72a27..5871f292b596 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -27,11 +27,11 @@ int devpts_new_index(struct pts_fs_info *); void devpts_kill_index(struct pts_fs_info *, int); /* mknod in devpts */ -struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *); +struct dentry *devpts_pty_new(struct pts_fs_info *, int, void *); /* get private structure */ -void *devpts_get_priv(struct inode *pts_inode); +void *devpts_get_priv(struct dentry *); /* unlink */ -void devpts_pty_kill(struct inode *inode); +void devpts_pty_kill(struct dentry *); #endif diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 161052477f77..b742b5e47cc2 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -7,7 +7,7 @@ * defined; unless noted otherwise, they are optional, and can be * filled in with a null pointer. * - * struct tty_struct * (*lookup)(struct tty_driver *self, int idx) + * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx) * * Return the tty device corresponding to idx, NULL if there is not * one currently in use and an ERR_PTR value on error. Called under @@ -250,7 +250,7 @@ struct serial_icounter_struct; struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, - struct inode *inode, int idx); + struct file *filp, int idx); int (*install)(struct tty_driver *driver, struct tty_struct *tty); void (*remove)(struct tty_driver *driver, struct tty_struct *tty); int (*open)(struct tty_struct * tty, struct file * filp); -- cgit v1.2.3 From 986ef95ecdd3eb6fa29433e68faa94c7624083be Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 31 Mar 2016 19:03:25 +0300 Subject: IB/mlx5: Expose correct max_sge_rd limit mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) has a limitation where rdma read work queue entries cannot exceed 512 bytes. A rdma_read wqe needs to fit in 512 bytes: - wqe control segment (16 bytes) - rdma segment (16 bytes) - scatter elements (16 bytes each) So max_sge_rd should be: (512 - 16 - 16) / 16 = 30. Cc: linux-stable@vger.kernel.org Reported-by: Christoph Hellwig Tested-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..b3575f392492 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -392,6 +392,17 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; -- cgit v1.2.3 From 66ee95d16a7f1b7b4f1dd74a2d81c6e19dc29a14 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 28 Apr 2016 16:18:24 -0700 Subject: mm: exclude HugeTLB pages from THP page_mapped() logic HugeTLB pages cannot be split, so we use the compound_mapcount to track rmaps. Currently page_mapped() will check the compound_mapcount, but will also go through the constituent pages of a THP compound page and query the individual _mapcount's too. Unfortunately, page_mapped() does not distinguish between HugeTLB and THP compound pages and assumes that a compound page always needs to have HPAGE_PMD_NR pages querying. For most cases when dealing with HugeTLB this is just inefficient, but for scenarios where the HugeTLB page size is less than the pmd block size (e.g. when using contiguous bit on ARM) this can lead to crashes. This patch adjusts the page_mapped function such that we skip the unnecessary THP reference checks for HugeTLB pages. Fixes: e1534ae95004 ("mm: differentiate page_mapped() from page_mapcount() for compound pages") Signed-off-by: Steve Capper Acked-by: Kirill A. Shutemov Cc: Will Deacon Cc: Catalin Marinas Cc: Michal Hocko Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index a55e5be0894f..79b6c18d0a38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1031,6 +1031,8 @@ static inline bool page_mapped(struct page *page) page = compound_head(page); if (atomic_read(compound_mapcount_ptr(page)) >= 0) return true; + if (PageHuge(page)) + return false; for (i = 0; i < hpage_nr_pages(page); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; -- cgit v1.2.3 From aa88b68c3b1dce8bc3fd54c8a7372a777ff265cd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 28 Apr 2016 16:18:27 -0700 Subject: thp: keep huge zero page pinned until tlb flush Andrea has found[1] a race condition on MMU-gather based TLB flush vs split_huge_page() or shrinker which frees huge zero under us (patch 1/2 and 2/2 respectively). With new THP refcounting, we don't need patch 1/2: mmu_gather keeps the page pinned until flush is complete and the pin prevents the page from being split under us. We still need patch 2/2. This is simplified version of Andrea's patch. We don't need fancy encoding. [1] http://lkml.kernel.org/r/1447938052-22165-1-git-send-email-aarcange@redhat.com Signed-off-by: Kirill A. Shutemov Reported-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Dave Hansen Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include/linux') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7008623e24b1..d7b9e5346fba 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -152,6 +152,7 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) } struct page *get_huge_zero_page(void); +void put_huge_zero_page(void); #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) @@ -208,6 +209,10 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline void put_huge_zero_page(void) +{ + BUILD_BUG(); +} static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags) -- cgit v1.2.3 From 28093f9f34cedeaea0f481c58446d9dac6dd620f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 28 Apr 2016 16:18:35 -0700 Subject: numa: fix /proc//numa_maps for THP In gather_pte_stats() a THP pmd is cast into a pte, which is wrong because the layouts may differ depending on the architecture. On s390 this will lead to inaccurate numa_maps accounting in /proc because of misguided pte_present() and pte_dirty() checks on the fake pte. On other architectures pte_present() and pte_dirty() may work by chance, but there may be an issue with direct-access (dax) mappings w/o underlying struct pages when HAVE_PTE_SPECIAL is set and THP is available. In vm_normal_page() the fake pte will be checked with pte_special() and because there is no "special" bit in a pmd, this will always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be skipped. On dax mappings w/o struct pages, an invalid struct page pointer would then be returned that can crash the kernel. This patch fixes the numa_maps THP handling by introducing new "_pmd" variants of the can_gather_numa_stats() and vm_normal_page() functions. Signed-off-by: Gerald Schaefer Cc: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Vlastimil Babka Cc: Jerome Marchand Cc: Johannes Weiner Cc: Dave Hansen Cc: Mel Gorman Cc: Dan Williams Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michael Holzheu Cc: [4.3+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 79b6c18d0a38..864d7221de84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1140,6 +1140,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd); int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); -- cgit v1.2.3