From 4cef191d05871fbc8bb3b812880e1997e855a3b9 Mon Sep 17 00:00:00 2001 From: Jaedon Shin Date: Fri, 25 Mar 2016 12:46:54 +0900 Subject: net: phy: bcm7xxx: Add entries for Broadcom BCM7346 and BCM7362 Add PHY entries for the Broadcom BCM7346 and BCM7362 chips, these are 40nm generation Ethernet PHY. Fixes: 815717d1473e ("net: phy: bcm7xxx: Remove wildcard entries") Signed-off-by: Jaedon Shin Signed-off-by: David S. Miller --- include/linux/brcmphy.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/brcmphy.h b/include/linux/brcmphy.h index f0ba9c2ec639..e3354b74286c 100644 --- a/include/linux/brcmphy.h +++ b/include/linux/brcmphy.h @@ -24,6 +24,8 @@ #define PHY_ID_BCM7250 0xae025280 #define PHY_ID_BCM7364 0xae025260 #define PHY_ID_BCM7366 0x600d8490 +#define PHY_ID_BCM7346 0x600d8650 +#define PHY_ID_BCM7362 0x600d84b0 #define PHY_ID_BCM7425 0x600d86b0 #define PHY_ID_BCM7429 0x600d8730 #define PHY_ID_BCM7435 0x600d8750 -- cgit v1.2.3 From 96c22a3293512ba684e73a981196430f524689da Mon Sep 17 00:00:00 2001 From: Octavian Purdila Date: Wed, 23 Mar 2016 14:14:48 +0200 Subject: configfs: fix CONFIGFS_BIN_ATTR_[RW]O definitions The type should be struct configfs_bin_attribute and not struct configfs_attribute. Signed-off-by: Octavian Purdila Signed-off-by: Christoph Hellwig --- include/linux/configfs.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/configfs.h b/include/linux/configfs.h index 485fe5519448..d9d6a9d77489 100644 --- a/include/linux/configfs.h +++ b/include/linux/configfs.h @@ -188,7 +188,7 @@ static struct configfs_bin_attribute _pfx##attr_##_name = { \ } #define CONFIGFS_BIN_ATTR_RO(_pfx, _name, _priv, _maxsz) \ -static struct configfs_attribute _pfx##attr_##_name = { \ +static struct configfs_bin_attribute _pfx##attr_##_name = { \ .cb_attr = { \ .ca_name = __stringify(_name), \ .ca_mode = S_IRUGO, \ @@ -200,7 +200,7 @@ static struct configfs_attribute _pfx##attr_##_name = { \ } #define CONFIGFS_BIN_ATTR_WO(_pfx, _name, _priv, _maxsz) \ -static struct configfs_attribute _pfx##attr_##_name = { \ +static struct configfs_bin_attribute _pfx##attr_##_name = { \ .cb_attr = { \ .ca_name = __stringify(_name), \ .ca_mode = S_IWUSR, \ -- cgit v1.2.3 From d101a125954eae1d397adda94ca6319485a50493 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Sat, 26 Mar 2016 16:14:37 -0400 Subject: fs: add file_dentry() This series fixes bugs in nfs and ext4 due to 4bacc9c9234c ("overlayfs: Make f_path always point to the overlay and f_inode to the underlay"). Regular files opened on overlayfs will result in the file being opened on the underlying filesystem, while f_path points to the overlayfs mount/dentry. This confuses filesystems which get the dentry from struct file and assume it's theirs. Add a new helper, file_dentry() [*], to get the filesystem's own dentry from the file. This checks file->f_path.dentry->d_flags against DCACHE_OP_REAL, and returns file->f_path.dentry if DCACHE_OP_REAL is not set (this is the common, non-overlayfs case). In the uncommon case it will call into overlayfs's ->d_real() to get the underlying dentry, matching file_inode(file). The reason we need to check against the inode is that if the file is copied up while being open, d_real() would return the upper dentry, while the open file comes from the lower dentry. [*] If possible, it's better simply to use file_inode() instead. Signed-off-by: Miklos Szeredi Signed-off-by: Theodore Ts'o Tested-by: Goldwyn Rodrigues Reviewed-by: Trond Myklebust Cc: # v4.2 Cc: David Howells Cc: Al Viro Cc: Daniel Axtens --- include/linux/dcache.h | 10 ++++++++++ include/linux/fs.h | 10 ++++++++++ 2 files changed, 20 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 7cb043d8f4e8..4bb4de8d95ea 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -161,6 +161,7 @@ struct dentry_operations { struct vfsmount *(*d_automount)(struct path *); int (*d_manage)(struct dentry *, bool); struct inode *(*d_select_inode)(struct dentry *, unsigned); + struct dentry *(*d_real)(struct dentry *, struct inode *); } ____cacheline_aligned; /* @@ -229,6 +230,7 @@ struct dentry_operations { #define DCACHE_OP_SELECT_INODE 0x02000000 /* Unioned entry: dcache op selects inode */ #define DCACHE_ENCRYPTED_WITH_KEY 0x04000000 /* dir is encrypted with a valid key */ +#define DCACHE_OP_REAL 0x08000000 extern seqlock_t rename_lock; @@ -555,4 +557,12 @@ static inline struct dentry *d_backing_dentry(struct dentry *upper) return upper; } +static inline struct dentry *d_real(struct dentry *dentry) +{ + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, NULL); + else + return dentry; +} + #endif /* __LINUX_DCACHE_H */ diff --git a/include/linux/fs.h b/include/linux/fs.h index 35d99266ca9a..b2ed2311b021 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -1241,6 +1241,16 @@ static inline struct inode *file_inode(const struct file *f) return f->f_inode; } +static inline struct dentry *file_dentry(const struct file *file) +{ + struct dentry *dentry = file->f_path.dentry; + + if (unlikely(dentry->d_flags & DCACHE_OP_REAL)) + return dentry->d_op->d_real(dentry, file_inode(file)); + else + return dentry; +} + static inline int locks_lock_file_wait(struct file *filp, struct file_lock *fl) { return locks_lock_inode_wait(file_inode(filp), fl); -- cgit v1.2.3 From 596cf3fe5854fe2b1703b0466ed6bf9cfb83c91e Mon Sep 17 00:00:00 2001 From: Vishwanath Pai Date: Wed, 16 Mar 2016 21:49:00 +0100 Subject: netfilter: ipset: fix race condition in ipset save, swap and delete This fix adds a new reference counter (ref_netlink) for the struct ip_set. The other reference counter (ref) can be swapped out by ip_set_swap and we need a separate counter to keep track of references for netlink events like dump. Using the same ref counter for dump causes a race condition which can be demonstrated by the following script: ipset create hash_ip1 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset create hash_ip2 hash:ip family inet hashsize 300000 maxelem 500000 \ counters ipset create hash_ip3 hash:ip family inet hashsize 1024 maxelem 500000 \ counters ipset save & ipset swap hash_ip3 hash_ip2 ipset destroy hash_ip3 /* will crash the machine */ Swap will exchange the values of ref so destroy will see ref = 0 instead of ref = 1. With this fix in place swap will not succeed because ipset save still has ref_netlink on the set (ip_set_swap doesn't swap ref_netlink). Both delete and swap will error out if ref_netlink != 0 on the set. Note: The changes to *_head functions is because previously we would increment ref whenever we called these functions, we don't do that anymore. Reviewed-by: Joshua Hunt Signed-off-by: Vishwanath Pai Signed-off-by: Jozsef Kadlecsik Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/ipset/ip_set.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/netfilter/ipset/ip_set.h b/include/linux/netfilter/ipset/ip_set.h index 0e1f433cc4b7..f48b8a664b0f 100644 --- a/include/linux/netfilter/ipset/ip_set.h +++ b/include/linux/netfilter/ipset/ip_set.h @@ -234,6 +234,10 @@ struct ip_set { spinlock_t lock; /* References to the set */ u32 ref; + /* References to the set for netlink events like dump, + * ref can be swapped out by ip_set_swap + */ + u32 ref_netlink; /* The core set type */ struct ip_set_type *type; /* The type variant doing the real job */ -- cgit v1.2.3 From fc0c2028135c7f75fce36b90e44efb8003a9173b Mon Sep 17 00:00:00 2001 From: Dan Williams Date: Tue, 8 Mar 2016 10:30:19 -0800 Subject: x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem() Update the definition of memcpy_from_pmem() to return 0 or a negative error code. Implement x86/arch_memcpy_from_pmem() with memcpy_mcsafe(). Cc: Borislav Petkov Cc: Tony Luck Cc: Thomas Gleixner Cc: Andy Lutomirski Cc: Peter Zijlstra Cc: Andrew Morton Cc: Linus Torvalds Acked-by: Ingo Molnar Reviewed-by: Ross Zwisler Signed-off-by: Dan Williams --- include/linux/pmem.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index 3ec5309e29f3..ac6d872ce067 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -42,6 +42,13 @@ static inline void arch_memcpy_to_pmem(void __pmem *dst, const void *src, BUG(); } +static inline int arch_memcpy_from_pmem(void *dst, const void __pmem *src, + size_t n) +{ + BUG(); + return -EFAULT; +} + static inline size_t arch_copy_from_iter_pmem(void __pmem *addr, size_t bytes, struct iov_iter *i) { @@ -66,14 +73,17 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) #endif /* - * Architectures that define ARCH_HAS_PMEM_API must provide - * implementations for arch_memcpy_to_pmem(), arch_wmb_pmem(), - * arch_copy_from_iter_pmem(), arch_clear_pmem(), arch_wb_cache_pmem() - * and arch_has_wmb_pmem(). + * memcpy_from_pmem - read from persistent memory with error handling + * @dst: destination buffer + * @src: source buffer + * @size: transfer length + * + * Returns 0 on success negative error code on failure. */ -static inline void memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) +static inline int memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) { - memcpy(dst, (void __force const *) src, size); + return arch_memcpy_from_pmem(dst, src, size); } static inline bool arch_has_pmem_api(void) -- cgit v1.2.3 From 5acba71e18833b9d06686b3751598bfa263a3ac3 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:37:59 +0100 Subject: locking/atomic: Introduce atomic_fetch_or() This is deemed to replace the type generic fetch_or() which brings a lot of issues such as macro induced block variable aliasing and sloppy types. Not to mention fetch_or() doesn't refer to any namespace, adding even more confusion. So lets provide an atomic_t version. Current and next users of fetch_or() are thus encouraged to use atomic_t. Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-2-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) (limited to 'include') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index df4f369254c0..3d64c0852164 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -558,6 +558,27 @@ static inline int atomic_dec_if_positive(atomic_t *v) } #endif +/** + * atomic_fetch_or - perform *p |= mask and return old value of *p + * @p: pointer to atomic_t + * @mask: mask to OR on the atomic_t + */ +#ifndef atomic_fetch_or +static inline int atomic_fetch_or(atomic_t *p, int mask) +{ + int old, val = atomic_read(p); + + for (;;) { + old = atomic_cmpxchg(p, val, val | mask); + if (old == val) + break; + val = old; + } + + return old; +} +#endif + /** * fetch_or - perform *ptr |= mask and return old value of *ptr * @ptr: pointer to value -- cgit v1.2.3 From f009a7a767e792d5ab0b46c08d46236ea5271dd9 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:00 +0100 Subject: timers/nohz: Convert tick dependency mask to atomic_t The tick dependency mask was intially unsigned long because this is the type on which clear_bit() operates on and fetch_or() accepts it. But now that we have atomic_fetch_or(), we can instead use atomic_andnot() to clear the bit. This consolidates the type of our tick dependency mask, reduce its size on structures and benefit from possible architecture optimizations on atomic_t operations. Suggested-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-3-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/sched.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index 60bba7e032dc..52c4847b05e2 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -720,7 +720,7 @@ struct signal_struct { struct task_cputime cputime_expires; #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif struct list_head cpu_timers[3]; @@ -1549,7 +1549,7 @@ struct task_struct { #endif #ifdef CONFIG_NO_HZ_FULL - unsigned long tick_dep_mask; + atomic_t tick_dep_mask; #endif unsigned long nvcsw, nivcsw; /* context switch counts */ u64 start_time; /* monotonic time in nsec */ -- cgit v1.2.3 From 5529578a27288d11d4d15635c258c6dde0f0fb10 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 24 Mar 2016 15:38:01 +0100 Subject: locking/atomic, sched: Unexport fetch_or() This patch functionally reverts: 5fd7a09cfb8c ("atomic: Export fetch_or()") During the merge Linus observed that the generic version of fetch_or() was messy: " This makes the ugly "fetch_or()" macro that the scheduler used internally a new generic helper, and does a bad job at it. " e23604edac2a Merge branch 'timers-nohz-for-linus' of git://git.kernel.org/pub/scm/linux/kernel/git/tip/tip Now that we have introduced atomic_fetch_or(), fetch_or() is only used by the scheduler in order to deal with thread_info flags which type can vary across architectures. Lets confine fetch_or() back to the scheduler so that we encourage future users to use the more robust and well typed atomic_t version instead. While at it, fetch_or() gets robustified, pasting improvements from a previous patch by Ingo Molnar that avoids needless expression re-evaluations in the loop. Reported-by: Linus Torvalds Signed-off-by: Frederic Weisbecker Cc: Andrew Morton Cc: Peter Zijlstra Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/1458830281-4255-4-git-send-email-fweisbec@gmail.com Signed-off-by: Ingo Molnar --- include/linux/atomic.h | 21 --------------------- 1 file changed, 21 deletions(-) (limited to 'include') diff --git a/include/linux/atomic.h b/include/linux/atomic.h index 3d64c0852164..506c3531832e 100644 --- a/include/linux/atomic.h +++ b/include/linux/atomic.h @@ -579,27 +579,6 @@ static inline int atomic_fetch_or(atomic_t *p, int mask) } #endif -/** - * fetch_or - perform *ptr |= mask and return old value of *ptr - * @ptr: pointer to value - * @mask: mask to OR on the value - * - * cmpxchg based fetch_or, macro so it works for different integer types - */ -#ifndef fetch_or -#define fetch_or(ptr, mask) \ -({ typeof(*(ptr)) __old, __val = *(ptr); \ - for (;;) { \ - __old = cmpxchg((ptr), __val, __val | (mask)); \ - if (__old == __val) \ - break; \ - __val = __old; \ - } \ - __old; \ -}) -#endif - - #ifdef CONFIG_GENERIC_ATOMIC64 #include #endif -- cgit v1.2.3 From 743bc4b069517d3bae69349a1a23511eaaae5ede Mon Sep 17 00:00:00 2001 From: John Youn Date: Mon, 28 Mar 2016 16:12:21 -0700 Subject: usb: ch9: Fix SSP Device Cap wFunctionalitySupport type The wFunctionalitySupport field should be __le16. Signed-off-by: John Youn Signed-off-by: Felipe Balbi --- include/uapi/linux/usb/ch9.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/usb/ch9.h b/include/uapi/linux/usb/ch9.h index 06d6c6228a7a..d5ce71607972 100644 --- a/include/uapi/linux/usb/ch9.h +++ b/include/uapi/linux/usb/ch9.h @@ -899,7 +899,7 @@ struct usb_ssp_cap_descriptor { __le32 bmAttributes; #define USB_SSP_SUBLINK_SPEED_ATTRIBS (0x1f << 0) /* sublink speed entries */ #define USB_SSP_SUBLINK_SPEED_IDS (0xf << 5) /* speed ID entries */ - __u16 wFunctionalitySupport; + __le16 wFunctionalitySupport; #define USB_SSP_MIN_SUBLINK_SPEED_ATTRIBUTE_ID (0xf) #define USB_SSP_MIN_RX_LANE_COUNT (0xf << 8) #define USB_SSP_MIN_TX_LANE_COUNT (0xf << 12) -- cgit v1.2.3 From 283d757378371e8044d873e661b1dccee46c5887 Mon Sep 17 00:00:00 2001 From: Denys Vlasenko Date: Wed, 30 Mar 2016 00:14:57 +0200 Subject: uapi/linux/stddef.h: Provide __always_inline to userspace headers Josh Boyer reported that my recent change to uapi/linux/swab.h broke the Qemu build: bc27fb68aaad ("include/uapi/linux/byteorder, swab: force inlining of some byteswap operations") Unfortunately, UAPI headers don't include compiler.h so fixing it there is not enough, add an __always_inline definition to uapi/linux/stddef.h instead. Testcase: "make headers_install" and try to compile this: #include void main() {} Reported-by: Josh Boyer Signed-off-by: Denys Vlasenko Cc: Andrew Morton Cc: Arnd Bergmann Cc: David Rientjes Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: Thomas Graf Cc: linux-kernel@vger.kernel.org Link: http://lkml.kernel.org/r/1459289697-12875-1-git-send-email-dvlasenk@redhat.com Signed-off-by: Ingo Molnar --- include/uapi/linux/stddef.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/stddef.h b/include/uapi/linux/stddef.h index aa9f10428743..621fa8ac4425 100644 --- a/include/uapi/linux/stddef.h +++ b/include/uapi/linux/stddef.h @@ -1 +1,5 @@ #include + +#ifndef __always_inline +#define __always_inline inline +#endif -- cgit v1.2.3 From c0e760c9c66ebd8a5a1ede81868677f4df993dfb Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Wed, 30 Mar 2016 00:02:00 +0200 Subject: bpf: make padding in bpf_tunnel_key explicit Make the 2 byte padding in struct bpf_tunnel_key between tunnel_ttl and tunnel_label members explicit. No issue has been observed, and gcc/llvm does padding for the old struct already, where tunnel_label was not yet present, so the current code works, but since it's part of uapi, make sure we don't introduce holes in structs. Therefore, add tunnel_ext that we can use generically in future (f.e. to flag OAM messages for backends, etc). Also add the offset to the compat tests to be sure should some compilers not padd the tail of the old version of bpf_tunnel_key. Fixes: 4018ab1875e0 ("bpf: support flow label for bpf_skb_{set, get}_tunnel_key") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: David S. Miller --- include/uapi/linux/bpf.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index 924f537183fd..23917bb47bf3 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -375,6 +375,7 @@ struct bpf_tunnel_key { }; __u8 tunnel_tos; __u8 tunnel_ttl; + __u16 tunnel_ext; __u32 tunnel_label; }; -- cgit v1.2.3 From ce7043fd903eb9722a885b98b53fffe2cedfb047 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Mar 2016 13:03:33 +0200 Subject: target: remove ->fabric_cleanup_nodeacl Instead we can clean up the list of default ACLs in core code. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- include/target/target_core_fabric.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 685a51aa98cc..5a9dd1892b70 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -87,7 +87,6 @@ struct target_core_fabric_ops { struct config_group *, const char *); void (*fabric_drop_np)(struct se_tpg_np *); int (*fabric_init_nodeacl)(struct se_node_acl *, const char *); - void (*fabric_cleanup_nodeacl)(struct se_node_acl *); struct configfs_attribute **tfc_discovery_attrs; struct configfs_attribute **tfc_wwn_attrs; -- cgit v1.2.3 From 839559e10601ed4459c802cbfb69747bf1844078 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 29 Mar 2016 13:03:35 +0200 Subject: target: add a new add_wwn_groups fabrics method We need to have the WWN fully initialized before addig default groups to it, so add a new method to add these groups after the WWN has been initialized. Also remove the default groups in the core while we're at it. Signed-off-by: Christoph Hellwig Signed-off-by: Nicholas Bellinger --- include/target/target_core_fabric.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/target/target_core_fabric.h b/include/target/target_core_fabric.h index 5a9dd1892b70..8ff6d40a294f 100644 --- a/include/target/target_core_fabric.h +++ b/include/target/target_core_fabric.h @@ -76,6 +76,7 @@ struct target_core_fabric_ops { struct se_wwn *(*fabric_make_wwn)(struct target_fabric_configfs *, struct config_group *, const char *); void (*fabric_drop_wwn)(struct se_wwn *); + void (*add_wwn_groups)(struct se_wwn *); struct se_portal_group *(*fabric_make_tpg)(struct se_wwn *, struct config_group *, const char *); void (*fabric_drop_tpg)(struct se_portal_group *); -- cgit v1.2.3 From 90195c3651800f9a7c14956f90c2b4eb0bc8f1fb Mon Sep 17 00:00:00 2001 From: Philipp Zabel Date: Tue, 23 Feb 2016 10:22:50 +0100 Subject: gpu: ipu-cpmem: modify ipu_cpmem_set_yuv_planar_full for better control Let ipu_cpmem_set_yuv_planar_full take a DRM_FORMAT instead of a V4L2_PIXFMT and allow better control over U/V stride, U offset and V offset settings in the CPMEM. Signed-off-by: Philipp Zabel --- include/video/imx-ipu-v3.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index eeba75395f7d..aa921052e7a6 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -194,8 +194,9 @@ int ipu_cpmem_set_format_rgb(struct ipuv3_channel *ch, int ipu_cpmem_set_format_passthrough(struct ipuv3_channel *ch, int width); void ipu_cpmem_set_yuv_interleaved(struct ipuv3_channel *ch, u32 pixel_format); void ipu_cpmem_set_yuv_planar_full(struct ipuv3_channel *ch, - u32 pixel_format, int stride, - int u_offset, int v_offset); + unsigned int uv_stride, + unsigned int u_offset, + unsigned int v_offset); void ipu_cpmem_set_yuv_planar(struct ipuv3_channel *ch, u32 pixel_format, int stride, int height); int ipu_cpmem_set_fmt(struct ipuv3_channel *ch, u32 drm_fourcc); -- cgit v1.2.3 From 2bbe32f7398c9b38916983b5823e11d6aaa10be2 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 14 Mar 2016 16:10:09 +0800 Subject: gpu: ipu-v3: ipu-dmfc: Make function ipu_dmfc_init_channel() return void Since the function ipu_dmfc_init_channel() always returns zero, we may change the return type to void to simplify the code. Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel --- include/video/imx-ipu-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index aa921052e7a6..8555d3728b52 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -237,7 +237,7 @@ void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc); int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc, unsigned long bandwidth_mbs, int burstsize); void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc); -int ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width); +void ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width); struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel); void ipu_dmfc_put(struct dmfc_channel *dmfc); -- cgit v1.2.3 From 27630c206bb00a252d21576d92f57bdcc3ab9455 Mon Sep 17 00:00:00 2001 From: Liu Ying Date: Mon, 14 Mar 2016 16:10:10 +0800 Subject: gpu: ipu-v3: ipu-dmfc: Rename ipu_dmfc_init_channel to ipu_dmfc_config_wait4eot The function name 'ipu_dmfc_config_wait4eot' matches the implementation of the function better than 'ipu_dmfc_init_channel', since it only touches the wait4eot bits. Signed-off-by: Liu Ying Signed-off-by: Philipp Zabel --- include/video/imx-ipu-v3.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/video/imx-ipu-v3.h b/include/video/imx-ipu-v3.h index 8555d3728b52..ad66589f2ae6 100644 --- a/include/video/imx-ipu-v3.h +++ b/include/video/imx-ipu-v3.h @@ -237,7 +237,7 @@ void ipu_dmfc_disable_channel(struct dmfc_channel *dmfc); int ipu_dmfc_alloc_bandwidth(struct dmfc_channel *dmfc, unsigned long bandwidth_mbs, int burstsize); void ipu_dmfc_free_bandwidth(struct dmfc_channel *dmfc); -void ipu_dmfc_init_channel(struct dmfc_channel *dmfc, int width); +void ipu_dmfc_config_wait4eot(struct dmfc_channel *dmfc, int width); struct dmfc_channel *ipu_dmfc_get(struct ipu_soc *ipu, int ipuv3_channel); void ipu_dmfc_put(struct dmfc_channel *dmfc); -- cgit v1.2.3 From 5a5abb1fa3b05dd6aa821525832644c1e7d2905f Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 31 Mar 2016 02:13:18 +0200 Subject: tun, bpf: fix suspicious RCU usage in tun_{attach, detach}_filter Sasha Levin reported a suspicious rcu_dereference_protected() warning found while fuzzing with trinity that is similar to this one: [ 52.765684] net/core/filter.c:2262 suspicious rcu_dereference_protected() usage! [ 52.765688] other info that might help us debug this: [ 52.765695] rcu_scheduler_active = 1, debug_locks = 1 [ 52.765701] 1 lock held by a.out/1525: [ 52.765704] #0: (rtnl_mutex){+.+.+.}, at: [] rtnl_lock+0x17/0x20 [ 52.765721] stack backtrace: [ 52.765728] CPU: 1 PID: 1525 Comm: a.out Not tainted 4.5.0+ #264 [...] [ 52.765768] Call Trace: [ 52.765775] [] dump_stack+0x85/0xc8 [ 52.765784] [] lockdep_rcu_suspicious+0xd5/0x110 [ 52.765792] [] sk_detach_filter+0x82/0x90 [ 52.765801] [] tun_detach_filter+0x35/0x90 [tun] [ 52.765810] [] __tun_chr_ioctl+0x354/0x1130 [tun] [ 52.765818] [] ? selinux_file_ioctl+0x130/0x210 [ 52.765827] [] tun_chr_ioctl+0x13/0x20 [tun] [ 52.765834] [] do_vfs_ioctl+0x96/0x690 [ 52.765843] [] ? security_file_ioctl+0x43/0x60 [ 52.765850] [] SyS_ioctl+0x79/0x90 [ 52.765858] [] do_syscall_64+0x62/0x140 [ 52.765866] [] entry_SYSCALL64_slow_path+0x25/0x25 Same can be triggered with PROVE_RCU (+ PROVE_RCU_REPEATEDLY) enabled from tun_attach_filter() when user space calls ioctl(tun_fd, TUN{ATTACH, DETACH}FILTER, ...) for adding/removing a BPF filter on tap devices. Since the fix in f91ff5b9ff52 ("net: sk_{detach|attach}_filter() rcu fixes") sk_attach_filter()/sk_detach_filter() now dereferences the filter with rcu_dereference_protected(), checking whether socket lock is held in control path. Since its introduction in 994051625981 ("tun: socket filter support"), tap filters are managed under RTNL lock from __tun_chr_ioctl(). Thus the sock_owned_by_user(sk) doesn't apply in this specific case and therefore triggers the false positive. Extend the BPF API with __sk_attach_filter()/__sk_detach_filter() pair that is used by tap filters and pass in lockdep_rtnl_is_held() for the rcu_dereference_protected() checks instead. Reported-by: Sasha Levin Signed-off-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/filter.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/filter.h b/include/linux/filter.h index 43aa1f8855c7..a51a5361695f 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -465,10 +465,14 @@ int bpf_prog_create_from_user(struct bpf_prog **pfp, struct sock_fprog *fprog, void bpf_prog_destroy(struct bpf_prog *fp); int sk_attach_filter(struct sock_fprog *fprog, struct sock *sk); +int __sk_attach_filter(struct sock_fprog *fprog, struct sock *sk, + bool locked); int sk_attach_bpf(u32 ufd, struct sock *sk); int sk_reuseport_attach_filter(struct sock_fprog *fprog, struct sock *sk); int sk_reuseport_attach_bpf(u32 ufd, struct sock *sk); int sk_detach_filter(struct sock *sk); +int __sk_detach_filter(struct sock *sk, bool locked); + int sk_get_filter(struct sock *sk, struct sock_filter __user *filter, unsigned int len); -- cgit v1.2.3 From d7e944c8ddc0983640a9a32868fb217485d12ca2 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:15 +0200 Subject: Revert "stmmac: Fix 'eth0: No PHY found' regression" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts commit 88f8b1bb41c6208f81b6a480244533ded7b59493. due to problems on GeekBox and Banana Pi M1 board when connected to a real transceiver instead of a switch via fixed-link. Signed-off-by: Giuseppe Cavallaro Cc: Gabriel Fernandez Cc: Andreas Färber Cc: Frank Schäfer Cc: Dinh Nguyen Cc: David S. Miller Signed-off-by: David S. Miller --- include/linux/stmmac.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 4bcf5a61aada..6e53fa8942a4 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -114,7 +114,6 @@ struct plat_stmmacenet_data { int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; - struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From a7657f128c279ae5796ab2ca7d04a7819f4259f0 Mon Sep 17 00:00:00 2001 From: Giuseppe CAVALLARO Date: Fri, 1 Apr 2016 09:07:16 +0200 Subject: stmmac: fix MDIO settings MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Initially the phy_bus_name was added to manipulate the driver name but it was recently just used to manage the fixed-link and then to take some decision at run-time. So the patch uses the is_pseudo_fixed_link and removes the phy_bus_name variable not necessary anymore. The driver can manage the mdio registration by using phy-handle, dwmac-mdio and own parameter e.g. snps,phy-addr. This patch takes care about all these possible configurations and fixes the mdio registration in case of there is a real transceiver or a switch (that needs to be managed by using fixed-link). Signed-off-by: Giuseppe Cavallaro Reviewed-by: Andreas Färber Tested-by: Frank Schäfer Cc: Gabriel Fernandez Cc: Dinh Nguyen Cc: David S. Miller Cc: Phil Reid Signed-off-by: David S. Miller --- include/linux/stmmac.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/stmmac.h b/include/linux/stmmac.h index 6e53fa8942a4..e6bc30a42a74 100644 --- a/include/linux/stmmac.h +++ b/include/linux/stmmac.h @@ -108,12 +108,12 @@ struct stmmac_axi { }; struct plat_stmmacenet_data { - char *phy_bus_name; int bus_id; int phy_addr; int interface; struct stmmac_mdio_bus_data *mdio_bus_data; struct device_node *phy_node; + struct device_node *mdio_node; struct stmmac_dma_cfg *dma_cfg; int clk_csr; int has_gmac; -- cgit v1.2.3 From 969e8d7e47f93ef693028667480558de8f70523f Mon Sep 17 00:00:00 2001 From: Chen Gang Date: Fri, 1 Apr 2016 14:31:17 -0700 Subject: include/linux/huge_mm.h: return NULL instead of false for pmd_trans_huge_lock() The return value of pmd_trans_huge_lock() is a pointer, not a boolean value, so use NULL instead of false as the return value. Signed-off-by: Chen Gang Acked-by: Kirill A. Shutemov Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 79b0ef6aaa14..7008623e24b1 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -127,7 +127,7 @@ static inline spinlock_t *pmd_trans_huge_lock(pmd_t *pmd, if (pmd_trans_huge(*pmd) || pmd_devmap(*pmd)) return __pmd_trans_huge_lock(pmd, vma); else - return false; + return NULL; } static inline int hpage_nr_pages(struct page *page) { -- cgit v1.2.3 From bbe3de2560280c0420d36a192e69f2772e893cf4 Mon Sep 17 00:00:00 2001 From: Lucas Stach Date: Fri, 1 Apr 2016 14:31:32 -0700 Subject: mm/page_isolation: fix tracepoint to mirror check function behavior Page isolation has not failed if the fin pfn extends beyond the end pfn and test_pages_isolated checks this correctly. Fix the tracepoint to report the same result as the actual check function. Signed-off-by: Lucas Stach Acked-by: Vlastimil Babka Cc: Joonsoo Kim Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/trace/events/page_isolation.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/page_isolation.h b/include/trace/events/page_isolation.h index 6fb644029c80..8738a78e6bf4 100644 --- a/include/trace/events/page_isolation.h +++ b/include/trace/events/page_isolation.h @@ -29,7 +29,7 @@ TRACE_EVENT(test_pages_isolated, TP_printk("start_pfn=0x%lx end_pfn=0x%lx fin_pfn=0x%lx ret=%s", __entry->start_pfn, __entry->end_pfn, __entry->fin_pfn, - __entry->end_pfn == __entry->fin_pfn ? "success" : "fail") + __entry->end_pfn <= __entry->fin_pfn ? "success" : "fail") ); #endif /* _TRACE_PAGE_ISOLATION_H */ -- cgit v1.2.3 From 0f5dcf8de974db5970d48d12a202997baa2846a1 Mon Sep 17 00:00:00 2001 From: Mark Fasheh Date: Tue, 29 Mar 2016 17:19:55 -0700 Subject: btrfs: Add qgroup tracing This patch adds tracepoints to the qgroup code on both the reporting side (insert_dirty_extents) and the accounting side. Taken together it allows us to see what qgroup operations have happened, and what their result was. Signed-off-by: Mark Fasheh Reviewed-by: David Sterba Signed-off-by: David Sterba --- include/trace/events/btrfs.h | 89 +++++++++++++++++++++++++++++++++++++++++++- 1 file changed, 88 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/btrfs.h b/include/trace/events/btrfs.h index d866f21efbbf..467a4d205ff6 100644 --- a/include/trace/events/btrfs.h +++ b/include/trace/events/btrfs.h @@ -23,7 +23,7 @@ struct map_lookup; struct extent_buffer; struct btrfs_work; struct __btrfs_workqueue; -struct btrfs_qgroup_operation; +struct btrfs_qgroup_extent_record; #define show_ref_type(type) \ __print_symbolic(type, \ @@ -1231,6 +1231,93 @@ DEFINE_EVENT(btrfs__qgroup_delayed_ref, btrfs_qgroup_free_delayed_ref, TP_ARGS(ref_root, reserved) ); + +DECLARE_EVENT_CLASS(btrfs_qgroup_extent, + TP_PROTO(struct btrfs_qgroup_extent_record *rec), + + TP_ARGS(rec), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + ), + + TP_fast_assign( + __entry->bytenr = rec->bytenr, + __entry->num_bytes = rec->num_bytes; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu", + (unsigned long long)__entry->bytenr, + (unsigned long long)__entry->num_bytes) +); + +DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_account_extents, + + TP_PROTO(struct btrfs_qgroup_extent_record *rec), + + TP_ARGS(rec) +); + +DEFINE_EVENT(btrfs_qgroup_extent, btrfs_qgroup_insert_dirty_extent, + + TP_PROTO(struct btrfs_qgroup_extent_record *rec), + + TP_ARGS(rec) +); + +TRACE_EVENT(btrfs_qgroup_account_extent, + + TP_PROTO(u64 bytenr, u64 num_bytes, u64 nr_old_roots, u64 nr_new_roots), + + TP_ARGS(bytenr, num_bytes, nr_old_roots, nr_new_roots), + + TP_STRUCT__entry( + __field( u64, bytenr ) + __field( u64, num_bytes ) + __field( u64, nr_old_roots ) + __field( u64, nr_new_roots ) + ), + + TP_fast_assign( + __entry->bytenr = bytenr; + __entry->num_bytes = num_bytes; + __entry->nr_old_roots = nr_old_roots; + __entry->nr_new_roots = nr_new_roots; + ), + + TP_printk("bytenr = %llu, num_bytes = %llu, nr_old_roots = %llu, " + "nr_new_roots = %llu", + __entry->bytenr, + __entry->num_bytes, + __entry->nr_old_roots, + __entry->nr_new_roots) +); + +TRACE_EVENT(qgroup_update_counters, + + TP_PROTO(u64 qgid, u64 cur_old_count, u64 cur_new_count), + + TP_ARGS(qgid, cur_old_count, cur_new_count), + + TP_STRUCT__entry( + __field( u64, qgid ) + __field( u64, cur_old_count ) + __field( u64, cur_new_count ) + ), + + TP_fast_assign( + __entry->qgid = qgid; + __entry->cur_old_count = cur_old_count; + __entry->cur_new_count = cur_new_count; + ), + + TP_printk("qgid = %llu, cur_old_count = %llu, cur_new_count = %llu", + __entry->qgid, + __entry->cur_old_count, + __entry->cur_new_count) +); + #endif /* _TRACE_BTRFS_H */ /* This part must be outside protection */ -- cgit v1.2.3 From 09cbfeaf1a5a67bfb3201e0c83c810cecb2efa5a Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:47 +0300 Subject: mm, fs: get rid of PAGE_CACHE_* and page_cache_{get,release} macros PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} macros were introduced *long* time ago with promise that one day it will be possible to implement page cache with bigger chunks than PAGE_SIZE. This promise never materialized. And unlikely will. We have many places where PAGE_CACHE_SIZE assumed to be equal to PAGE_SIZE. And it's constant source of confusion on whether PAGE_CACHE_* or PAGE_* constant should be used in a particular case, especially on the border between fs and mm. Global switching to PAGE_CACHE_SIZE != PAGE_SIZE would cause to much breakage to be doable. Let's stop pretending that pages in page cache are special. They are not. The changes are pretty straight-forward: - << (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> ; - >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) -> ; - PAGE_CACHE_{SIZE,SHIFT,MASK,ALIGN} -> PAGE_{SIZE,SHIFT,MASK,ALIGN}; - page_cache_get() -> get_page(); - page_cache_release() -> put_page(); This patch contains automated changes generated with coccinelle using script below. For some reason, coccinelle doesn't patch header files. I've called spatch for them manually. The only adjustment after coccinelle is revert of changes to PAGE_CAHCE_ALIGN definition: we are going to drop it later. There are few places in the code where coccinelle didn't reach. I'll fix them manually in a separate patch. Comments and documentation also will be addressed with the separate patch. virtual patch @@ expression E; @@ - E << (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ expression E; @@ - E >> (PAGE_CACHE_SHIFT - PAGE_SHIFT) + E @@ @@ - PAGE_CACHE_SHIFT + PAGE_SHIFT @@ @@ - PAGE_CACHE_SIZE + PAGE_SIZE @@ @@ - PAGE_CACHE_MASK + PAGE_MASK @@ expression E; @@ - PAGE_CACHE_ALIGN(E) + PAGE_ALIGN(E) @@ expression E; @@ - page_cache_get(E) + get_page(E) @@ expression E; @@ - page_cache_release(E) + put_page(E) Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/bio.h | 2 +- include/linux/blkdev.h | 2 +- include/linux/buffer_head.h | 4 ++-- include/linux/ceph/libceph.h | 4 ++-- include/linux/f2fs_fs.h | 4 ++-- include/linux/fs.h | 4 ++-- include/linux/nfs_page.h | 2 +- include/linux/pagemap.h | 14 +++++++------- include/linux/swap.h | 2 +- 9 files changed, 19 insertions(+), 19 deletions(-) (limited to 'include') diff --git a/include/linux/bio.h b/include/linux/bio.h index 88bc64f00bb5..6b7481f62218 100644 --- a/include/linux/bio.h +++ b/include/linux/bio.h @@ -41,7 +41,7 @@ #endif #define BIO_MAX_PAGES 256 -#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_CACHE_SHIFT) +#define BIO_MAX_SIZE (BIO_MAX_PAGES << PAGE_SHIFT) #define BIO_MAX_SECTORS (BIO_MAX_SIZE >> 9) /* diff --git a/include/linux/blkdev.h b/include/linux/blkdev.h index 7e5d7e018bea..669e419d6234 100644 --- a/include/linux/blkdev.h +++ b/include/linux/blkdev.h @@ -1372,7 +1372,7 @@ unsigned char *read_dev_sector(struct block_device *, sector_t, Sector *); static inline void put_dev_sector(Sector p) { - page_cache_release(p.v); + put_page(p.v); } static inline bool __bvec_gap_to_prev(struct request_queue *q, diff --git a/include/linux/buffer_head.h b/include/linux/buffer_head.h index c67f052cc5e5..d48daa3f6f20 100644 --- a/include/linux/buffer_head.h +++ b/include/linux/buffer_head.h @@ -43,7 +43,7 @@ enum bh_state_bits { */ }; -#define MAX_BUF_PER_PAGE (PAGE_CACHE_SIZE / 512) +#define MAX_BUF_PER_PAGE (PAGE_SIZE / 512) struct page; struct buffer_head; @@ -263,7 +263,7 @@ void buffer_init(void); static inline void attach_page_buffers(struct page *page, struct buffer_head *head) { - page_cache_get(page); + get_page(page); SetPagePrivate(page); set_page_private(page, (unsigned long)head); } diff --git a/include/linux/ceph/libceph.h b/include/linux/ceph/libceph.h index e7975e4681e1..db92a8d4926e 100644 --- a/include/linux/ceph/libceph.h +++ b/include/linux/ceph/libceph.h @@ -176,8 +176,8 @@ extern void ceph_put_snap_context(struct ceph_snap_context *sc); */ static inline int calc_pages_for(u64 off, u64 len) { - return ((off+len+PAGE_CACHE_SIZE-1) >> PAGE_CACHE_SHIFT) - - (off >> PAGE_CACHE_SHIFT); + return ((off+len+PAGE_SIZE-1) >> PAGE_SHIFT) - + (off >> PAGE_SHIFT); } extern struct kmem_cache *ceph_inode_cachep; diff --git a/include/linux/f2fs_fs.h b/include/linux/f2fs_fs.h index 9eb215a155e0..b90e9bdbd1dd 100644 --- a/include/linux/f2fs_fs.h +++ b/include/linux/f2fs_fs.h @@ -262,7 +262,7 @@ struct f2fs_node { /* * For NAT entries */ -#define NAT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_nat_entry)) +#define NAT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_nat_entry)) struct f2fs_nat_entry { __u8 version; /* latest version of cached nat entry */ @@ -282,7 +282,7 @@ struct f2fs_nat_block { * Not allow to change this. */ #define SIT_VBLOCK_MAP_SIZE 64 -#define SIT_ENTRY_PER_BLOCK (PAGE_CACHE_SIZE / sizeof(struct f2fs_sit_entry)) +#define SIT_ENTRY_PER_BLOCK (PAGE_SIZE / sizeof(struct f2fs_sit_entry)) /* * Note that f2fs_sit_entry->vblocks has the following bit-field information. diff --git a/include/linux/fs.h b/include/linux/fs.h index 14a97194b34b..304991a80e23 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -929,7 +929,7 @@ static inline struct file *get_file(struct file *f) /* Page cache limit. The filesystems should put that into their s_maxbytes limits, otherwise bad things can happen in VM. */ #if BITS_PER_LONG==32 -#define MAX_LFS_FILESIZE (((loff_t)PAGE_CACHE_SIZE << (BITS_PER_LONG-1))-1) +#define MAX_LFS_FILESIZE (((loff_t)PAGE_SIZE << (BITS_PER_LONG-1))-1) #elif BITS_PER_LONG==64 #define MAX_LFS_FILESIZE ((loff_t)0x7fffffffffffffffLL) #endif @@ -2067,7 +2067,7 @@ extern int generic_update_time(struct inode *, struct timespec *, int); /* /sys/fs */ extern struct kobject *fs_kobj; -#define MAX_RW_COUNT (INT_MAX & PAGE_CACHE_MASK) +#define MAX_RW_COUNT (INT_MAX & PAGE_MASK) #ifdef CONFIG_MANDATORY_FILE_LOCKING extern int locks_mandatory_locked(struct file *); diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index f2f650f136ee..efada239205e 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -184,7 +184,7 @@ nfs_list_entry(struct list_head *head) static inline loff_t req_offset(struct nfs_page *req) { - return (((loff_t)req->wb_index) << PAGE_CACHE_SHIFT) + req->wb_offset; + return (((loff_t)req->wb_index) << PAGE_SHIFT) + req->wb_offset; } #endif /* _LINUX_NFS_PAGE_H */ diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index 1ebd65c91422..f396ccb900cc 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -390,13 +390,13 @@ static inline pgoff_t page_to_pgoff(struct page *page) return page->index << compound_order(page); if (likely(!PageTransTail(page))) - return page->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + return page->index; /* * We don't initialize ->index for tail pages: calculate based on * head page */ - pgoff = compound_head(page)->index << (PAGE_CACHE_SHIFT - PAGE_SHIFT); + pgoff = compound_head(page)->index; pgoff += page - compound_head(page); return pgoff; } @@ -406,12 +406,12 @@ static inline pgoff_t page_to_pgoff(struct page *page) */ static inline loff_t page_offset(struct page *page) { - return ((loff_t)page->index) << PAGE_CACHE_SHIFT; + return ((loff_t)page->index) << PAGE_SHIFT; } static inline loff_t page_file_offset(struct page *page) { - return ((loff_t)page_file_index(page)) << PAGE_CACHE_SHIFT; + return ((loff_t)page_file_index(page)) << PAGE_SHIFT; } extern pgoff_t linear_hugepage_index(struct vm_area_struct *vma, @@ -425,7 +425,7 @@ static inline pgoff_t linear_page_index(struct vm_area_struct *vma, return linear_hugepage_index(vma, address); pgoff = (address - vma->vm_start) >> PAGE_SHIFT; pgoff += vma->vm_pgoff; - return pgoff >> (PAGE_CACHE_SHIFT - PAGE_SHIFT); + return pgoff; } extern void __lock_page(struct page *page); @@ -671,8 +671,8 @@ static inline int add_to_page_cache(struct page *page, static inline unsigned long dir_pages(struct inode *inode) { - return (unsigned long)(inode->i_size + PAGE_CACHE_SIZE - 1) >> - PAGE_CACHE_SHIFT; + return (unsigned long)(inode->i_size + PAGE_SIZE - 1) >> + PAGE_SHIFT; } #endif /* _LINUX_PAGEMAP_H */ diff --git a/include/linux/swap.h b/include/linux/swap.h index d18b65c53dbb..3d980ea1c946 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -435,7 +435,7 @@ struct backing_dev_info; /* only sparc can not include linux/pagemap.h in this file * so leave page_cache_release and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ - page_cache_release(page) + put_page(page) #define free_pages_and_swap_cache(pages, nr) \ release_pages((pages), (nr), false); -- cgit v1.2.3 From ea1754a084760e68886f5b725c8eaada9cc57155 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:48 +0300 Subject: mm, fs: remove remaining PAGE_CACHE_* and page_cache_{get,release} usage Mostly direct substitution with occasional adjustment or removing outdated comments. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/backing-dev-defs.h | 2 +- include/linux/mm.h | 2 +- include/linux/mm_types.h | 2 +- include/linux/nfs_page.h | 4 ++-- include/linux/nilfs2_fs.h | 4 ++-- include/linux/pagemap.h | 3 +-- include/linux/sunrpc/svc.h | 2 +- include/linux/swap.h | 2 +- 8 files changed, 10 insertions(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/backing-dev-defs.h b/include/linux/backing-dev-defs.h index 1b4d69f68c33..3f103076d0bf 100644 --- a/include/linux/backing-dev-defs.h +++ b/include/linux/backing-dev-defs.h @@ -135,7 +135,7 @@ struct bdi_writeback { struct backing_dev_info { struct list_head bdi_list; - unsigned long ra_pages; /* max readahead in PAGE_CACHE_SIZE units */ + unsigned long ra_pages; /* max readahead in PAGE_SIZE units */ unsigned int capabilities; /* Device capabilities */ congested_fn *congested_fn; /* Function pointer if device is md/dm */ void *congested_data; /* Pointer to aux data for congested func */ diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..ffcff53e3b2b 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -623,7 +623,7 @@ void do_set_pte(struct vm_area_struct *vma, unsigned long address, * * A page may belong to an inode's memory mapping. In this case, page->mapping * is the pointer to the inode, and page->index is the file offset of the page, - * in units of PAGE_CACHE_SIZE. + * in units of PAGE_SIZE. * * If pagecache pages are not associated with an inode, they are said to be * anonymous pages. These may become associated with the swapcache, and in that diff --git a/include/linux/mm_types.h b/include/linux/mm_types.h index 944b2b37313b..c2d75b4fa86c 100644 --- a/include/linux/mm_types.h +++ b/include/linux/mm_types.h @@ -341,7 +341,7 @@ struct vm_area_struct { /* Information about our backing store: */ unsigned long vm_pgoff; /* Offset (within vm_file) in PAGE_SIZE - units, *not* PAGE_CACHE_SIZE */ + units */ struct file * vm_file; /* File we map to (can be NULL). */ void * vm_private_data; /* was vm_pte (shared mem) */ diff --git a/include/linux/nfs_page.h b/include/linux/nfs_page.h index efada239205e..957049f72290 100644 --- a/include/linux/nfs_page.h +++ b/include/linux/nfs_page.h @@ -41,8 +41,8 @@ struct nfs_page { struct page *wb_page; /* page to read in/write out */ struct nfs_open_context *wb_context; /* File state context info */ struct nfs_lock_context *wb_lock_context; /* lock context info */ - pgoff_t wb_index; /* Offset >> PAGE_CACHE_SHIFT */ - unsigned int wb_offset, /* Offset & ~PAGE_CACHE_MASK */ + pgoff_t wb_index; /* Offset >> PAGE_SHIFT */ + unsigned int wb_offset, /* Offset & ~PAGE_MASK */ wb_pgbase, /* Start of page data */ wb_bytes; /* Length of request */ struct kref wb_kref; /* reference count */ diff --git a/include/linux/nilfs2_fs.h b/include/linux/nilfs2_fs.h index 9abb763e4b86..e9fcf90b270d 100644 --- a/include/linux/nilfs2_fs.h +++ b/include/linux/nilfs2_fs.h @@ -331,7 +331,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) { unsigned len = le16_to_cpu(dlen); -#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536) +#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == NILFS_MAX_REC_LEN) return 1 << 16; #endif @@ -340,7 +340,7 @@ static inline unsigned nilfs_rec_len_from_disk(__le16 dlen) static inline __le16 nilfs_rec_len_to_disk(unsigned len) { -#if !defined(__KERNEL__) || (PAGE_CACHE_SIZE >= 65536) +#if !defined(__KERNEL__) || (PAGE_SIZE >= 65536) if (len == (1 << 16)) return cpu_to_le16(NILFS_MAX_REC_LEN); else if (len > (1 << 16)) diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index f396ccb900cc..b3fc0370c14f 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -535,8 +535,7 @@ extern void add_page_wait_queue(struct page *page, wait_queue_t *waiter); /* * Fault a userspace page into pagetables. Return non-zero on a fault. * - * This assumes that two userspace pages are always sufficient. That's - * not true if PAGE_CACHE_SIZE > PAGE_SIZE. + * This assumes that two userspace pages are always sufficient. */ static inline int fault_in_pages_writeable(char __user *uaddr, int size) { diff --git a/include/linux/sunrpc/svc.h b/include/linux/sunrpc/svc.h index cc0fc712bb82..7ca44fb5b675 100644 --- a/include/linux/sunrpc/svc.h +++ b/include/linux/sunrpc/svc.h @@ -129,7 +129,7 @@ static inline void svc_get(struct svc_serv *serv) * * These happen to all be powers of 2, which is not strictly * necessary but helps enforce the real limitation, which is - * that they should be multiples of PAGE_CACHE_SIZE. + * that they should be multiples of PAGE_SIZE. * * For UDP transports, a block plus NFS,RPC, and UDP headers * has to fit into the IP datagram limit of 64K. The largest diff --git a/include/linux/swap.h b/include/linux/swap.h index 3d980ea1c946..2b83359c19ca 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -433,7 +433,7 @@ struct backing_dev_info; #define si_swapinfo(val) \ do { (val)->freeswap = (val)->totalswap = 0; } while (0) /* only sparc can not include linux/pagemap.h in this file - * so leave page_cache_release and release_pages undeclared... */ + * so leave put_page and release_pages undeclared... */ #define free_page_and_swap_cache(page) \ put_page(page) #define free_pages_and_swap_cache(pages, nr) \ -- cgit v1.2.3 From 1fa64f198b9f8d6ec0f7aec7c18dc94684391140 Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Fri, 1 Apr 2016 15:29:49 +0300 Subject: mm: drop PAGE_CACHE_* and page_cache_{get,release} definition All users gone. We can remove these macros. Signed-off-by: Kirill A. Shutemov Acked-by: Michal Hocko Signed-off-by: Linus Torvalds --- include/linux/pagemap.h | 15 --------------- 1 file changed, 15 deletions(-) (limited to 'include') diff --git a/include/linux/pagemap.h b/include/linux/pagemap.h index b3fc0370c14f..7e1ab155c67c 100644 --- a/include/linux/pagemap.h +++ b/include/linux/pagemap.h @@ -86,21 +86,6 @@ static inline void mapping_set_gfp_mask(struct address_space *m, gfp_t mask) (__force unsigned long)mask; } -/* - * The page cache can be done in larger chunks than - * one page, because it allows for more efficient - * throughput (it can then be mapped into user - * space in smaller chunks for same flexibility). - * - * Or rather, it _will_ be done in larger chunks. - */ -#define PAGE_CACHE_SHIFT PAGE_SHIFT -#define PAGE_CACHE_SIZE PAGE_SIZE -#define PAGE_CACHE_MASK PAGE_MASK -#define PAGE_CACHE_ALIGN(addr) (((addr)+PAGE_CACHE_SIZE-1)&PAGE_CACHE_MASK) - -#define page_cache_get(page) get_page(page) -#define page_cache_release(page) put_page(page) void release_pages(struct page **pages, int nr, bool cold); /* -- cgit v1.2.3 From 749b48faaf64d1081d7216068ff3da92c230bad0 Mon Sep 17 00:00:00 2001 From: Alex Deucher Date: Fri, 1 Apr 2016 10:12:49 -0400 Subject: drm/ttm: use phys_addr_t for ttm_bus_placement MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Fixes ttm on platforms like PPC460 where the CPU is in 32-bit mode, but the physical addresses are >32 bits. Extracted from a patch by Hans Verkuil. Tested-by: Julian Margetson Acked-by: Thomas Hellstrom Reviewed-by: Christian König Cc: Thomas Hellstrom Cc: Julian Margetson Cc: Hans Verkuil Signed-off-by: Alex Deucher --- include/drm/ttm/ttm_bo_api.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/ttm/ttm_bo_api.h b/include/drm/ttm/ttm_bo_api.h index afae2316bd43..055a08ddac02 100644 --- a/include/drm/ttm/ttm_bo_api.h +++ b/include/drm/ttm/ttm_bo_api.h @@ -92,7 +92,7 @@ struct ttm_placement { */ struct ttm_bus_placement { void *addr; - unsigned long base; + phys_addr_t base; unsigned long size; unsigned long offset; bool is_iomem; -- cgit v1.2.3 From 84ea3a18c06c7ae94b76f763c462bf1e0ce4dc79 Mon Sep 17 00:00:00 2001 From: Luis de Bethencourt Date: Fri, 18 Mar 2016 16:09:29 +0000 Subject: mac80211: add doc for RX_FLAG_DUP_VALIDATED flag Add documentation for the flag for duplication check. Fixes the following warning when running make htmldocs: warning: Enum value 'RX_FLAG_DUP_VALIDATED' not described in enum 'mac80211_rx_flags' Signed-off-by: Luis de Bethencourt [fix description] Signed-off-by: Johannes Berg --- include/net/mac80211.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/mac80211.h b/include/net/mac80211.h index 0c09da34b67a..e385eb3076a1 100644 --- a/include/net/mac80211.h +++ b/include/net/mac80211.h @@ -1001,6 +1001,8 @@ ieee80211_tx_info_clear_status(struct ieee80211_tx_info *info) * flag indicates that the PN was verified for replay protection. * Note that this flag is also currently only supported when a frame * is also decrypted (ie. @RX_FLAG_DECRYPTED must be set) + * @RX_FLAG_DUP_VALIDATED: The driver should set this flag if it did + * de-duplication by itself. * @RX_FLAG_FAILED_FCS_CRC: Set this flag if the FCS check failed on * the frame. * @RX_FLAG_FAILED_PLCP_CRC: Set this flag if the PCLP check failed on -- cgit v1.2.3 From 5ddfe0858ea7848c5d4efe3f4319e7543522e0ee Mon Sep 17 00:00:00 2001 From: Hannes Reinecke Date: Fri, 1 Apr 2016 08:57:36 +0200 Subject: scsi: Do not attach VPD to devices that don't support it The patch "scsi: rescan VPD attributes" introduced a regression in which devices that don't support VPD were being scanned for VPD attributes anyway. This could cause issues for some devices and should be avoided so the check for scsi_level has been moved out of scsi_add_lun and into scsi_attach_vpd so that all callers will not scan VPD for devices that don't support it. [mkp: Merge fix] Fixes: 09e2b0b14690 ("scsi: rescan VPD attributes") Cc: #v4.5+ Suggested-by: Alexander Duyck Signed-off-by: Hannes Reinecke Reviewed-by: Johannes Thumshirn Signed-off-by: Martin K. Petersen --- include/scsi/scsi_device.h | 25 +++++++++++++++++++++++++ 1 file changed, 25 insertions(+) (limited to 'include') diff --git a/include/scsi/scsi_device.h b/include/scsi/scsi_device.h index c067019ed12a..74d79bde7075 100644 --- a/include/scsi/scsi_device.h +++ b/include/scsi/scsi_device.h @@ -516,6 +516,31 @@ static inline int scsi_device_tpgs(struct scsi_device *sdev) return sdev->inquiry ? (sdev->inquiry[5] >> 4) & 0x3 : 0; } +/** + * scsi_device_supports_vpd - test if a device supports VPD pages + * @sdev: the &struct scsi_device to test + * + * If the 'try_vpd_pages' flag is set it takes precedence. + * Otherwise we will assume VPD pages are supported if the + * SCSI level is at least SPC-3 and 'skip_vpd_pages' is not set. + */ +static inline int scsi_device_supports_vpd(struct scsi_device *sdev) +{ + /* Attempt VPD inquiry if the device blacklist explicitly calls + * for it. + */ + if (sdev->try_vpd_pages) + return 1; + /* + * Although VPD inquiries can go to SCSI-2 type devices, + * some USB ones crash on receiving them, and the pages + * we currently ask for are for SPC-3 and beyond + */ + if (sdev->scsi_level > SCSI_SPC_2 && !sdev->skip_vpd_pages) + return 1; + return 0; +} + #define MODULE_ALIAS_SCSI_DEVICE(type) \ MODULE_ALIAS("scsi:t-" __stringify(type) "*") #define SCSI_DEVICE_MODALIAS_FMT "scsi:t-0x%02x" -- cgit v1.2.3 From b70bb984489363aadd5ccc94d919629d9e264d36 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Tue, 15 Mar 2016 22:37:17 +0100 Subject: iommu: provide of_xlate pointer unconditionally iommu drivers that support the standard DT bindings use a of_xlate callback pointer, but that is only part of struct iommu_ops when CONFIG_OF_IOMMU is enabled, leading to build errors in randconfig builds when that is not provided: drivers/iommu/mtk_iommu.c:497:2: error: unknown field 'of_xlate' specified in initializer .of_xlate = mtk_iommu_of_xlate, ^ drivers/iommu/mtk_iommu.c:497:14: error: initialization from incompatible pointer type [-Werror=incompatible-pointer-types] .of_xlate = mtk_iommu_of_xlate, ^~~~~~~~~~~~~~~~~~ drivers/iommu/mtk_iommu.c:497:14: note: (near initialization for 'mtk_iommu_ops.domain_get_attr') We can work around it by adding more #ifdefs in each driver, but it seems nicer to just allow setting the pointer even if it is unused. This makes the driver code look nicer, and it gives better compile-time coverage when test building on other architectures. Signed-off-by: Arnd Bergmann Fixes: 0df4fabe208d ("iommu/mediatek: Add mt8173 IOMMU driver") Reviewed-by: Robin Murphy Signed-off-by: Joerg Roedel --- include/linux/iommu.h | 2 -- 1 file changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/iommu.h b/include/linux/iommu.h index a5c539fa5d2b..ef7a6ecd8584 100644 --- a/include/linux/iommu.h +++ b/include/linux/iommu.h @@ -195,9 +195,7 @@ struct iommu_ops { /* Get the number of windows per domain */ u32 (*domain_get_windows)(struct iommu_domain *domain); -#ifdef CONFIG_OF_IOMMU int (*of_xlate)(struct device *dev, struct of_phandle_args *args); -#endif unsigned long pgsize_bitmap; void *priv; -- cgit v1.2.3 From 95272c29378ee7dc15f43fa2758cb28a5913a06d Mon Sep 17 00:00:00 2001 From: Paolo Bonzini Date: Thu, 31 Mar 2016 09:38:51 +0200 Subject: compiler-gcc: disable -ftracer for __noclone functions -ftracer can duplicate asm blocks causing compilation to fail in noclone functions. For example, KVM declares a global variable in an asm like asm("2: ... \n .pushsection data \n .global vmx_return \n vmx_return: .long 2b"); and -ftracer causes a double declaration. Cc: Andrew Morton Cc: Michal Marek Cc: stable@vger.kernel.org Cc: kvm@vger.kernel.org Reported-by: Linda Walsh Signed-off-by: Paolo Bonzini --- include/linux/compiler-gcc.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/compiler-gcc.h b/include/linux/compiler-gcc.h index 22ab246feed3..eeae401a2412 100644 --- a/include/linux/compiler-gcc.h +++ b/include/linux/compiler-gcc.h @@ -199,7 +199,7 @@ #define unreachable() __builtin_unreachable() /* Mark a function definition as prohibited from being cloned. */ -#define __noclone __attribute__((__noclone__)) +#define __noclone __attribute__((__noclone__, __optimize__("no-tracer"))) #endif /* GCC_VERSION >= 40500 */ -- cgit v1.2.3 From eb8e97715f29a1240cdf67b0df725be27433259f Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Fri, 1 Apr 2016 14:30:32 -0300 Subject: sctp: use list_* in sctp_list_dequeue Use list_* helpers in sctp_list_dequeue, more readable. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 65521cfdcade..03fb33efcae2 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -386,11 +386,9 @@ static inline struct list_head *sctp_list_dequeue(struct list_head *list) { struct list_head *result = NULL; - if (list->next != list) { + if (!list_empty(list)) { result = list->next; - list->next = result->next; - list->next->prev = list; - INIT_LIST_HEAD(result); + list_del_init(result); } return result; } -- cgit v1.2.3 From 60901df3aed230d4565dca003f11b6a95fbf30d9 Mon Sep 17 00:00:00 2001 From: Ross Lagerwall Date: Thu, 17 Mar 2016 16:51:59 +0000 Subject: xen: Fix page <-> pfn conversion on 32 bit systems Commit 1084b1988d22dc165c9dbbc2b0e057f9248ac4db (xen: Add Xen specific page definition) caused a regression in 4.4. The xen functions to convert between pages and pfns fail due to an overflow on systems where a physical address may not fit in an unsigned long (e.g. x86 32 bit PAE systems). Rework the conversion to avoid overflow. This should also result in simpler object code. This bug manifested itself as disk corruption with Linux 4.4 when using blkfront in a Xen HVM x86 32 bit guest with more than 4 GiB of memory. Signed-off-by: Ross Lagerwall Cc: # 4.4+ Signed-off-by: David Vrabel --- include/xen/page.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/xen/page.h b/include/xen/page.h index 96294ac93755..9dc46cb8a0fd 100644 --- a/include/xen/page.h +++ b/include/xen/page.h @@ -15,9 +15,9 @@ */ #define xen_pfn_to_page(xen_pfn) \ - ((pfn_to_page(((unsigned long)(xen_pfn) << XEN_PAGE_SHIFT) >> PAGE_SHIFT))) + (pfn_to_page((unsigned long)(xen_pfn) >> (PAGE_SHIFT - XEN_PAGE_SHIFT))) #define page_to_xen_pfn(page) \ - (((page_to_pfn(page)) << PAGE_SHIFT) >> XEN_PAGE_SHIFT) + ((page_to_pfn(page)) << (PAGE_SHIFT - XEN_PAGE_SHIFT)) #define XEN_PFN_PER_PAGE (PAGE_SIZE / XEN_PAGE_SIZE) -- cgit v1.2.3 From 18fcf49f87f4b280d3cd695fc77766004b223af5 Mon Sep 17 00:00:00 2001 From: WANG Cong Date: Mon, 4 Apr 2016 10:32:48 -0700 Subject: net_sched: fix a memory leak in tc action Fixes: ddf97ccdd7cb ("net_sched: add network namespace support for tc actions") Reported-by: Dmitry Vyukov Tested-by: Dmitry Vyukov Cc: Jamal Hadi Salim Signed-off-by: Cong Wang Signed-off-by: David S. Miller --- include/net/act_api.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/act_api.h b/include/net/act_api.h index 2a19fe111c78..03e322b30218 100644 --- a/include/net/act_api.h +++ b/include/net/act_api.h @@ -135,6 +135,7 @@ void tcf_hashinfo_destroy(const struct tc_action_ops *ops, static inline void tc_action_net_exit(struct tc_action_net *tn) { tcf_hashinfo_destroy(tn->ops, tn->hinfo); + kfree(tn->hinfo); } int tcf_generic_walker(struct tc_action_net *tn, struct sk_buff *skb, -- cgit v1.2.3 From c12d2da56d0e07d230968ee2305aaa86b93a6832 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Mon, 4 Apr 2016 10:24:58 +0200 Subject: mm/gup: Remove the macro overload API migration helpers from the get_user*() APIs The pkeys changes brought about a truly hideous set of macros in: cde70140fed8 ("mm/gup: Overload get_user_pages() functions") ... which macros are (ab-)using the fact that __VA_ARGS__ can be used to shift parameter positions in macro arguments without breaking the build and so can be used to call separate C functions depending on the number of arguments of the macro. This allowed easy migration of these 3 GUP APIs, as both these variants worked at the C level: old: ret = get_user_pages(current, current->mm, address, 1, 1, 0, &page, NULL); new: ret = get_user_pages(address, 1, 1, 0, &page, NULL); ... while we also generated a (functionally harmless but noticeable) build time warning if the old API was used. As there are over 300 uses of these APIs, this trick eased the migration of the API and avoided excessive migration pain in linux-next. Now, with its work done, get rid of all of that complication and ugliness: 3 files changed, 16 insertions(+), 140 deletions(-) ... where the linecount of the migration hack was further inflated by the fact that there are NOMMU variants of these GUP APIs as well. Much of the conversion was done in linux-next over the past couple of months, and Linus recently removed all remaining old API uses from the upstream tree in the following upstrea commit: cb107161df3c ("Convert straggling drivers to new six-argument get_user_pages()") There was one more old-API usage in mm/gup.c, in the CONFIG_HAVE_GENERIC_RCU_GUP code path that ARM, ARM64 and PowerPC uses. After this commit any old API usage will break the build. [ Also fixed a PowerPC/HAVE_GENERIC_RCU_GUP warning reported by Stephen Rothwell. ] Cc: Andrew Morton Cc: Dave Hansen Cc: Dave Hansen Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephen Rothwell Cc: Thomas Gleixner Cc: linux-kernel@vger.kernel.org Cc: linux-mm@kvack.org Signed-off-by: Ingo Molnar --- include/linux/mm.h | 64 +++--------------------------------------------------- 1 file changed, 3 insertions(+), 61 deletions(-) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index ed6407d1b7b5..d6508a025a31 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1250,78 +1250,20 @@ long get_user_pages_remote(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages6(unsigned long start, unsigned long nr_pages, +long get_user_pages(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, struct vm_area_struct **vmas); -long get_user_pages_locked6(unsigned long start, unsigned long nr_pages, +long get_user_pages_locked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, int *locked); long __get_user_pages_unlocked(struct task_struct *tsk, struct mm_struct *mm, unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages, unsigned int gup_flags); -long get_user_pages_unlocked5(unsigned long start, unsigned long nr_pages, +long get_user_pages_unlocked(unsigned long start, unsigned long nr_pages, int write, int force, struct page **pages); int get_user_pages_fast(unsigned long start, int nr_pages, int write, struct page **pages); -/* suppress warnings from use in EXPORT_SYMBOL() */ -#ifndef __DISABLE_GUP_DEPRECATED -#define __gup_deprecated __deprecated -#else -#define __gup_deprecated -#endif -/* - * These macros provide backward-compatibility with the old - * get_user_pages() variants which took tsk/mm. These - * functions/macros provide both compile-time __deprecated so we - * can catch old-style use and not break the build. The actual - * functions also have WARN_ON()s to let us know at runtime if - * the get_user_pages() should have been the "remote" variant. - * - * These are hideous, but temporary. - * - * If you run into one of these __deprecated warnings, look - * at how you are calling get_user_pages(). If you are calling - * it with current/current->mm as the first two arguments, - * simply remove those arguments. The behavior will be the same - * as it is now. If you are calling it on another task, use - * get_user_pages_remote() instead. - * - * Any questions? Ask Dave Hansen - */ -long -__gup_deprecated -get_user_pages8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - struct vm_area_struct **vmas); -#define GUP_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages, ...) \ - get_user_pages -#define get_user_pages(...) GUP_MACRO(__VA_ARGS__, \ - get_user_pages8, x, \ - get_user_pages6, x, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_locked8(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages, - int *locked); -#define GUPL_MACRO(_1, _2, _3, _4, _5, _6, _7, _8, get_user_pages_locked, ...) \ - get_user_pages_locked -#define get_user_pages_locked(...) GUPL_MACRO(__VA_ARGS__, \ - get_user_pages_locked8, x, \ - get_user_pages_locked6, x, x, x, x)(__VA_ARGS__) - -__gup_deprecated -long get_user_pages_unlocked7(struct task_struct *tsk, struct mm_struct *mm, - unsigned long start, unsigned long nr_pages, - int write, int force, struct page **pages); -#define GUPU_MACRO(_1, _2, _3, _4, _5, _6, _7, get_user_pages_unlocked, ...) \ - get_user_pages_unlocked -#define get_user_pages_unlocked(...) GUPU_MACRO(__VA_ARGS__, \ - get_user_pages_unlocked7, x, \ - get_user_pages_unlocked5, x, x, x, x)(__VA_ARGS__) - /* Container for pinned pfns / pages */ struct frame_vector { unsigned int nr_allocated; /* Number of frames we have space for */ -- cgit v1.2.3 From c00bbcf8628969e103d4a7b351a53746f1025576 Mon Sep 17 00:00:00 2001 From: Stefan Hajnoczi Date: Tue, 29 Mar 2016 16:43:45 +0100 Subject: virtio: add VIRTIO_CONFIG_S_NEEDS_RESET device status bit The VIRTIO 1.0 specification added the DEVICE_NEEDS_RESET device status bit in "VIRTIO-98: Add DEVICE_NEEDS_RESET". This patch defines the device status bit in the uapi header file so that both the kernel and userspace applications can use it. The bit is currently unused by the virtio guest drivers and vhost. According to the spec "a good implementation will try to recover by issuing a reset". This is not attempted here because it requires auditing the virtio drivers to ensure there are no resource leaks or crashes if the device needs to be reset mid-operation. See "2.1 Device Status Field" in the VIRTIO 1.0 specification for details. Signed-off-by: Stefan Hajnoczi Signed-off-by: Michael S. Tsirkin --- include/uapi/linux/virtio_config.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/virtio_config.h b/include/uapi/linux/virtio_config.h index c18264df9504..4cb65bbfa654 100644 --- a/include/uapi/linux/virtio_config.h +++ b/include/uapi/linux/virtio_config.h @@ -40,6 +40,8 @@ #define VIRTIO_CONFIG_S_DRIVER_OK 4 /* Driver has finished configuring features */ #define VIRTIO_CONFIG_S_FEATURES_OK 8 +/* Device entered invalid state, driver must reset it */ +#define VIRTIO_CONFIG_S_NEEDS_RESET 0x40 /* We've given up on this device. */ #define VIRTIO_CONFIG_S_FAILED 0x80 -- cgit v1.2.3 From a0ca153f98db8cf25298565a09e11fe9d82846ad Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Tue, 5 Apr 2016 09:13:39 -0700 Subject: GRE: Disable segmentation offloads w/ CSUM and we are encapsulated via FOU This patch fixes an issue I found in which we were dropping frames if we had enabled checksums on GRE headers that were encapsulated by either FOU or GUE. Without this patch I was barely able to get 1 Gb/s of throughput. With this patch applied I am now at least getting around 6 Gb/s. The issue is due to the fact that with FOU or GUE applied we do not provide a transport offset pointing to the GRE header, nor do we offload it in software as the GRE header is completely skipped by GSO and treated like a VXLAN or GENEVE type header. As such we need to prevent the stack from generating it and also prevent GRE from generating it via any interface we create. Fixes: c3483384ee511 ("gro: Allow tunnel stacking in the case of FOU/GUE") Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/netdevice.h | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index cb0d5d09c2e4..8395308a2445 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -2120,7 +2120,10 @@ struct napi_gro_cb { /* Used in foo-over-udp, set in udp[46]_gro_receive */ u8 is_ipv6:1; - /* 7 bit hole */ + /* Used in GRE, set in fou/gue_gro_receive */ + u8 is_fou:1; + + /* 6 bit hole */ /* used to support CHECKSUM_COMPLETE for tunneling protocols */ __wsum csum; -- cgit v1.2.3 From ba6f5e33bdbb9ed2014b778fbbaecf20060ca989 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Wed, 6 Apr 2016 15:15:19 -0300 Subject: sctp: avoid refreshing heartbeat timer too often Currently on high rate SCTP streams the heartbeat timer refresh can consume quite a lot of resources as timer updates are costly and it contains a random factor, which a) is also costly and b) invalidates mod_timer() optimization for not editing a timer to the same value. It may even cause the timer to be slightly advanced, for no good reason. As suggested by David Laight this patch now removes this timer update from hot path by leaving the timer on and re-evaluating upon its expiration if the heartbeat is still needed or not, similarly to what is done for TCP. If it's not needed anymore the timer is re-scheduled to the new timeout, considering the time already elapsed. For this, we now record the last tx timestamp per transport, updated in the same spots as hb timer was restarted on tx. Also split up sctp_transport_reset_timers into sctp_transport_reset_t3_rtx and sctp_transport_reset_hb_timer, so we can re-arm T3 without re-arming the heartbeat one. On loopback with MTU of 65535 and data chunks with 1636, so that we have a considerable amount of chunks without stressing system calls, netperf -t SCTP_STREAM -l 30, perf looked like this before: Samples: 103K of event 'cpu-clock', Event count (approx.): 25833000000 Overhead Command Shared Object Symbol + 6,15% netperf [kernel.vmlinux] [k] copy_user_enhanced_fast_string - 5,43% netperf [kernel.vmlinux] [k] _raw_write_unlock_irqrestore - _raw_write_unlock_irqrestore - 96,54% _raw_spin_unlock_irqrestore - 36,14% mod_timer + 97,24% sctp_transport_reset_timers + 2,76% sctp_do_sm + 33,65% __wake_up_sync_key + 28,77% sctp_ulpq_tail_event + 1,40% del_timer - 1,84% mod_timer + 99,03% sctp_transport_reset_timers + 0,97% sctp_do_sm + 1,50% sctp_ulpq_tail_event And after this patch, now with netperf -l 60: Samples: 230K of event 'cpu-clock', Event count (approx.): 57707250000 Overhead Command Shared Object Symbol + 5,65% netperf [kernel.vmlinux] [k] memcpy_erms + 5,59% netperf [kernel.vmlinux] [k] copy_user_enhanced_fast_string - 5,05% netperf [kernel.vmlinux] [k] _raw_spin_unlock_irqrestore - _raw_spin_unlock_irqrestore + 49,89% __wake_up_sync_key + 45,68% sctp_ulpq_tail_event - 2,85% mod_timer + 76,51% sctp_transport_reset_t3_rtx + 23,49% sctp_do_sm + 1,55% del_timer + 2,50% netperf [sctp] [k] sctp_datamsg_from_user + 2,26% netperf [sctp] [k] sctp_sendmsg Throughput-wise, from 6800mbps without the patch to 7050mbps with it, ~3.7%. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/net/sctp/structs.h | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/net/sctp/structs.h b/include/net/sctp/structs.h index 6df1ce7a411c..5a404c354f4c 100644 --- a/include/net/sctp/structs.h +++ b/include/net/sctp/structs.h @@ -847,6 +847,11 @@ struct sctp_transport { */ ktime_t last_time_heard; + /* When was the last time that we sent a chunk using this + * transport? We use this to check for idle transports + */ + unsigned long last_time_sent; + /* Last time(in jiffies) when cwnd is reduced due to the congestion * indication based on ECNE chunk. */ @@ -952,7 +957,8 @@ void sctp_transport_route(struct sctp_transport *, union sctp_addr *, struct sctp_sock *); void sctp_transport_pmtu(struct sctp_transport *, struct sock *sk); void sctp_transport_free(struct sctp_transport *); -void sctp_transport_reset_timers(struct sctp_transport *); +void sctp_transport_reset_t3_rtx(struct sctp_transport *); +void sctp_transport_reset_hb_timer(struct sctp_transport *); int sctp_transport_hold(struct sctp_transport *); void sctp_transport_put(struct sctp_transport *); void sctp_transport_update_rto(struct sctp_transport *, __u32); -- cgit v1.2.3 From 9ab179d83b4e31ea277a123492e419067c2f129a Mon Sep 17 00:00:00 2001 From: David Ahern Date: Thu, 7 Apr 2016 11:10:06 -0700 Subject: net: vrf: Fix dst reference counting Vivek reported a kernel exception deleting a VRF with an active connection through it. The root cause is that the socket has a cached reference to a dst that is destroyed. Converting the dst_destroy to dst_release and letting proper reference counting kick in does not work as the dst has a reference to the device which needs to be released as well. I talked to Hannes about this at netdev and he pointed out the ipv4 and ipv6 dst handling has dst_ifdown for just this scenario. Rather than continuing with the reinvented dst wheel in VRF just remove it and leverage the ipv4 and ipv6 versions. Fixes: 193125dbd8eb2 ("net: Introduce VRF device driver") Fixes: 35402e3136634 ("net: Add IPv6 support to VRF device") Signed-off-by: David Ahern Signed-off-by: David S. Miller --- include/net/ip6_route.h | 3 +++ include/net/route.h | 3 +++ 2 files changed, 6 insertions(+) (limited to 'include') diff --git a/include/net/ip6_route.h b/include/net/ip6_route.h index 295d291269e2..54c779416eec 100644 --- a/include/net/ip6_route.h +++ b/include/net/ip6_route.h @@ -101,6 +101,9 @@ void fib6_force_start_gc(struct net *net); struct rt6_info *addrconf_dst_alloc(struct inet6_dev *idev, const struct in6_addr *addr, bool anycast); +struct rt6_info *ip6_dst_alloc(struct net *net, struct net_device *dev, + int flags); + /* * support functions for ND * diff --git a/include/net/route.h b/include/net/route.h index 9b0a523bb428..6de665bf1750 100644 --- a/include/net/route.h +++ b/include/net/route.h @@ -209,6 +209,9 @@ unsigned int inet_addr_type_dev_table(struct net *net, void ip_rt_multicast_event(struct in_device *); int ip_rt_ioctl(struct net *, unsigned int cmd, void __user *arg); void ip_rt_get_source(u8 *src, struct sk_buff *skb, struct rtable *rt); +struct rtable *rt_dst_alloc(struct net_device *dev, + unsigned int flags, u16 type, + bool nopolicy, bool noxfrm, bool will_cache); struct in_ifaddr; void fib_add_ifaddr(struct in_ifaddr *); -- cgit v1.2.3 From 1ecf689013b598918cd9a261a078e64571b60f90 Mon Sep 17 00:00:00 2001 From: stephen hemminger Date: Mon, 11 Apr 2016 13:39:08 -0700 Subject: devlink: add missing install of header The new devlink.h in uapi was not being installed by make headers_install Signed-off-by: Stephen Hemminger Signed-off-by: David S. Miller --- include/uapi/linux/Kbuild | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/uapi/linux/Kbuild b/include/uapi/linux/Kbuild index b71fd0b5cbad..813ffb2e22c9 100644 --- a/include/uapi/linux/Kbuild +++ b/include/uapi/linux/Kbuild @@ -96,6 +96,7 @@ header-y += cyclades.h header-y += cycx_cfm.h header-y += dcbnl.h header-y += dccp.h +header-y += devlink.h header-y += dlmconstants.h header-y += dlm_device.h header-y += dlm.h -- cgit v1.2.3 From b32e4482aadfd1322357f46d4ed8a990603664d9 Mon Sep 17 00:00:00 2001 From: Jaegeuk Kim Date: Mon, 11 Apr 2016 15:51:57 -0700 Subject: fscrypto: don't let data integrity writebacks fail with ENOMEM This patch fixes the issue introduced by the ext4 crypto fix in a same manner. For F2FS, however, we flush the pending IOs and wait for a while to acquire free memory. Fixes: c9af28fdd4492 ("ext4 crypto: don't let data integrity writebacks fail with ENOMEM") Cc: Theodore Ts'o Signed-off-by: Jaegeuk Kim --- include/linux/fscrypto.h | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/fscrypto.h b/include/linux/fscrypto.h index cd91f75de49b..6027f6bbb061 100644 --- a/include/linux/fscrypto.h +++ b/include/linux/fscrypto.h @@ -263,9 +263,9 @@ static inline void fscrypt_set_d_op(struct dentry *dentry) extern struct kmem_cache *fscrypt_info_cachep; int fscrypt_initialize(void); -extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *); +extern struct fscrypt_ctx *fscrypt_get_ctx(struct inode *, gfp_t); extern void fscrypt_release_ctx(struct fscrypt_ctx *); -extern struct page *fscrypt_encrypt_page(struct inode *, struct page *); +extern struct page *fscrypt_encrypt_page(struct inode *, struct page *, gfp_t); extern int fscrypt_decrypt_page(struct page *); extern void fscrypt_decrypt_bio_pages(struct fscrypt_ctx *, struct bio *); extern void fscrypt_pullback_bio_page(struct page **, bool); @@ -299,7 +299,8 @@ extern int fscrypt_fname_usr_to_disk(struct inode *, const struct qstr *, #endif /* crypto.c */ -static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i) +static inline struct fscrypt_ctx *fscrypt_notsupp_get_ctx(struct inode *i, + gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } @@ -310,7 +311,7 @@ static inline void fscrypt_notsupp_release_ctx(struct fscrypt_ctx *c) } static inline struct page *fscrypt_notsupp_encrypt_page(struct inode *i, - struct page *p) + struct page *p, gfp_t f) { return ERR_PTR(-EOPNOTSUPP); } -- cgit v1.2.3 From 1363074667a6b7d0507527742ccd7bbed5e3ceaa Mon Sep 17 00:00:00 2001 From: Hans de Goede Date: Tue, 12 Apr 2016 12:27:09 +0200 Subject: USB: uas: Add a new NO_REPORT_LUNS quirk Add a new NO_REPORT_LUNS quirk and set it for Seagate drives with an usb-id of: 0bc2:331a, as these will fail to respond to a REPORT_LUNS command. Cc: stable@vger.kernel.org Reported-and-tested-by: David Webb Signed-off-by: Hans de Goede Acked-by: Alan Stern Signed-off-by: Greg Kroah-Hartman --- include/linux/usb_usual.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/usb_usual.h b/include/linux/usb_usual.h index 7f5f78bd15ad..245f57dbbb61 100644 --- a/include/linux/usb_usual.h +++ b/include/linux/usb_usual.h @@ -79,6 +79,8 @@ /* Cannot handle MI_REPORT_SUPPORTED_OPERATION_CODES */ \ US_FLAG(MAX_SECTORS_240, 0x08000000) \ /* Sets max_sectors to 240 */ \ + US_FLAG(NO_REPORT_LUNS, 0x10000000) \ + /* Cannot handle REPORT_LUNS */ \ #define US_FLAG(name, value) US_FL_##name = value , enum { US_DO_ALL_FLAGS }; -- cgit v1.2.3 From e7e0c3e26587749b62d17b9dd0532874186c77f7 Mon Sep 17 00:00:00 2001 From: Sakari Ailus Date: Sun, 3 Apr 2016 16:15:00 -0300 Subject: [media] videobuf2-core: Check user space planes array in dqbuf The number of planes in videobuf2 is specific to a buffer. In order to verify that the planes array provided by the user is long enough, a new vb2_buf_op is required. Call __verify_planes_array() when the dequeued buffer is known. Return an error to the caller if there was one, otherwise remove the buffer from the done list. Signed-off-by: Sakari Ailus Acked-by: Hans Verkuil Cc: stable@vger.kernel.org # for v4.4 and later Signed-off-by: Mauro Carvalho Chehab --- include/media/videobuf2-core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 8a0f55b6c2ba..5342ff4d748f 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -375,6 +375,9 @@ struct vb2_ops { /** * struct vb2_ops - driver-specific callbacks * + * @verify_planes_array: Verify that a given user space structure contains + * enough planes for the buffer. This is called + * for each dequeued buffer. * @fill_user_buffer: given a vb2_buffer fill in the userspace structure. * For V4L2 this is a struct v4l2_buffer. * @fill_vb2_buffer: given a userspace structure, fill in the vb2_buffer. @@ -384,6 +387,7 @@ struct vb2_ops { * the vb2_buffer struct. */ struct vb2_buf_ops { + int (*verify_planes_array)(struct vb2_buffer *vb, const void *pb); void (*fill_user_buffer)(struct vb2_buffer *vb, void *pb); int (*fill_vb2_buffer)(struct vb2_buffer *vb, const void *pb, struct vb2_plane *planes); -- cgit v1.2.3 From cba2e47abcbd80e3f46f460899290402f98090ec Mon Sep 17 00:00:00 2001 From: Toshi Kani Date: Tue, 12 Apr 2016 18:10:52 -0600 Subject: pmem: fix BUG() error in pmem.h:48 on X86_32 After 'commit fc0c2028135c ("x86, pmem: use memcpy_mcsafe() for memcpy_from_pmem()")', probing a PMEM device hits the BUG() error below on X86_32 kernel. kernel BUG at include/linux/pmem.h:48! memcpy_from_pmem() calls arch_memcpy_from_pmem(), which is unimplemented since CONFIG_ARCH_HAS_PMEM_API is undefined on X86_32. Fix the BUG() error by adding default_memcpy_from_pmem(). Acked-by: Dan Williams Signed-off-by: Toshi Kani Signed-off-by: Ross Zwisler Cc: Dan Williams Cc: Ross Zwisler --- include/linux/pmem.h | 22 ++++++++++++++++------ 1 file changed, 16 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/pmem.h b/include/linux/pmem.h index ac6d872ce067..57d146fe44dd 100644 --- a/include/linux/pmem.h +++ b/include/linux/pmem.h @@ -72,6 +72,18 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) } #endif +static inline bool arch_has_pmem_api(void) +{ + return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); +} + +static inline int default_memcpy_from_pmem(void *dst, void __pmem const *src, + size_t size) +{ + memcpy(dst, (void __force *) src, size); + return 0; +} + /* * memcpy_from_pmem - read from persistent memory with error handling * @dst: destination buffer @@ -83,12 +95,10 @@ static inline void arch_invalidate_pmem(void __pmem *addr, size_t size) static inline int memcpy_from_pmem(void *dst, void __pmem const *src, size_t size) { - return arch_memcpy_from_pmem(dst, src, size); -} - -static inline bool arch_has_pmem_api(void) -{ - return IS_ENABLED(CONFIG_ARCH_HAS_PMEM_API); + if (arch_has_pmem_api()) + return arch_memcpy_from_pmem(dst, src, size); + else + return default_memcpy_from_pmem(dst, src, size); } /** -- cgit v1.2.3 From 34dbbcdbf63360661ff7bda6c5f52f99ac515f92 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Thu, 14 Apr 2016 11:22:00 -0700 Subject: Make file credentials available to the seqfile interfaces A lot of seqfile users seem to be using things like %pK that uses the credentials of the current process, but that is actually completely wrong for filesystem interfaces. The unix semantics for permission checking files is to check permissions at _open_ time, not at read or write time, and that is not just a small detail: passing off stdin/stdout/stderr to a suid application and making the actual IO happen in privileged context is a classic exploit technique. So if we want to be able to look at permissions at read time, we need to use the file open credentials, not the current ones. Normal file accesses can just use "f_cred" (or any of the helper functions that do that, like file_ns_capable()), but the seqfile interfaces do not have any such options. It turns out that seq_file _does_ save away the user_ns information of the file, though. Since user_ns is just part of the full credential information, replace that special case with saving off the cred pointer instead, and suddenly seq_file has all the permission information it needs. Signed-off-by: Linus Torvalds --- include/linux/seq_file.h | 13 ++++--------- 1 file changed, 4 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/linux/seq_file.h b/include/linux/seq_file.h index dde00defbaa5..f3d45dd42695 100644 --- a/include/linux/seq_file.h +++ b/include/linux/seq_file.h @@ -7,13 +7,10 @@ #include #include #include +#include +#include struct seq_operations; -struct file; -struct path; -struct inode; -struct dentry; -struct user_namespace; struct seq_file { char *buf; @@ -27,9 +24,7 @@ struct seq_file { struct mutex lock; const struct seq_operations *op; int poll_event; -#ifdef CONFIG_USER_NS - struct user_namespace *user_ns; -#endif + const struct file *file; void *private; }; @@ -147,7 +142,7 @@ int seq_release_private(struct inode *, struct file *); static inline struct user_namespace *seq_user_ns(struct seq_file *seq) { #ifdef CONFIG_USER_NS - return seq->user_ns; + return seq->file->f_cred->user_ns; #else extern struct user_namespace init_user_ns; return &init_user_ns; -- cgit v1.2.3 From 33c162a980fe03498fcecb917f618ad7e7c55e61 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 11 Apr 2016 15:29:36 -0700 Subject: ipv6: datagram: Update dst cache of a connected datagram sk during pmtu update There is a case in connected UDP socket such that getsockopt(IPV6_MTU) will return a stale MTU value. The reproducible sequence could be the following: 1. Create a connected UDP socket 2. Send some datagrams out 3. Receive a ICMPV6_PKT_TOOBIG 4. No new outgoing datagrams to trigger the sk_dst_check() logic to update the sk->sk_dst_cache. 5. getsockopt(IPV6_MTU) returns the mtu from the invalid sk->sk_dst_cache instead of the newly created RTF_CACHE clone. This patch updates the sk->sk_dst_cache for a connected datagram sk during pmtu-update code path. Note that the sk->sk_v6_daddr is used to do the route lookup instead of skb->data (i.e. iph). It is because a UDP socket can become connected after sending out some datagrams in un-connected state. or It can be connected multiple times to different destinations. Hence, iph may not be related to where sk is currently connected to. It is done under '!sock_owned_by_user(sk)' condition because the user may make another ip6_datagram_connect() (i.e changing the sk->sk_v6_daddr) while dst lookup is happening in the pmtu-update code path. For the sock_owned_by_user(sk) == true case, the next patch will introduce a release_cb() which will update the sk->sk_dst_cache. Test: Server (Connected UDP Socket): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ Route Details: [root@arch-fb-vm1 ~]# ip -6 r show | egrep '2fac' 2fac::/64 dev eth0 proto kernel metric 256 pref medium 2fac:face::/64 via 2fac::face dev eth0 metric 1024 pref medium A simple python code to create a connected UDP socket: import socket import errno HOST = '2fac::1' PORT = 8080 s = socket.socket(socket.AF_INET6, socket.SOCK_DGRAM) s.bind((HOST, PORT)) s.connect(('2fac:face::face', 53)) print("connected") while True: try: data = s.recv(1024) except socket.error as se: if se.errno == errno.EMSGSIZE: pmtu = s.getsockopt(41, 24) print("PMTU:%d" % pmtu) break s.close() Python program output after getting a ICMPV6_PKT_TOOBIG: [root@arch-fb-vm1 ~]# python2 ~/devshare/kernel/tasks/fib6/udp-connect-53-8080.py connected PMTU:1300 Cache routes after recieving TOOBIG: [root@arch-fb-vm1 ~]# ip -6 r show table cache 2fac:face::face via 2fac::face dev eth0 metric 0 cache expires 463sec mtu 1300 pref medium Client (Send the ICMPV6_PKT_TOOBIG): ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~ scapy is used to generate the TOOBIG message. Here is the scapy script I have used: >>> p=Ether(src='da:75:4d:36:ac:32', dst='52:54:00:12:34:66', type=0x86dd)/IPv6(src='2fac::face', dst='2fac::1')/ICMPv6PacketTooBig(mtu=1300)/IPv6(src='2fac:: 1',dst='2fac:face::face', nh='UDP')/UDP(sport=8080,dport=53) >>> sendp(p, iface='qemubr0') Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception") Signed-off-by: Martin KaFai Lau Reported-by: Wei Wang Cc: Cong Wang Cc: Eric Dumazet Cc: Wei Wang Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index d0aeb97aec5d..fd02e90b1289 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -959,6 +959,7 @@ int compat_ipv6_getsockopt(struct sock *sk, int level, int optname, int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); +int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); -- cgit v1.2.3 From e646b657f6983017783914a951039e323120dc55 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Mon, 11 Apr 2016 15:29:37 -0700 Subject: ipv6: udp: Do a route lookup and update during release_cb This patch adds a release_cb for UDPv6. It does a route lookup and updates sk->sk_dst_cache if it is needed. It picks up the left-over job from ip6_sk_update_pmtu() if the sk was owned by user during the pmtu update. It takes a rcu_read_lock to protect the __sk_dst_get() operations because another thread may do ip6_dst_store() without taking the sk lock (e.g. sendmsg). Fixes: 45e4fd26683c ("ipv6: Only create RTF_CACHE routes after encountering pmtu exception") Signed-off-by: Martin KaFai Lau Reported-by: Wei Wang Cc: Cong Wang Cc: Eric Dumazet Cc: Wei Wang Signed-off-by: David S. Miller --- include/net/ipv6.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/net/ipv6.h b/include/net/ipv6.h index fd02e90b1289..1be050ada8c5 100644 --- a/include/net/ipv6.h +++ b/include/net/ipv6.h @@ -960,6 +960,7 @@ int ip6_datagram_connect(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_connect_v6_only(struct sock *sk, struct sockaddr *addr, int addr_len); int ip6_datagram_dst_update(struct sock *sk, bool fix_sk_saddr); +void ip6_datagram_release_cb(struct sock *sk); int ipv6_recv_error(struct sock *sk, struct msghdr *msg, int len, int *addr_len); -- cgit v1.2.3 From d894ba18d4e449b3a7f6eb491f16c9e02933736e Mon Sep 17 00:00:00 2001 From: Craig Gallek Date: Tue, 12 Apr 2016 13:11:25 -0400 Subject: soreuseport: fix ordering for mixed v4/v6 sockets MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit With the SO_REUSEPORT socket option, it is possible to create sockets in the AF_INET and AF_INET6 domains which are bound to the same IPv4 address. This is only possible with SO_REUSEPORT and when not using IPV6_V6ONLY on the AF_INET6 sockets. Prior to the commits referenced below, an incoming IPv4 packet would always be routed to a socket of type AF_INET when this mixed-mode was used. After those changes, the same packet would be routed to the most recently bound socket (if this happened to be an AF_INET6 socket, it would have an IPv4 mapped IPv6 address). The change in behavior occurred because the recent SO_REUSEPORT optimizations short-circuit the socket scoring logic as soon as they find a match. They did not take into account the scoring logic that favors AF_INET sockets over AF_INET6 sockets in the event of a tie. To fix this problem, this patch changes the insertion order of AF_INET and AF_INET6 addresses in the TCP and UDP socket lists when the sockets have SO_REUSEPORT set. AF_INET sockets will be inserted at the head of the list and AF_INET6 sockets with SO_REUSEPORT set will always be inserted at the tail of the list. This will force AF_INET sockets to always be considered first. Fixes: e32ea7e74727 ("soreuseport: fast reuseport UDP socket selection") Fixes: 125e80b88687 ("soreuseport: fast reuseport TCP socket selection") Reported-by: Maciej Żenczykowski Signed-off-by: Craig Gallek Signed-off-by: Eric Dumazet Signed-off-by: David S. Miller --- include/linux/rculist_nulls.h | 39 +++++++++++++++++++++++++++++++++++++++ include/net/sock.h | 6 +++++- 2 files changed, 44 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/rculist_nulls.h b/include/linux/rculist_nulls.h index 1c33dd7da4a7..4ae95f7e8597 100644 --- a/include/linux/rculist_nulls.h +++ b/include/linux/rculist_nulls.h @@ -98,6 +98,45 @@ static inline void hlist_nulls_add_head_rcu(struct hlist_nulls_node *n, if (!is_a_nulls(first)) first->pprev = &n->next; } + +/** + * hlist_nulls_add_tail_rcu + * @n: the element to add to the hash list. + * @h: the list to add to. + * + * Description: + * Adds the specified element to the end of the specified hlist_nulls, + * while permitting racing traversals. NOTE: tail insertion requires + * list traversal. + * + * The caller must take whatever precautions are necessary + * (such as holding appropriate locks) to avoid racing + * with another list-mutation primitive, such as hlist_nulls_add_head_rcu() + * or hlist_nulls_del_rcu(), running on this same list. + * However, it is perfectly legal to run concurrently with + * the _rcu list-traversal primitives, such as + * hlist_nulls_for_each_entry_rcu(), used to prevent memory-consistency + * problems on Alpha CPUs. Regardless of the type of CPU, the + * list-traversal primitive must be guarded by rcu_read_lock(). + */ +static inline void hlist_nulls_add_tail_rcu(struct hlist_nulls_node *n, + struct hlist_nulls_head *h) +{ + struct hlist_nulls_node *i, *last = NULL; + + for (i = hlist_nulls_first_rcu(h); !is_a_nulls(i); + i = hlist_nulls_next_rcu(i)) + last = i; + + if (last) { + n->next = last->next; + n->pprev = &last->next; + rcu_assign_pointer(hlist_nulls_next_rcu(last), n); + } else { + hlist_nulls_add_head_rcu(n, h); + } +} + /** * hlist_nulls_for_each_entry_rcu - iterate over rcu list of given type * @tpos: the type * to use as a loop cursor. diff --git a/include/net/sock.h b/include/net/sock.h index 255d3e03727b..121ffc115c4f 100644 --- a/include/net/sock.h +++ b/include/net/sock.h @@ -630,7 +630,11 @@ static inline void sk_add_node_rcu(struct sock *sk, struct hlist_head *list) static inline void __sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) { - hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); + if (IS_ENABLED(CONFIG_IPV6) && sk->sk_reuseport && + sk->sk_family == AF_INET6) + hlist_nulls_add_tail_rcu(&sk->sk_nulls_node, list); + else + hlist_nulls_add_head_rcu(&sk->sk_nulls_node, list); } static inline void sk_nulls_add_node_rcu(struct sock *sk, struct hlist_nulls_head *list) -- cgit v1.2.3 From cb92148b58a49455f3a7204eba3aee09a8b7683c Mon Sep 17 00:00:00 2001 From: Hariprasad Shenai Date: Fri, 15 Apr 2016 13:00:11 -0500 Subject: PCI: Add pci_set_vpd_size() to set VPD size After 104daa71b396 ("PCI: Determine actual VPD size on first access"), the PCI core computes the valid VPD size by parsing the VPD starting at offset 0x0. We don't attempt to read past that valid size because that causes some devices to crash. However, some devices do have data past that valid size. For example, Chelsio adapters contain two VPD structures, and the driver needs both of them. Add pci_set_vpd_size(). If a driver knows it is safe to read past the end of the VPD data structure at offset 0, it can use pci_set_vpd_size() to allow access to as much data as it needs. [bhelgaas: changelog, split patches, rename to pci_set_vpd_size() and return int (not ssize_t)] Fixes: 104daa71b396 ("PCI: Determine actual VPD size on first access") Tested-by: Steve Wise Signed-off-by: Casey Leedom Signed-off-by: Hariprasad Shenai Signed-off-by: Bjorn Helgaas --- include/linux/pci.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/pci.h b/include/linux/pci.h index 004b8133417d..932ec74909c6 100644 --- a/include/linux/pci.h +++ b/include/linux/pci.h @@ -1111,6 +1111,7 @@ void pci_unlock_rescan_remove(void); /* Vital product data routines */ ssize_t pci_read_vpd(struct pci_dev *dev, loff_t pos, size_t count, void *buf); ssize_t pci_write_vpd(struct pci_dev *dev, loff_t pos, size_t count, const void *buf); +int pci_set_vpd_size(struct pci_dev *dev, size_t len); /* Helper functions for low-level code (drivers/pci/setup-[bus,res].c) */ resource_size_t pcibios_retrieve_fw_addr(struct pci_dev *dev, int idx); -- cgit v1.2.3 From 67245ff332064c01b760afa7a384ccda024bfd24 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sat, 16 Apr 2016 15:16:07 -0700 Subject: devpts: clean up interface to pty drivers This gets rid of the horrible notion of having that struct inode *ptmx_inode be the linchpin of the interface between the pty code and devpts. By de-emphasizing the ptmx inode, a lot of things actually get cleaner, and we will have a much saner way forward. In particular, this will allow us to associate with any particular devpts instance at open-time, and not be artificially tied to one particular ptmx inode. The patch itself is actually fairly straightforward, and apart from some locking and return path cleanups it's pretty mechanical: - the interfaces that devpts exposes all take "struct pts_fs_info *" instead of "struct inode *ptmx_inode" now. NOTE! The "struct pts_fs_info" thing is a completely opaque structure as far as the pty driver is concerned: it's still declared entirely internally to devpts. So the pty code can't actually access it in any way, just pass it as a "cookie" to the devpts code. - the "look up the pts fs info" is now a single clear operation, that also does the reference count increment on the pts superblock. So "devpts_add/del_ref()" is gone, and replaced by a "lookup and get ref" operation (devpts_get_ref(inode)), along with a "put ref" op (devpts_put_ref()). - the pty master "tty->driver_data" field now contains the pts_fs_info, not the ptmx inode. - because we don't care about the ptmx inode any more as some kind of base index, the ref counting can now drop the inode games - it just gets the ref on the superblock. - the pts_fs_info now has a back-pointer to the super_block. That's so that we can easily look up the information we actually need. Although quite often, the pts fs info was actually all we wanted, and not having to look it up based on some magical inode makes things more straightforward. In particular, now that "devpts_get_ref(inode)" operation should really be the *only* place we need to look up what devpts instance we're associated with, and we do it exactly once, at ptmx_open() time. The other side of this is that one ptmx node could now be associated with multiple different devpts instances - you could have a single /dev/ptmx node, and then have multiple mount namespaces with their own instances of devpts mounted on /dev/pts/. And that's all perfectly sane in a model where we just look up the pts instance at open time. This will eventually allow us to get rid of our odd single-vs-multiple pts instance model, but this patch in itself changes no semantics, only an internal binding model. Cc: Eric Biederman Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Peter Hurley Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: Alan Cox Cc: Jann Horn Cc: Greg KH Cc: Jiri Slaby Cc: Florian Weimer Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 34 ++++++++++------------------------ 1 file changed, 10 insertions(+), 24 deletions(-) (limited to 'include') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index e0ee0b3000b2..358a4db72a27 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -15,38 +15,24 @@ #include +struct pts_fs_info; + #ifdef CONFIG_UNIX98_PTYS -int devpts_new_index(struct inode *ptmx_inode); -void devpts_kill_index(struct inode *ptmx_inode, int idx); -void devpts_add_ref(struct inode *ptmx_inode); -void devpts_del_ref(struct inode *ptmx_inode); +/* Look up a pts fs info and get a ref to it */ +struct pts_fs_info *devpts_get_ref(struct inode *, struct file *); +void devpts_put_ref(struct pts_fs_info *); + +int devpts_new_index(struct pts_fs_info *); +void devpts_kill_index(struct pts_fs_info *, int); + /* mknod in devpts */ -struct inode *devpts_pty_new(struct inode *ptmx_inode, dev_t device, int index, - void *priv); +struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *); /* get private structure */ void *devpts_get_priv(struct inode *pts_inode); /* unlink */ void devpts_pty_kill(struct inode *inode); -#else - -/* Dummy stubs in the no-pty case */ -static inline int devpts_new_index(struct inode *ptmx_inode) { return -EINVAL; } -static inline void devpts_kill_index(struct inode *ptmx_inode, int idx) { } -static inline void devpts_add_ref(struct inode *ptmx_inode) { } -static inline void devpts_del_ref(struct inode *ptmx_inode) { } -static inline struct inode *devpts_pty_new(struct inode *ptmx_inode, - dev_t device, int index, void *priv) -{ - return ERR_PTR(-EINVAL); -} -static inline void *devpts_get_priv(struct inode *pts_inode) -{ - return NULL; -} -static inline void devpts_pty_kill(struct inode *inode) { } - #endif -- cgit v1.2.3 From 2309236c13feae619572324efd3e910e66ef6bd0 Mon Sep 17 00:00:00 2001 From: Konstantin Khlebnikov Date: Mon, 18 Apr 2016 14:37:10 +0300 Subject: cls_cgroup: get sk_classid only from full sockets skb->sk could point to timewait or request socket which has no sk_classid. Detected as "BUG: KASAN: slab-out-of-bounds in cls_cgroup_classify". Signed-off-by: Konstantin Khlebnikov Acked-by: Eric Dumazet Signed-off-by: David S. Miller --- include/net/cls_cgroup.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/net/cls_cgroup.h b/include/net/cls_cgroup.h index c0a92e2c286d..74c9693d4941 100644 --- a/include/net/cls_cgroup.h +++ b/include/net/cls_cgroup.h @@ -17,6 +17,7 @@ #include #include #include +#include #ifdef CONFIG_CGROUP_NET_CLASSID struct cgroup_cls_state { @@ -63,11 +64,13 @@ static inline u32 task_get_classid(const struct sk_buff *skb) * softirqs always disables bh. */ if (in_serving_softirq()) { + struct sock *sk = skb_to_full_sk(skb); + /* If there is an sock_cgroup_classid we'll use that. */ - if (!skb->sk) + if (!sk || !sk_fullsock(sk)) return 0; - classid = sock_cgroup_classid(&skb->sk->sk_cgrp_data); + classid = sock_cgroup_classid(&sk->sk_cgrp_data); } return classid; -- cgit v1.2.3 From 1d0fd42fa31d18ba0a3e0dd008c9e93e1cebe451 Mon Sep 17 00:00:00 2001 From: Wei Ni Date: Thu, 3 Mar 2016 17:33:46 +0800 Subject: thermal: consistently use int for trip temp The commit 17e8351a7739 consistently use int for temperature, however it missed a few in trip temperature and thermal_core. In current codes, the trip->temperature used "unsigned long" and zone->temperature used"int", if the temperature is negative value, it will get wrong result when compare temperature with trip temperature. This patch can fix it. Signed-off-by: Wei Ni Signed-off-by: Eduardo Valentin --- include/linux/thermal.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/thermal.h b/include/linux/thermal.h index a55d0523f75d..1b8a5a7876ce 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -352,8 +352,8 @@ struct thermal_zone_of_device_ops { struct thermal_trip { struct device_node *np; - unsigned long int temperature; - unsigned long int hysteresis; + int temperature; + int hysteresis; enum thermal_trip_type type; }; -- cgit v1.2.3 From fba7cd681b6155e2d93e7862fcd6f970336b83c3 Mon Sep 17 00:00:00 2001 From: Romain Perier Date: Thu, 14 Apr 2016 15:36:03 +0200 Subject: asm-generic/futex: Re-enable preemption in futex_atomic_cmpxchg_inatomic() The recent decoupling of pagefault disable and preempt disable added an explicit preempt_disable/enable() pair to the futex_atomic_cmpxchg_inatomic() implementation in asm-generic/futex.h. But it forgot to add preempt_enable() calls to the error handling code pathes, which results in a preemption count imbalance. This is observable on boot when the test for atomic_cmpxchg() is calling futex_atomic_cmpxchg_inatomic() on a NULL pointer. Add the missing preempt_enable() calls to the error handling code pathes. [ tglx: Massaged changelog ] Fixes: d9b9ff8c1889 ("sched/preempt, futex: Disable preemption in UP futex_atomic_cmpxchg_inatomic() explicitly") Signed-off-by: Romain Perier Cc: linux-arch@vger.kernel.org Cc: Thomas Petazzoni Cc: Arnd Bergmann Cc: Peter Zijlstra Cc: stable@vger.kernel.org Link: http://lkml.kernel.org/r/1460640963-690-1-git-send-email-romain.perier@free-electrons.com Signed-off-by: Thomas Gleixner --- include/asm-generic/futex.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/asm-generic/futex.h b/include/asm-generic/futex.h index e56272c919b5..bf2d34c9d804 100644 --- a/include/asm-generic/futex.h +++ b/include/asm-generic/futex.h @@ -108,11 +108,15 @@ futex_atomic_cmpxchg_inatomic(u32 *uval, u32 __user *uaddr, u32 val; preempt_disable(); - if (unlikely(get_user(val, uaddr) != 0)) + if (unlikely(get_user(val, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } - if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) + if (val == oldval && unlikely(put_user(newval, uaddr) != 0)) { + preempt_enable(); return -EFAULT; + } *uval = val; preempt_enable(); -- cgit v1.2.3 From 3194ed497939c6448005542e3ca4fa2386968fa0 Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2016 17:49:11 +0200 Subject: ALSA: hda - Fix possible race on regmap bypass flip HD-audio driver uses regmap cache bypass feature for reading a raw value without the cache. But this is racy since both the cached and the uncached reads may occur concurrently. The former is done via the normal control API access while the latter comes from the proc file read. Even though the regmap itself has the protection against the concurrent accesses, the flag set/reset is done without the protection, so it may lead to inconsistent state of bypass flag that doesn't match with the current read and occasionally result in a kernel WARNING like: WARNING: CPU: 3 PID: 2731 at drivers/base/regmap/regcache.c:499 regcache_cache_only+0x78/0x93 One way to work around such a problem is to wrap with a mutex. But in this case, the solution is simpler: for the uncached read, we just skip the regmap and directly calls its accessor. The verb execution there is protected by itself, so basically it's safe to call individually. Bugzilla: https://bugzilla.kernel.org/show_bug.cgi?id=116171 Signed-off-by: Takashi Iwai --- include/sound/hda_regmap.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/sound/hda_regmap.h b/include/sound/hda_regmap.h index 2767c55a641e..ca64f0f50b45 100644 --- a/include/sound/hda_regmap.h +++ b/include/sound/hda_regmap.h @@ -17,6 +17,8 @@ int snd_hdac_regmap_add_vendor_verb(struct hdac_device *codec, unsigned int verb); int snd_hdac_regmap_read_raw(struct hdac_device *codec, unsigned int reg, unsigned int *val); +int snd_hdac_regmap_read_raw_uncached(struct hdac_device *codec, + unsigned int reg, unsigned int *val); int snd_hdac_regmap_write_raw(struct hdac_device *codec, unsigned int reg, unsigned int val); int snd_hdac_regmap_update_raw(struct hdac_device *codec, unsigned int reg, -- cgit v1.2.3 From cfea5a688eb37bcd1081255df9f9f777f4e61999 Mon Sep 17 00:00:00 2001 From: Martin KaFai Lau Date: Tue, 19 Apr 2016 22:39:29 -0700 Subject: tcp: Merge tx_flags and tskey in tcp_shifted_skb After receiving sacks, tcp_shifted_skb() will collapse skbs if possible. tx_flags and tskey also have to be merged. This patch reuses the tcp_skb_collapse_tstamp() to handle them. BPF Output Before: ~~~~~ BPF Output After: ~~~~~ <...>-2024 [007] d.s. 88.644374: : ee_data:14599 Packetdrill Script: ~~~~~ +0 `sysctl -q -w net.ipv4.tcp_min_tso_segs=10` +0 `sysctl -q -w net.ipv4.tcp_no_metrics_save=1` +0 socket(..., SOCK_STREAM, IPPROTO_TCP) = 3 +0 setsockopt(3, SOL_SOCKET, SO_REUSEADDR, [1], 4) = 0 +0 bind(3, ..., ...) = 0 +0 listen(3, 1) = 0 0.100 < S 0:0(0) win 32792 0.100 > S. 0:0(0) ack 1 0.200 < . 1:1(0) ack 1 win 257 0.200 accept(3, ..., ...) = 4 +0 setsockopt(4, SOL_TCP, TCP_NODELAY, [1], 4) = 0 0.200 write(4, ..., 1460) = 1460 +0 setsockopt(4, SOL_SOCKET, 37, [2688], 4) = 0 0.200 write(4, ..., 13140) = 13140 0.200 > P. 1:1461(1460) ack 1 0.200 > . 1461:8761(7300) ack 1 0.200 > P. 8761:14601(5840) ack 1 0.300 < . 1:1(0) ack 1 win 257 0.300 > P. 1:1461(1460) ack 1 0.400 < . 1:1(0) ack 14601 win 257 0.400 close(4) = 0 0.400 > F. 14601:14601(0) ack 1 0.500 < F. 1:1(0) ack 14602 win 257 0.500 > . 14602:14602(0) ack 2 Signed-off-by: Martin KaFai Lau Cc: Eric Dumazet Cc: Neal Cardwell Cc: Soheil Hassas Yeganeh Cc: Willem de Bruijn Cc: Yuchung Cheng Acked-by: Soheil Hassas Yeganeh Tested-by: Soheil Hassas Yeganeh Signed-off-by: David S. Miller --- include/net/tcp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/tcp.h b/include/net/tcp.h index b91370f61be6..6db10228113f 100644 --- a/include/net/tcp.h +++ b/include/net/tcp.h @@ -552,6 +552,8 @@ void tcp_send_ack(struct sock *sk); void tcp_send_delayed_ack(struct sock *sk); void tcp_send_loss_probe(struct sock *sk); bool tcp_schedule_loss_probe(struct sock *sk); +void tcp_skb_collapse_tstamp(struct sk_buff *skb, + const struct sk_buff *next_skb); /* tcp_input.c */ void tcp_resume_early_retransmit(struct sock *sk); -- cgit v1.2.3 From 4bfd2e6e53435a214888fd35e230157a38ffc6a0 Mon Sep 17 00:00:00 2001 From: Daniel Jurgens Date: Wed, 20 Apr 2016 16:01:16 +0300 Subject: net/mlx4_core: Avoid repeated calls to pci enable/disable Maintain the PCI status and provide wrappers for enabling and disabling the PCI device. Performing the actions more than once without doing its opposite results in warning logs. This occurred when EEH hotplugged the device causing a warning for disabling an already disabled device. Fixes: 2ba5fbd62b25 ('net/mlx4_core: Handle AER flow properly') Signed-off-by: Daniel Jurgens Signed-off-by: Yishai Hadas Signed-off-by: Or Gerlitz Signed-off-by: David S. Miller --- include/linux/mlx4/device.h | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'include') diff --git a/include/linux/mlx4/device.h b/include/linux/mlx4/device.h index 8541a913f6a3..d1f904c8b2cb 100644 --- a/include/linux/mlx4/device.h +++ b/include/linux/mlx4/device.h @@ -828,6 +828,11 @@ struct mlx4_vf_dev { u8 n_ports; }; +enum mlx4_pci_status { + MLX4_PCI_STATUS_DISABLED, + MLX4_PCI_STATUS_ENABLED, +}; + struct mlx4_dev_persistent { struct pci_dev *pdev; struct mlx4_dev *dev; @@ -841,6 +846,8 @@ struct mlx4_dev_persistent { u8 state; struct mutex interface_state_mutex; /* protect SW state */ u8 interface_state; + struct mutex pci_status_mutex; /* sync pci state */ + enum mlx4_pci_status pci_status; }; struct mlx4_dev { -- cgit v1.2.3 From 221004c66a58949a0f25c937a6789c0839feb530 Mon Sep 17 00:00:00 2001 From: Huacai Chen Date: Tue, 19 Apr 2016 19:19:11 +0800 Subject: drm: Loongson-3 doesn't fully support wc memory Signed-off-by: Huacai Chen Cc: stable@vger.kernel.org Reviewed-by: Alex Deucher Signed-off-by: Dave Airlie --- include/drm/drm_cache.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/drm/drm_cache.h b/include/drm/drm_cache.h index 461a0558bca4..cebecff536a3 100644 --- a/include/drm/drm_cache.h +++ b/include/drm/drm_cache.h @@ -39,6 +39,8 @@ static inline bool drm_arch_can_wc_memory(void) { #if defined(CONFIG_PPC) && !defined(CONFIG_NOT_COHERENT_CACHE) return false; +#elif defined(CONFIG_MIPS) && defined(CONFIG_CPU_LOONGSON3) + return false; #else return true; #endif -- cgit v1.2.3 From 75dd602a5198a6e5f75534db52b6e6fbaabb33d1 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 30 Mar 2016 11:36:59 +0200 Subject: lockdep: Fix lock_chain::base size lock_chain::base is used to store an index into the chain_hlocks[] array, however that array contains more elements than can be indexed using the u16. Change the lock_chain structure to use a bitfield to encode the data it needs and add BUILD_BUG_ON() assertions to check the fields are wide enough. Also, for DEBUG_LOCKDEP, assert that we don't run out of elements of that array; as that would wreck the collision detectoring. Signed-off-by: Peter Zijlstra (Intel) Cc: Alfredo Alvarez Fernandez Cc: Andrew Morton Cc: Linus Torvalds Cc: Paul E. McKenney Cc: Peter Zijlstra Cc: Sedat Dilek Cc: Theodore Ts'o Cc: Thomas Gleixner Link: http://lkml.kernel.org/r/20160330093659.GS3408@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- include/linux/lockdep.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index d026b190c530..d10ef06971b5 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -196,9 +196,11 @@ struct lock_list { * We record lock dependency chains, so that we can cache them: */ struct lock_chain { - u8 irq_context; - u8 depth; - u16 base; + /* see BUILD_BUG_ON()s in lookup_chain_cache() */ + unsigned int irq_context : 2, + depth : 6, + base : 24; + /* 4 byte hole */ struct hlist_node entry; u64 chain_key; }; -- cgit v1.2.3 From 987aedb5d6f6e10c5203c6d0aab9a60ec22c7e93 Mon Sep 17 00:00:00 2001 From: Andre Przywara Date: Mon, 11 Apr 2016 10:17:46 +0100 Subject: generic syscalls: wire up preadv2 and pwritev2 syscalls These new syscalls are implemented as generic code, so enable them for architectures like arm64 which use the generic syscall table. Signed-off-by: Andre Przywara Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 2622b33fb2ec..6e0f5f01734c 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -717,9 +717,13 @@ __SYSCALL(__NR_membarrier, sys_membarrier) __SYSCALL(__NR_mlock2, sys_mlock2) #define __NR_copy_file_range 285 __SYSCALL(__NR_copy_file_range, sys_copy_file_range) +#define __NR_preadv2 286 +__SYSCALL(__NR_preadv2, sys_preadv2) +#define __NR_pwritev2 287 +__SYSCALL(__NR_pwritev2, sys_pwritev2) #undef __NR_syscalls -#define __NR_syscalls 286 +#define __NR_syscalls 288 /* * All syscalls below here should go away really, -- cgit v1.2.3 From 7ceb2afbd6aee4643056b47156baad6841db8e78 Mon Sep 17 00:00:00 2001 From: Elad Raz Date: Thu, 21 Apr 2016 12:52:43 +0200 Subject: switchdev: Adding complete operation to deferred switchdev ops When using switchdev deferred operation (SWITCHDEV_F_DEFER), the operation is executed in different context and the application doesn't have any way to get the operation real status. Adding a completion callback fixes that. This patch adds fields to switchdev_attr and switchdev_obj "complete_priv" field which is used by the "complete" callback. Application can set a complete function which will be called once the operation executed. Signed-off-by: Elad Raz Signed-off-by: Jiri Pirko Reviewed-by: Ido Schimmel Acked-by: Nikolay Aleksandrov Signed-off-by: David S. Miller --- include/net/switchdev.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/net/switchdev.h b/include/net/switchdev.h index d451122e8404..51d77b2ce2b2 100644 --- a/include/net/switchdev.h +++ b/include/net/switchdev.h @@ -54,6 +54,8 @@ struct switchdev_attr { struct net_device *orig_dev; enum switchdev_attr_id id; u32 flags; + void *complete_priv; + void (*complete)(struct net_device *dev, int err, void *priv); union { struct netdev_phys_item_id ppid; /* PORT_PARENT_ID */ u8 stp_state; /* PORT_STP_STATE */ @@ -75,6 +77,8 @@ struct switchdev_obj { struct net_device *orig_dev; enum switchdev_obj_id id; u32 flags; + void *complete_priv; + void (*complete)(struct net_device *dev, int err, void *priv); }; /* SWITCHDEV_OBJ_ID_PORT_VLAN */ -- cgit v1.2.3 From 748164802c1bd2c52937d20782b07d8c68dd9a4f Mon Sep 17 00:00:00 2001 From: Sabrina Dubroca Date: Fri, 22 Apr 2016 11:28:08 +0200 Subject: macsec: add missing macsec prefix in uapi I accidentally forgot some MACSEC_ prefixes in if_macsec.h. Fixes: dece8d2b78d1 ("uapi: add MACsec bits") Signed-off-by: Sabrina Dubroca Acked-by: Hannes Frederic Sowa Signed-off-by: David S. Miller --- include/uapi/linux/if_macsec.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/if_macsec.h b/include/uapi/linux/if_macsec.h index 26b0d1e3e3e7..4c58d9917aa4 100644 --- a/include/uapi/linux/if_macsec.h +++ b/include/uapi/linux/if_macsec.h @@ -19,8 +19,8 @@ #define MACSEC_MAX_KEY_LEN 128 -#define DEFAULT_CIPHER_ID 0x0080020001000001ULL -#define DEFAULT_CIPHER_ALT 0x0080C20001000001ULL +#define MACSEC_DEFAULT_CIPHER_ID 0x0080020001000001ULL +#define MACSEC_DEFAULT_CIPHER_ALT 0x0080C20001000001ULL #define MACSEC_MIN_ICV_LEN 8 #define MACSEC_MAX_ICV_LEN 32 -- cgit v1.2.3 From 046339eaab26804f52f6604877f5674f70815b26 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:03 +0300 Subject: net/mlx5e: Device's mtu field is u16 and not int For set/query MTU port firmware commands the MTU field is 16 bits, here I changed all the "int mtu" parameters of the functions wrapping those firmware commands to be u16. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/port.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/port.h b/include/linux/mlx5/port.h index a1d145abd4eb..b30250ab7604 100644 --- a/include/linux/mlx5/port.h +++ b/include/linux/mlx5/port.h @@ -54,9 +54,9 @@ int mlx5_set_port_admin_status(struct mlx5_core_dev *dev, int mlx5_query_port_admin_status(struct mlx5_core_dev *dev, enum mlx5_port_status *status); -int mlx5_set_port_mtu(struct mlx5_core_dev *dev, int mtu, u8 port); -void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, int *max_mtu, u8 port); -void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, int *oper_mtu, +int mlx5_set_port_mtu(struct mlx5_core_dev *dev, u16 mtu, u8 port); +void mlx5_query_port_max_mtu(struct mlx5_core_dev *dev, u16 *max_mtu, u8 port); +void mlx5_query_port_oper_mtu(struct mlx5_core_dev *dev, u16 *oper_mtu, u8 port); int mlx5_query_port_vl_hw_cap(struct mlx5_core_dev *dev, -- cgit v1.2.3 From cd255efff9baadd654d6160e52d17ae7c568c9d3 Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Fri, 22 Apr 2016 00:33:05 +0300 Subject: net/mlx5e: Use vport MTU rather than physical port MTU Set and report vport MTU rather than physical MTU, Driver will set both vport and physical port mtu and will rely on the query of vport mtu. SRIOV VFs have to report their MTU to their vport manager (PF), and this will allow them to work with any MTU they need without failing the request. Also for some cases where the PF is not a port owner, PF can work with MTU less than the physical port mtu if set physical port mtu didn't take effect. Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/vport.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/vport.h b/include/linux/mlx5/vport.h index bd93e6323603..301da4a5e6bf 100644 --- a/include/linux/mlx5/vport.h +++ b/include/linux/mlx5/vport.h @@ -45,6 +45,8 @@ int mlx5_query_nic_vport_mac_address(struct mlx5_core_dev *mdev, u16 vport, u8 *addr); int mlx5_modify_nic_vport_mac_address(struct mlx5_core_dev *dev, u16 vport, u8 *addr); +int mlx5_query_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 *mtu); +int mlx5_modify_nic_vport_mtu(struct mlx5_core_dev *mdev, u16 mtu); int mlx5_query_nic_vport_system_image_guid(struct mlx5_core_dev *mdev, u64 *system_image_guid); int mlx5_query_nic_vport_node_guid(struct mlx5_core_dev *mdev, u64 *node_guid); -- cgit v1.2.3 From 5fc7197d3a256d9c5de3134870304b24892a4908 Mon Sep 17 00:00:00 2001 From: Majd Dibbiny Date: Fri, 22 Apr 2016 00:33:07 +0300 Subject: net/mlx5: Add pci shutdown callback This patch introduces kexec support for mlx5. When switching kernels, kexec() calls shutdown, which unloads the driver and cleans its resources. In addition, remove unregister netdev from shutdown flow. This will allow a clean shutdown, even if some netdev clients did not release their reference from this netdev. Releasing The HW resources only is enough as the kernel is shutting down Signed-off-by: Majd Dibbiny Signed-off-by: Tariq Toukan Signed-off-by: Haggai Abramovsky Signed-off-by: Saeed Mahameed Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index dcd5ac8d3b14..369c837d40f5 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -519,8 +519,9 @@ enum mlx5_device_state { }; enum mlx5_interface_state { - MLX5_INTERFACE_STATE_DOWN, - MLX5_INTERFACE_STATE_UP, + MLX5_INTERFACE_STATE_DOWN = BIT(0), + MLX5_INTERFACE_STATE_UP = BIT(1), + MLX5_INTERFACE_STATE_SHUTDOWN = BIT(2), }; enum mlx5_pci_status { @@ -544,7 +545,7 @@ struct mlx5_core_dev { enum mlx5_device_state state; /* sync interface state */ struct mutex intf_state_mutex; - enum mlx5_interface_state interface_state; + unsigned long intf_state; void (*event) (struct mlx5_core_dev *dev, enum mlx5_dev_event event, unsigned long param); -- cgit v1.2.3 From 3020ca711871fdaf0c15c8bab677a6bc302e28fe Mon Sep 17 00:00:00 2001 From: Hans Verkuil Date: Fri, 22 Apr 2016 04:00:50 -0300 Subject: [media] v4l2-dv-timings.h: fix polarity for 4k formats The VSync polarity was negative instead of positive for the 4k CEA formats. I probably copy-and-pasted these from the DMT 4k format, which does have a negative VSync polarity. Signed-off-by: Hans Verkuil Reported-by: Martin Bugge Cc: # for v4.1 and up Signed-off-by: Mauro Carvalho Chehab --- include/uapi/linux/v4l2-dv-timings.h | 30 ++++++++++++++++++++---------- 1 file changed, 20 insertions(+), 10 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/v4l2-dv-timings.h b/include/uapi/linux/v4l2-dv-timings.h index c039f1d68a09..086168e18ca8 100644 --- a/include/uapi/linux/v4l2-dv-timings.h +++ b/include/uapi/linux/v4l2-dv-timings.h @@ -183,7 +183,8 @@ #define V4L2_DV_BT_CEA_3840X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1276, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -191,14 +192,16 @@ #define V4L2_DV_BT_CEA_3840X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -206,14 +209,16 @@ #define V4L2_DV_BT_CEA_3840X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 1056, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_3840X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(3840, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(3840, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 176, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -221,7 +226,8 @@ #define V4L2_DV_BT_CEA_4096X2160P24 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 1020, 88, 296, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -229,14 +235,16 @@ #define V4L2_DV_BT_CEA_4096X2160P25 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P30 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 297000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ @@ -244,14 +252,16 @@ #define V4L2_DV_BT_CEA_4096X2160P50 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 968, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, V4L2_DV_FL_IS_CE_VIDEO) \ } #define V4L2_DV_BT_CEA_4096X2160P60 { \ .type = V4L2_DV_BT_656_1120, \ - V4L2_INIT_BT_TIMINGS(4096, 2160, 0, V4L2_DV_HSYNC_POS_POL, \ + V4L2_INIT_BT_TIMINGS(4096, 2160, 0, \ + V4L2_DV_HSYNC_POS_POL | V4L2_DV_VSYNC_POS_POL, \ 594000000, 88, 88, 128, 8, 10, 72, 0, 0, 0, \ V4L2_DV_BT_STD_CEA861, \ V4L2_DV_FL_CAN_REDUCE_FPS | V4L2_DV_FL_IS_CE_VIDEO) \ -- cgit v1.2.3 From b93876845c5e30a92964eeb088d9d2e024118022 Mon Sep 17 00:00:00 2001 From: Ricardo Ribalda Date: Mon, 25 Apr 2016 06:04:45 -0300 Subject: [media] media: vb2: Fix regression on poll() for RW mode When using a device is read/write mode, vb2 does not handle properly the first select/poll operation. The reason for this, is that when this code has been refactored, some of the operations have changed their order, and now fileio emulator is not started. The reintroduced check to the core is enabled by a quirk flag, that avoids this check by other subsystems like DVB. Fixes: 49d8ab9feaf2 ("media] media: videobuf2: Separate vb2_poll()") Reported-by: Dimitrios Katsaros Cc: Junghak Sung Cc: # for v4.5 and up Signed-off-by: Ricardo Ribalda Delgado Signed-off-by: Hans Verkuil Signed-off-by: Mauro Carvalho Chehab --- include/media/videobuf2-core.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/media/videobuf2-core.h b/include/media/videobuf2-core.h index 5342ff4d748f..88e3ab496e8f 100644 --- a/include/media/videobuf2-core.h +++ b/include/media/videobuf2-core.h @@ -404,6 +404,9 @@ struct vb2_buf_ops { * @fileio_read_once: report EOF after reading the first buffer * @fileio_write_immediately: queue buffer after each write() call * @allow_zero_bytesused: allow bytesused == 0 to be passed to the driver + * @quirk_poll_must_check_waiting_for_buffers: Return POLLERR at poll when QBUF + * has not been called. This is a vb1 idiom that has been adopted + * also by vb2. * @lock: pointer to a mutex that protects the vb2_queue struct. The * driver can set this to a mutex to let the v4l2 core serialize * the queuing ioctls. If the driver wants to handle locking @@ -467,6 +470,7 @@ struct vb2_queue { unsigned fileio_read_once:1; unsigned fileio_write_immediately:1; unsigned allow_zero_bytesused:1; + unsigned quirk_poll_must_check_waiting_for_buffers:1; struct mutex *lock; void *owner; -- cgit v1.2.3 From 6c1ea260f89709e0021d2c59f8fd2a104b5b1123 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Mon, 11 Apr 2016 19:34:49 +0200 Subject: libceph: make authorizer destruction independent of ceph_auth_client Starting the kernel client with cephx disabled and then enabling cephx and restarting userspace daemons can result in a crash: [262671.478162] BUG: unable to handle kernel paging request at ffffebe000000000 [262671.531460] IP: [] kfree+0x5a/0x130 [262671.584334] PGD 0 [262671.635847] Oops: 0000 [#1] SMP [262672.055841] CPU: 22 PID: 2961272 Comm: kworker/22:2 Not tainted 4.2.0-34-generic #39~14.04.1-Ubuntu [262672.162338] Hardware name: Dell Inc. PowerEdge R720/068CDY, BIOS 2.4.3 07/09/2014 [262672.268937] Workqueue: ceph-msgr con_work [libceph] [262672.322290] task: ffff88081c2d0dc0 ti: ffff880149ae8000 task.ti: ffff880149ae8000 [262672.428330] RIP: 0010:[] [] kfree+0x5a/0x130 [262672.535880] RSP: 0018:ffff880149aeba58 EFLAGS: 00010286 [262672.589486] RAX: 000001e000000000 RBX: 0000000000000012 RCX: ffff8807e7461018 [262672.695980] RDX: 000077ff80000000 RSI: ffff88081af2be04 RDI: 0000000000000012 [262672.803668] RBP: ffff880149aeba78 R08: 0000000000000000 R09: 0000000000000000 [262672.912299] R10: ffffebe000000000 R11: ffff880819a60e78 R12: ffff8800aec8df40 [262673.021769] R13: ffffffffc035f70f R14: ffff8807e5b138e0 R15: ffff880da9785840 [262673.131722] FS: 0000000000000000(0000) GS:ffff88081fac0000(0000) knlGS:0000000000000000 [262673.245377] CS: 0010 DS: 0000 ES: 0000 CR0: 0000000080050033 [262673.303281] CR2: ffffebe000000000 CR3: 0000000001c0d000 CR4: 00000000001406e0 [262673.417556] Stack: [262673.472943] ffff880149aeba88 ffff88081af2be04 ffff8800aec8df40 ffff88081af2be04 [262673.583767] ffff880149aeba98 ffffffffc035f70f ffff880149aebac8 ffff8800aec8df00 [262673.694546] ffff880149aebac8 ffffffffc035c89e ffff8807e5b138e0 ffff8805b047f800 [262673.805230] Call Trace: [262673.859116] [] ceph_x_destroy_authorizer+0x1f/0x50 [libceph] [262673.968705] [] ceph_auth_destroy_authorizer+0x3e/0x60 [libceph] [262674.078852] [] put_osd+0x45/0x80 [libceph] [262674.134249] [] remove_osd+0xae/0x140 [libceph] [262674.189124] [] __reset_osd+0x103/0x150 [libceph] [262674.243749] [] kick_requests+0x223/0x460 [libceph] [262674.297485] [] ceph_osdc_handle_map+0x282/0x5e0 [libceph] [262674.350813] [] dispatch+0x4e/0x720 [libceph] [262674.403312] [] try_read+0x3d1/0x1090 [libceph] [262674.454712] [] ? dequeue_entity+0x152/0x690 [262674.505096] [] con_work+0xcb/0x1300 [libceph] [262674.555104] [] process_one_work+0x14e/0x3d0 [262674.604072] [] worker_thread+0x11a/0x470 [262674.652187] [] ? rescuer_thread+0x310/0x310 [262674.699022] [] kthread+0xd2/0xf0 [262674.744494] [] ? kthread_create_on_node+0x1c0/0x1c0 [262674.789543] [] ret_from_fork+0x3f/0x70 [262674.834094] [] ? kthread_create_on_node+0x1c0/0x1c0 What happens is the following: (1) new MON session is established (2) old "none" ac is destroyed (3) new "cephx" ac is constructed ... (4) old OSD session (w/ "none" authorizer) is put ceph_auth_destroy_authorizer(ac, osd->o_auth.authorizer) osd->o_auth.authorizer in the "none" case is just a bare pointer into ac, which contains a single static copy for all services. By the time we get to (4), "none" ac, freed in (2), is long gone. On top of that, a new vtable installed in (3) points us at ceph_x_destroy_authorizer(), so we end up trying to destroy a "none" authorizer with a "cephx" destructor operating on invalid memory! To fix this, decouple authorizer destruction from ac and do away with a single static "none" authorizer by making a copy for each OSD or MDS session. Authorizers themselves are independent of ac and so there is no reason for destroy_authorizer() to be an ac op. Make it an op on the authorizer itself by turning ceph_authorizer into a real struct. Fixes: http://tracker.ceph.com/issues/15447 Reported-by: Alan Zhang Signed-off-by: Ilya Dryomov Reviewed-by: Sage Weil --- include/linux/ceph/auth.h | 10 +++++----- include/linux/ceph/osd_client.h | 1 - 2 files changed, 5 insertions(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/ceph/auth.h b/include/linux/ceph/auth.h index 260d78b587c4..1563265d2097 100644 --- a/include/linux/ceph/auth.h +++ b/include/linux/ceph/auth.h @@ -12,9 +12,12 @@ */ struct ceph_auth_client; -struct ceph_authorizer; struct ceph_msg; +struct ceph_authorizer { + void (*destroy)(struct ceph_authorizer *); +}; + struct ceph_auth_handshake { struct ceph_authorizer *authorizer; void *authorizer_buf; @@ -62,8 +65,6 @@ struct ceph_auth_client_ops { struct ceph_auth_handshake *auth); int (*verify_authorizer_reply)(struct ceph_auth_client *ac, struct ceph_authorizer *a, size_t len); - void (*destroy_authorizer)(struct ceph_auth_client *ac, - struct ceph_authorizer *a); void (*invalidate_authorizer)(struct ceph_auth_client *ac, int peer_type); @@ -112,8 +113,7 @@ extern int ceph_auth_is_authenticated(struct ceph_auth_client *ac); extern int ceph_auth_create_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *auth); -extern void ceph_auth_destroy_authorizer(struct ceph_auth_client *ac, - struct ceph_authorizer *a); +void ceph_auth_destroy_authorizer(struct ceph_authorizer *a); extern int ceph_auth_update_authorizer(struct ceph_auth_client *ac, int peer_type, struct ceph_auth_handshake *a); diff --git a/include/linux/ceph/osd_client.h b/include/linux/ceph/osd_client.h index 4343df806710..cbf460927c42 100644 --- a/include/linux/ceph/osd_client.h +++ b/include/linux/ceph/osd_client.h @@ -16,7 +16,6 @@ struct ceph_msg; struct ceph_snap_context; struct ceph_osd_request; struct ceph_osd_client; -struct ceph_authorizer; /* * completion callback for async writepages -- cgit v1.2.3 From 841645b5f2dfceac69b78fcd0c9050868d41ea61 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Mon, 25 Apr 2016 15:33:55 -0400 Subject: ipv6: Revert optional address flusing on ifdown. This reverts the following three commits: 70af921db6f8835f4b11c65731116560adb00c14 799977d9aafbf0ca0b9c39b04cbfb16db71302c9 f1705ec197e705b79ea40fe7a2cc5acfa1d3bfac The feature was ill conceived, has terrible semantics, and has added nothing but regressions to the already fragile ipv6 stack. Fixes: f1705ec197e7 ("net: ipv6: Make address flushing on ifdown optional") Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 - include/uapi/linux/ipv6.h | 1 - 2 files changed, 2 deletions(-) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 7edc14fb66b6..4b2267e1b7c3 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,7 +62,6 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; - __s32 keep_addr_on_down; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index 395876060f50..ec117b65d5a5 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,7 +176,6 @@ enum { DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, - DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_MAX }; -- cgit v1.2.3 From 5cf1cacb49aee39c3e02ae87068fc3c6430659b0 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Thu, 21 Apr 2016 19:06:48 -0400 Subject: cgroup, cpuset: replace cpuset_post_attach_flush() with cgroup_subsys->post_attach callback Since e93ad19d0564 ("cpuset: make mm migration asynchronous"), cpuset kicks off asynchronous NUMA node migration if necessary during task migration and flushes it from cpuset_post_attach_flush() which is called at the end of __cgroup_procs_write(). This is to avoid performing migration with cgroup_threadgroup_rwsem write-locked which can lead to deadlock through dependency on kworker creation. memcg has a similar issue with charge moving, so let's convert it to an official callback rather than the current one-off cpuset specific function. This patch adds cgroup_subsys->post_attach callback and makes cpuset register cpuset_post_attach_flush() as its ->post_attach. The conversion is mostly one-to-one except that the new callback is called under cgroup_mutex. This is to guarantee that no other migration operations are started before ->post_attach callbacks are finished. cgroup_mutex is one of the outermost mutex in the system and has never been and shouldn't be a problem. We can add specialized synchronization around __cgroup_procs_write() but I don't think there's any noticeable benefit. Signed-off-by: Tejun Heo Cc: Li Zefan Cc: Johannes Weiner Cc: Michal Hocko Cc: # 4.4+ prerequisite for the next patch --- include/linux/cgroup-defs.h | 1 + include/linux/cpuset.h | 6 ------ 2 files changed, 1 insertion(+), 6 deletions(-) (limited to 'include') diff --git a/include/linux/cgroup-defs.h b/include/linux/cgroup-defs.h index 3e39ae5bc799..5b17de62c962 100644 --- a/include/linux/cgroup-defs.h +++ b/include/linux/cgroup-defs.h @@ -444,6 +444,7 @@ struct cgroup_subsys { int (*can_attach)(struct cgroup_taskset *tset); void (*cancel_attach)(struct cgroup_taskset *tset); void (*attach)(struct cgroup_taskset *tset); + void (*post_attach)(void); int (*can_fork)(struct task_struct *task); void (*cancel_fork)(struct task_struct *task); void (*fork)(struct task_struct *task); diff --git a/include/linux/cpuset.h b/include/linux/cpuset.h index fea160ee5803..85a868ccb493 100644 --- a/include/linux/cpuset.h +++ b/include/linux/cpuset.h @@ -137,8 +137,6 @@ static inline void set_mems_allowed(nodemask_t nodemask) task_unlock(current); } -extern void cpuset_post_attach_flush(void); - #else /* !CONFIG_CPUSETS */ static inline bool cpusets_enabled(void) { return false; } @@ -245,10 +243,6 @@ static inline bool read_mems_allowed_retry(unsigned int seq) return false; } -static inline void cpuset_post_attach_flush(void) -{ -} - #endif /* !CONFIG_CPUSETS */ #endif /* _LINUX_CPUSET_H */ -- cgit v1.2.3 From bb03ed216370cb021f377f923471e56d1de3ff5d Mon Sep 17 00:00:00 2001 From: Takashi Iwai Date: Thu, 21 Apr 2016 16:39:17 +0200 Subject: ALSA: hda - Update BCLK also at hotplug for i915 HSW/BDW The recent bug report suggests that BCLK setup for i915 HSW/BDW needs to be updated at each HDMI hotplug, not only at initialization and resume. That is, we need to update HSW_EM4 and HSW_EM5 registers at ELD notification, too. Otherwise the HDMI audio may be out of sync and played in a wrong pitch. However, the HDA codec driver has no access to the controller registers, and currently the code managing these registers is in hda_intel.c, i.e. local to the controller driver. For allowing the explicit BCLK update from the codec driver, as in this patch, the former haswell_set_bclk() in hda_intel.c is moved to hdac_i915.c and exposed as snd_hdac_i915_set_bclk(). This is called from both the HDA controller driver and intel_pin_eld_notify() in HDMI codec driver. Along with this change, snd_hdac_get_display_clk() gets dropped as it's no longer used. Bugzilla: https://bugs.freedesktop.org/show_bug.cgi?id=91410 Cc: # v4.5+ Signed-off-by: Takashi Iwai --- include/sound/hda_i915.h | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/sound/hda_i915.h b/include/sound/hda_i915.h index fa341fcb5829..f5842bcd9c94 100644 --- a/include/sound/hda_i915.h +++ b/include/sound/hda_i915.h @@ -9,7 +9,7 @@ #ifdef CONFIG_SND_HDA_I915 int snd_hdac_set_codec_wakeup(struct hdac_bus *bus, bool enable); int snd_hdac_display_power(struct hdac_bus *bus, bool enable); -int snd_hdac_get_display_clk(struct hdac_bus *bus); +void snd_hdac_i915_set_bclk(struct hdac_bus *bus); int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid, int rate); int snd_hdac_acomp_get_eld(struct hdac_bus *bus, hda_nid_t nid, bool *audio_enabled, char *buffer, int max_bytes); @@ -25,9 +25,8 @@ static inline int snd_hdac_display_power(struct hdac_bus *bus, bool enable) { return 0; } -static inline int snd_hdac_get_display_clk(struct hdac_bus *bus) +static inline void snd_hdac_i915_set_bclk(struct hdac_bus *bus) { - return 0; } static inline int snd_hdac_sync_audio_rate(struct hdac_bus *bus, hda_nid_t nid, int rate) -- cgit v1.2.3 From 6a923934c33c750a595868af6bef5f1a1fa90054 Mon Sep 17 00:00:00 2001 From: "David S. Miller" Date: Tue, 26 Apr 2016 11:47:41 -0400 Subject: Revert "ipv6: Revert optional address flusing on ifdown." This reverts commit 841645b5f2dfceac69b78fcd0c9050868d41ea61. Ok, this puts the feature back. I've decided to apply David A.'s bug fix and run with that rather than make everyone wait another whole release for this feature. Signed-off-by: David S. Miller --- include/linux/ipv6.h | 1 + include/uapi/linux/ipv6.h | 1 + 2 files changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/ipv6.h b/include/linux/ipv6.h index 4b2267e1b7c3..7edc14fb66b6 100644 --- a/include/linux/ipv6.h +++ b/include/linux/ipv6.h @@ -62,6 +62,7 @@ struct ipv6_devconf { struct in6_addr secret; } stable_secret; __s32 use_oif_addrs_only; + __s32 keep_addr_on_down; void *sysctl; }; diff --git a/include/uapi/linux/ipv6.h b/include/uapi/linux/ipv6.h index ec117b65d5a5..395876060f50 100644 --- a/include/uapi/linux/ipv6.h +++ b/include/uapi/linux/ipv6.h @@ -176,6 +176,7 @@ enum { DEVCONF_IGNORE_ROUTES_WITH_LINKDOWN, DEVCONF_DROP_UNICAST_IN_L2_MULTICAST, DEVCONF_DROP_UNSOLICITED_NA, + DEVCONF_KEEP_ADDR_ON_DOWN, DEVCONF_MAX }; -- cgit v1.2.3 From 8ead9dd54716d1e05e129959f702fcc1786f82b4 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 25 Apr 2016 20:04:08 -0700 Subject: devpts: more pty driver interface cleanups This is more prep-work for the upcoming pty changes. Still just code cleanup with no actual semantic changes. This removes a bunch pointless complexity by just having the slave pty side remember the dentry associated with the devpts slave rather than the inode. That allows us to remove all the "look up the dentry" code for when we want to remove it again. Together with moving the tty pointer from "inode->i_private" to "dentry->d_fsdata" and getting rid of pointless inode locking, this removes about 30 lines of code. Not only is the end result smaller, it's simpler and easier to understand. The old code, for example, depended on the d_find_alias() to not just find the dentry, but also to check that it is still hashed, which in turn validated the tty pointer in the inode. That is a _very_ roundabout way to say "invalidate the cached tty pointer when the dentry is removed". The new code just does dentry->d_fsdata = NULL; in devpts_pty_kill() instead, invalidating the tty pointer rather more directly and obviously. Don't do something complex and subtle when the obvious straightforward approach will do. The rest of the patch (ie apart from code deletion and the above tty pointer clearing) is just switching the calling convention to pass the dentry or file pointer around instead of the inode. Cc: Eric Biederman Cc: Peter Anvin Cc: Andy Lutomirski Cc: Al Viro Cc: Peter Hurley Cc: Serge Hallyn Cc: Willy Tarreau Cc: Aurelien Jarno Cc: Alan Cox Cc: Jann Horn Cc: Greg KH Cc: Jiri Slaby Cc: Florian Weimer Signed-off-by: Linus Torvalds --- include/linux/devpts_fs.h | 6 +++--- include/linux/tty_driver.h | 4 ++-- 2 files changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/linux/devpts_fs.h b/include/linux/devpts_fs.h index 358a4db72a27..5871f292b596 100644 --- a/include/linux/devpts_fs.h +++ b/include/linux/devpts_fs.h @@ -27,11 +27,11 @@ int devpts_new_index(struct pts_fs_info *); void devpts_kill_index(struct pts_fs_info *, int); /* mknod in devpts */ -struct inode *devpts_pty_new(struct pts_fs_info *, dev_t, int, void *); +struct dentry *devpts_pty_new(struct pts_fs_info *, int, void *); /* get private structure */ -void *devpts_get_priv(struct inode *pts_inode); +void *devpts_get_priv(struct dentry *); /* unlink */ -void devpts_pty_kill(struct inode *inode); +void devpts_pty_kill(struct dentry *); #endif diff --git a/include/linux/tty_driver.h b/include/linux/tty_driver.h index 161052477f77..b742b5e47cc2 100644 --- a/include/linux/tty_driver.h +++ b/include/linux/tty_driver.h @@ -7,7 +7,7 @@ * defined; unless noted otherwise, they are optional, and can be * filled in with a null pointer. * - * struct tty_struct * (*lookup)(struct tty_driver *self, int idx) + * struct tty_struct * (*lookup)(struct tty_driver *self, struct file *, int idx) * * Return the tty device corresponding to idx, NULL if there is not * one currently in use and an ERR_PTR value on error. Called under @@ -250,7 +250,7 @@ struct serial_icounter_struct; struct tty_operations { struct tty_struct * (*lookup)(struct tty_driver *driver, - struct inode *inode, int idx); + struct file *filp, int idx); int (*install)(struct tty_driver *driver, struct tty_struct *tty); void (*remove)(struct tty_driver *driver, struct tty_struct *tty); int (*open)(struct tty_struct * tty, struct file * filp); -- cgit v1.2.3 From 0224a4a30b57385a60065aa598181868881d8fc6 Mon Sep 17 00:00:00 2001 From: Heikki Krogerus Date: Wed, 27 Apr 2016 14:04:20 +0300 Subject: device property: Avoid potential dereferences of invalid pointers Since fwnode may hold ERR_PTR(-ENODEV) or it may be NULL, the fwnode type checks is_of_node(), is_acpi_node() and is is_pset_node() need to consider it. Using IS_ERR_OR_NULL() to check it. Fixes: 0d67e0fa1664 (device property: fix for a case of use-after-free) Reported-by: Dan Carpenter Signed-off-by: Heikki Krogerus [ rjw: Subject & changelog ] Signed-off-by: Rafael J. Wysocki --- include/acpi/acpi_bus.h | 4 ++-- include/linux/of.h | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/acpi/acpi_bus.h b/include/acpi/acpi_bus.h index 14362a84c78e..3a932501d690 100644 --- a/include/acpi/acpi_bus.h +++ b/include/acpi/acpi_bus.h @@ -394,13 +394,13 @@ struct acpi_data_node { static inline bool is_acpi_node(struct fwnode_handle *fwnode) { - return fwnode && (fwnode->type == FWNODE_ACPI + return !IS_ERR_OR_NULL(fwnode) && (fwnode->type == FWNODE_ACPI || fwnode->type == FWNODE_ACPI_DATA); } static inline bool is_acpi_device_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_ACPI; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_ACPI; } static inline struct acpi_device *to_acpi_device_node(struct fwnode_handle *fwnode) diff --git a/include/linux/of.h b/include/linux/of.h index 7fcb681baadf..31758036787c 100644 --- a/include/linux/of.h +++ b/include/linux/of.h @@ -133,7 +133,7 @@ void of_core_init(void); static inline bool is_of_node(struct fwnode_handle *fwnode) { - return fwnode && fwnode->type == FWNODE_OF; + return !IS_ERR_OR_NULL(fwnode) && fwnode->type == FWNODE_OF; } static inline struct device_node *to_of_node(struct fwnode_handle *fwnode) -- cgit v1.2.3 From 986ef95ecdd3eb6fa29433e68faa94c7624083be Mon Sep 17 00:00:00 2001 From: Sagi Grimberg Date: Thu, 31 Mar 2016 19:03:25 +0300 Subject: IB/mlx5: Expose correct max_sge_rd limit mlx5 devices (Connect-IB, ConnectX-4, ConnectX-4-LX) has a limitation where rdma read work queue entries cannot exceed 512 bytes. A rdma_read wqe needs to fit in 512 bytes: - wqe control segment (16 bytes) - rdma segment (16 bytes) - scatter elements (16 bytes each) So max_sge_rd should be: (512 - 16 - 16) / 16 = 30. Cc: linux-stable@vger.kernel.org Reported-by: Christoph Hellwig Tested-by: Christoph Hellwig Signed-off-by: Sagi Grimberg Signed-off-by: Leon Romanovsky Signed-off-by: Doug Ledford --- include/linux/mlx5/device.h | 11 +++++++++++ 1 file changed, 11 insertions(+) (limited to 'include') diff --git a/include/linux/mlx5/device.h b/include/linux/mlx5/device.h index 8156e3c9239c..b3575f392492 100644 --- a/include/linux/mlx5/device.h +++ b/include/linux/mlx5/device.h @@ -392,6 +392,17 @@ enum { MLX5_CAP_OFF_CMDIF_CSUM = 46, }; +enum { + /* + * Max wqe size for rdma read is 512 bytes, so this + * limits our max_sge_rd as the wqe needs to fit: + * - ctrl segment (16 bytes) + * - rdma segment (16 bytes) + * - scatter elements (16 bytes each) + */ + MLX5_MAX_SGE_RD = (512 - 16 - 16) / 16 +}; + struct mlx5_inbox_hdr { __be16 opcode; u8 rsvd[4]; -- cgit v1.2.3 From e6bd18f57aad1a2d1ef40e646d03ed0f2515c9e3 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Sun, 10 Apr 2016 19:13:13 -0600 Subject: IB/security: Restrict use of the write() interface The drivers/infiniband stack uses write() as a replacement for bi-directional ioctl(). This is not safe. There are ways to trigger write calls that result in the return structure that is normally written to user space being shunted off to user specified kernel memory instead. For the immediate repair, detect and deny suspicious accesses to the write API. For long term, update the user space libraries and the kernel API to something that doesn't present the same security vulnerabilities (likely a structured ioctl() interface). The impacted uAPI interfaces are generally only available if hardware from drivers/infiniband is installed in the system. Reported-by: Jann Horn Signed-off-by: Linus Torvalds Signed-off-by: Jason Gunthorpe [ Expanded check to all known write() entry points ] Cc: stable@vger.kernel.org Signed-off-by: Doug Ledford --- include/rdma/ib.h | 16 ++++++++++++++++ 1 file changed, 16 insertions(+) (limited to 'include') diff --git a/include/rdma/ib.h b/include/rdma/ib.h index cf8f9e700e48..a6b93706b0fc 100644 --- a/include/rdma/ib.h +++ b/include/rdma/ib.h @@ -34,6 +34,7 @@ #define _RDMA_IB_H #include +#include struct ib_addr { union { @@ -86,4 +87,19 @@ struct sockaddr_ib { __u64 sib_scope_id; }; +/* + * The IB interfaces that use write() as bi-directional ioctl() are + * fundamentally unsafe, since there are lots of ways to trigger "write()" + * calls from various contexts with elevated privileges. That includes the + * traditional suid executable error message writes, but also various kernel + * interfaces that can write to file descriptors. + * + * This function provides protection for the legacy API by restricting the + * calling context. + */ +static inline bool ib_safe_file_access(struct file *filp) +{ + return filp->f_cred == current_cred() && segment_eq(get_fs(), USER_DS); +} + #endif /* _RDMA_IB_H */ -- cgit v1.2.3 From 7b7483409f09c15f30ac43242ead1ab3061e1f59 Mon Sep 17 00:00:00 2001 From: Marcelo Ricardo Leitner Date: Mon, 25 Apr 2016 15:13:17 -0300 Subject: net: fix net_gso_ok for new GSO types. Fix casting in net_gso_ok. Otherwise the shift on gso_type << NETIF_F_GSO_SHIFT may hit the 32th bit and make it look like a INT_MIN, which is then promoted from signed to uint64 which is 0xffffffff80000000, resulting in wrong behavior when it is and'ed with the feature itself, as in: This test app: #include #include int main(int argc, char **argv) { uint64_t feature1; uint64_t feature2; int gso_type = 1 << 15; feature1 = gso_type << 16; feature2 = (uint64_t)gso_type << 16; printf("%lx %lx\n", feature1, feature2); return 0; } Gives: ffffffff80000000 80000000 So that this: return (features & feature) == feature; Actually works on more bits than expected and invalid ones. Fix is to promote it earlier. Issue noted while rebasing SCTP GSO patch but posting separetely as someone else may experience this meanwhile. Signed-off-by: Marcelo Ricardo Leitner Signed-off-by: David S. Miller --- include/linux/netdevice.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/netdevice.h b/include/linux/netdevice.h index 8395308a2445..b3c46b019ac1 100644 --- a/include/linux/netdevice.h +++ b/include/linux/netdevice.h @@ -4004,7 +4004,7 @@ netdev_features_t netif_skb_features(struct sk_buff *skb); static inline bool net_gso_ok(netdev_features_t features, int gso_type) { - netdev_features_t feature = gso_type << NETIF_F_GSO_SHIFT; + netdev_features_t feature = (netdev_features_t)gso_type << NETIF_F_GSO_SHIFT; /* check flags correspondence */ BUILD_BUG_ON(SKB_GSO_TCPV4 != (NETIF_F_TSO >> NETIF_F_GSO_SHIFT)); -- cgit v1.2.3 From 92117d8443bc5afacc8d5ba82e541946310f106e Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Wed, 27 Apr 2016 18:56:20 -0700 Subject: bpf: fix refcnt overflow On a system with >32Gbyte of phyiscal memory and infinite RLIMIT_MEMLOCK, the malicious application may overflow 32-bit bpf program refcnt. It's also possible to overflow map refcnt on 1Tb system. Impose 32k hard limit which means that the same bpf program or map cannot be shared by more than 32k processes. Fixes: 1be7f75d1668 ("bpf: enable non-root eBPF programs") Reported-by: Jann Horn Signed-off-by: Alexei Starovoitov Acked-by: Daniel Borkmann Signed-off-by: David S. Miller --- include/linux/bpf.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf.h b/include/linux/bpf.h index 21ee41b92e8a..f1d5c5acc8dd 100644 --- a/include/linux/bpf.h +++ b/include/linux/bpf.h @@ -171,12 +171,13 @@ void bpf_register_prog_type(struct bpf_prog_type_list *tl); void bpf_register_map_type(struct bpf_map_type_list *tl); struct bpf_prog *bpf_prog_get(u32 ufd); +struct bpf_prog *bpf_prog_inc(struct bpf_prog *prog); void bpf_prog_put(struct bpf_prog *prog); void bpf_prog_put_rcu(struct bpf_prog *prog); struct bpf_map *bpf_map_get_with_uref(u32 ufd); struct bpf_map *__bpf_map_get(struct fd f); -void bpf_map_inc(struct bpf_map *map, bool uref); +struct bpf_map *bpf_map_inc(struct bpf_map *map, bool uref); void bpf_map_put_with_uref(struct bpf_map *map); void bpf_map_put(struct bpf_map *map); int bpf_map_precharge_memlock(u32 pages); -- cgit v1.2.3 From 66ee95d16a7f1b7b4f1dd74a2d81c6e19dc29a14 Mon Sep 17 00:00:00 2001 From: Steve Capper Date: Thu, 28 Apr 2016 16:18:24 -0700 Subject: mm: exclude HugeTLB pages from THP page_mapped() logic HugeTLB pages cannot be split, so we use the compound_mapcount to track rmaps. Currently page_mapped() will check the compound_mapcount, but will also go through the constituent pages of a THP compound page and query the individual _mapcount's too. Unfortunately, page_mapped() does not distinguish between HugeTLB and THP compound pages and assumes that a compound page always needs to have HPAGE_PMD_NR pages querying. For most cases when dealing with HugeTLB this is just inefficient, but for scenarios where the HugeTLB page size is less than the pmd block size (e.g. when using contiguous bit on ARM) this can lead to crashes. This patch adjusts the page_mapped function such that we skip the unnecessary THP reference checks for HugeTLB pages. Fixes: e1534ae95004 ("mm: differentiate page_mapped() from page_mapcount() for compound pages") Signed-off-by: Steve Capper Acked-by: Kirill A. Shutemov Cc: Will Deacon Cc: Catalin Marinas Cc: Michal Hocko Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index a55e5be0894f..79b6c18d0a38 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1031,6 +1031,8 @@ static inline bool page_mapped(struct page *page) page = compound_head(page); if (atomic_read(compound_mapcount_ptr(page)) >= 0) return true; + if (PageHuge(page)) + return false; for (i = 0; i < hpage_nr_pages(page); i++) { if (atomic_read(&page[i]._mapcount) >= 0) return true; -- cgit v1.2.3 From aa88b68c3b1dce8bc3fd54c8a7372a777ff265cd Mon Sep 17 00:00:00 2001 From: "Kirill A. Shutemov" Date: Thu, 28 Apr 2016 16:18:27 -0700 Subject: thp: keep huge zero page pinned until tlb flush Andrea has found[1] a race condition on MMU-gather based TLB flush vs split_huge_page() or shrinker which frees huge zero under us (patch 1/2 and 2/2 respectively). With new THP refcounting, we don't need patch 1/2: mmu_gather keeps the page pinned until flush is complete and the pin prevents the page from being split under us. We still need patch 2/2. This is simplified version of Andrea's patch. We don't need fancy encoding. [1] http://lkml.kernel.org/r/1447938052-22165-1-git-send-email-aarcange@redhat.com Signed-off-by: Kirill A. Shutemov Reported-by: Andrea Arcangeli Reviewed-by: Andrea Arcangeli Cc: "Aneesh Kumar K.V" Cc: Mel Gorman Cc: Hugh Dickins Cc: Johannes Weiner Cc: Dave Hansen Cc: Vlastimil Babka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/huge_mm.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/linux/huge_mm.h b/include/linux/huge_mm.h index 7008623e24b1..d7b9e5346fba 100644 --- a/include/linux/huge_mm.h +++ b/include/linux/huge_mm.h @@ -152,6 +152,7 @@ static inline bool is_huge_zero_pmd(pmd_t pmd) } struct page *get_huge_zero_page(void); +void put_huge_zero_page(void); #else /* CONFIG_TRANSPARENT_HUGEPAGE */ #define HPAGE_PMD_SHIFT ({ BUILD_BUG(); 0; }) @@ -208,6 +209,10 @@ static inline bool is_huge_zero_page(struct page *page) return false; } +static inline void put_huge_zero_page(void) +{ + BUILD_BUG(); +} static inline struct page *follow_devmap_pmd(struct vm_area_struct *vma, unsigned long addr, pmd_t *pmd, int flags) -- cgit v1.2.3 From 28093f9f34cedeaea0f481c58446d9dac6dd620f Mon Sep 17 00:00:00 2001 From: Gerald Schaefer Date: Thu, 28 Apr 2016 16:18:35 -0700 Subject: numa: fix /proc//numa_maps for THP In gather_pte_stats() a THP pmd is cast into a pte, which is wrong because the layouts may differ depending on the architecture. On s390 this will lead to inaccurate numa_maps accounting in /proc because of misguided pte_present() and pte_dirty() checks on the fake pte. On other architectures pte_present() and pte_dirty() may work by chance, but there may be an issue with direct-access (dax) mappings w/o underlying struct pages when HAVE_PTE_SPECIAL is set and THP is available. In vm_normal_page() the fake pte will be checked with pte_special() and because there is no "special" bit in a pmd, this will always return false and the VM_PFNMAP | VM_MIXEDMAP checking will be skipped. On dax mappings w/o struct pages, an invalid struct page pointer would then be returned that can crash the kernel. This patch fixes the numa_maps THP handling by introducing new "_pmd" variants of the can_gather_numa_stats() and vm_normal_page() functions. Signed-off-by: Gerald Schaefer Cc: Naoya Horiguchi Cc: "Kirill A . Shutemov" Cc: Konstantin Khlebnikov Cc: Michal Hocko Cc: Vlastimil Babka Cc: Jerome Marchand Cc: Johannes Weiner Cc: Dave Hansen Cc: Mel Gorman Cc: Dan Williams Cc: Martin Schwidefsky Cc: Heiko Carstens Cc: Michael Holzheu Cc: [4.3+] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/mm.h b/include/linux/mm.h index 79b6c18d0a38..864d7221de84 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -1140,6 +1140,8 @@ struct zap_details { struct page *vm_normal_page(struct vm_area_struct *vma, unsigned long addr, pte_t pte); +struct page *vm_normal_page_pmd(struct vm_area_struct *vma, unsigned long addr, + pmd_t pmd); int zap_vma_ptes(struct vm_area_struct *vma, unsigned long address, unsigned long size); -- cgit v1.2.3 From 2c94b53738549d81dc7464a32117d1f5112c64d3 Mon Sep 17 00:00:00 2001 From: Tim Bingham Date: Fri, 29 Apr 2016 13:30:23 -0400 Subject: net: Implement net_dbg_ratelimited() for CONFIG_DYNAMIC_DEBUG case Prior to commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the implementation of net_dbg_ratelimited() was buggy for both the DEBUG and CONFIG_DYNAMIC_DEBUG cases. The bug was that net_ratelimit() was being called and, despite returning true, nothing was being printed to the console. This resulted in messages like the following - "net_ratelimit: %d callbacks suppressed" with no other output nearby. After commit d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") the bug is fixed for the DEBUG case. However, there's no output at all for CONFIG_DYNAMIC_DEBUG case. This patch restores debug output (if enabled) for the CONFIG_DYNAMIC_DEBUG case. Add a definition of net_dbg_ratelimited() for the CONFIG_DYNAMIC_DEBUG case. The implementation takes care to check that dynamic debugging is enabled before calling net_ratelimit(). Fixes: d92cff89a0c8 ("net_dbg_ratelimited: turn into no-op when !DEBUG") Signed-off-by: Tim Bingham Signed-off-by: David S. Miller --- include/linux/net.h | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/net.h b/include/linux/net.h index 49175e4ced11..f840d77c6c31 100644 --- a/include/linux/net.h +++ b/include/linux/net.h @@ -246,7 +246,15 @@ do { \ net_ratelimited_function(pr_warn, fmt, ##__VA_ARGS__) #define net_info_ratelimited(fmt, ...) \ net_ratelimited_function(pr_info, fmt, ##__VA_ARGS__) -#if defined(DEBUG) +#if defined(CONFIG_DYNAMIC_DEBUG) +#define net_dbg_ratelimited(fmt, ...) \ +do { \ + DEFINE_DYNAMIC_DEBUG_METADATA(descriptor, fmt); \ + if (unlikely(descriptor.flags & _DPRINTK_FLAGS_PRINT) && \ + net_ratelimit()) \ + __dynamic_pr_debug(&descriptor, fmt, ##__VA_ARGS__); \ +} while (0) +#elif defined(DEBUG) #define net_dbg_ratelimited(fmt, ...) \ net_ratelimited_function(pr_debug, fmt, ##__VA_ARGS__) #else -- cgit v1.2.3 From 689de1d6ca95b3b5bd8ee446863bf81a4883ea25 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 2 May 2016 12:46:42 -0700 Subject: Minimal fix-up of bad hashing behavior of hash_64() This is a fairly minimal fixup to the horribly bad behavior of hash_64() with certain input patterns. In particular, because the multiplicative value used for the 64-bit hash was intentionally bit-sparse (so that the multiply could be done with shifts and adds on architectures without hardware multipliers), some bits did not get spread out very much. In particular, certain fairly common bit ranges in the input (roughly bits 12-20: commonly with the most information in them when you hash things like byte offsets in files or memory that have block factors that mean that the low bits are often zero) would not necessarily show up much in the result. There's a bigger patch-series brewing to fix up things more completely, but this is the fairly minimal fix for the 64-bit hashing problem. It simply picks a much better constant multiplier, spreading the bits out a lot better. NOTE! For 32-bit architectures, the bad old hash_64() remains the same for now, since 64-bit multiplies are expensive. The bigger hashing cleanup will replace the 32-bit case with something better. The new constants were picked by George Spelvin who wrote that bigger cleanup series. I just picked out the constants and part of the comment from that series. Cc: stable@vger.kernel.org Cc: George Spelvin Cc: Thomas Gleixner Signed-off-by: Linus Torvalds --- include/linux/hash.h | 20 ++++++++++++++++++-- 1 file changed, 18 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/hash.h b/include/linux/hash.h index 1afde47e1528..79c52fa81cac 100644 --- a/include/linux/hash.h +++ b/include/linux/hash.h @@ -32,12 +32,28 @@ #error Wordsize not 32 or 64 #endif +/* + * The above primes are actively bad for hashing, since they are + * too sparse. The 32-bit one is mostly ok, the 64-bit one causes + * real problems. Besides, the "prime" part is pointless for the + * multiplicative hash. + * + * Although a random odd number will do, it turns out that the golden + * ratio phi = (sqrt(5)-1)/2, or its negative, has particularly nice + * properties. + * + * These are the negative, (1 - phi) = (phi^2) = (3 - sqrt(5))/2. + * (See Knuth vol 3, section 6.4, exercise 9.) + */ +#define GOLDEN_RATIO_32 0x61C88647 +#define GOLDEN_RATIO_64 0x61C8864680B583EBull + static __always_inline u64 hash_64(u64 val, unsigned int bits) { u64 hash = val; -#if defined(CONFIG_ARCH_HAS_FAST_MULTIPLIER) && BITS_PER_LONG == 64 - hash = hash * GOLDEN_RATIO_PRIME_64; +#if BITS_PER_LONG == 64 + hash = hash * GOLDEN_RATIO_64; #else /* Sigh, gcc can't optimise this alone like it does for 32 bits. */ u64 n = hash; -- cgit v1.2.3 From af67eb9e7e1ab37880459f83153d34b3c42b0075 Mon Sep 17 00:00:00 2001 From: Alexander Duyck Date: Mon, 2 May 2016 09:25:16 -0700 Subject: vxlan: Add checksum check to the features check function We need to perform an additional check on the inner headers to determine if we can offload the checksum for them. Previously this check didn't occur so we would generate an invalid frame in the case of an IPv6 header encapsulated inside of an IPv4 tunnel. To fix this I added a secondary check to vxlan_features_check so that we can verify that we can offload the inner checksum. Signed-off-by: Alexander Duyck Signed-off-by: David S. Miller --- include/linux/if_ether.h | 5 +++++ include/net/vxlan.h | 4 +++- 2 files changed, 8 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/if_ether.h b/include/linux/if_ether.h index d5569734f672..548fd535fd02 100644 --- a/include/linux/if_ether.h +++ b/include/linux/if_ether.h @@ -28,6 +28,11 @@ static inline struct ethhdr *eth_hdr(const struct sk_buff *skb) return (struct ethhdr *)skb_mac_header(skb); } +static inline struct ethhdr *inner_eth_hdr(const struct sk_buff *skb) +{ + return (struct ethhdr *)skb_inner_mac_header(skb); +} + int eth_header_parse(const struct sk_buff *skb, unsigned char *haddr); extern ssize_t sysfs_format_mac(char *buf, const unsigned char *addr, int len); diff --git a/include/net/vxlan.h b/include/net/vxlan.h index 73ed2e951c02..35437c779da8 100644 --- a/include/net/vxlan.h +++ b/include/net/vxlan.h @@ -252,7 +252,9 @@ static inline netdev_features_t vxlan_features_check(struct sk_buff *skb, (skb->inner_protocol_type != ENCAP_TYPE_ETHER || skb->inner_protocol != htons(ETH_P_TEB) || (skb_inner_mac_header(skb) - skb_transport_header(skb) != - sizeof(struct udphdr) + sizeof(struct vxlanhdr)))) + sizeof(struct udphdr) + sizeof(struct vxlanhdr)) || + (skb->ip_summed != CHECKSUM_NONE && + !can_checksum_protocol(features, inner_eth_hdr(skb)->h_proto)))) return features & ~(NETIF_F_CSUM_MASK | NETIF_F_GSO_MASK); return features; -- cgit v1.2.3 From 1f93e9f2318b598e6775a1fc9701604993c512b1 Mon Sep 17 00:00:00 2001 From: Yury Norov Date: Mon, 2 May 2016 19:12:47 +0300 Subject: asm-generic: use compat version for preadv2 and pwritev2 Compat architectures that does not use generic unistd (mips, s390), declare compat version in their syscall tables for preadv2 and pwritev2. Generic unistd syscall table should do it as well. [arnd: this initially slipped through the review and an incorrect patch got merged. arch/tile/ is the only architecture that could be affected for their 32-bit compat mode, every other architecture we support today is fine.] Signed-off-by: Yury Norov Signed-off-by: Arnd Bergmann --- include/uapi/asm-generic/unistd.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/uapi/asm-generic/unistd.h b/include/uapi/asm-generic/unistd.h index 6e0f5f01734c..c51afb71bfab 100644 --- a/include/uapi/asm-generic/unistd.h +++ b/include/uapi/asm-generic/unistd.h @@ -718,9 +718,9 @@ __SYSCALL(__NR_mlock2, sys_mlock2) #define __NR_copy_file_range 285 __SYSCALL(__NR_copy_file_range, sys_copy_file_range) #define __NR_preadv2 286 -__SYSCALL(__NR_preadv2, sys_preadv2) +__SC_COMP(__NR_preadv2, sys_preadv2, compat_sys_preadv2) #define __NR_pwritev2 287 -__SYSCALL(__NR_pwritev2, sys_pwritev2) +__SC_COMP(__NR_pwritev2, sys_pwritev2, compat_sys_pwritev2) #undef __NR_syscalls #define __NR_syscalls 288 -- cgit v1.2.3 From 4550c4e157ca3da929593bb6c64080a59141af35 Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Thu, 5 May 2016 16:22:03 -0700 Subject: mm: memcontrol: let v2 cgroups follow changes in system swappiness Cgroup2 currently doesn't have a per-cgroup swappiness setting. We might want to add one later - that's a different discussion - but until we do, the cgroups should always follow the system setting. Otherwise it will be unchangeably set to whatever the ancestor inherited from the system setting at the time of cgroup creation. Signed-off-by: Johannes Weiner Acked-by: Michal Hocko Acked-by: Vladimir Davydov Cc: [4.5] Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/swap.h | 4 ++++ 1 file changed, 4 insertions(+) (limited to 'include') diff --git a/include/linux/swap.h b/include/linux/swap.h index 2b83359c19ca..0a4cd4703f40 100644 --- a/include/linux/swap.h +++ b/include/linux/swap.h @@ -533,6 +533,10 @@ static inline swp_entry_t get_swap_page(void) #ifdef CONFIG_MEMCG static inline int mem_cgroup_swappiness(struct mem_cgroup *memcg) { + /* Cgroup2 doesn't have per-cgroup swappiness */ + if (cgroup_subsys_on_dfl(memory_cgrp_subsys)) + return vm_swappiness; + /* root ? */ if (mem_cgroup_disabled() || !memcg->css.parent) return vm_swappiness; -- cgit v1.2.3 From 4e1016dac1ccce6d8a960775526cdc3a5baa690b Mon Sep 17 00:00:00 2001 From: Alexandre Bounine Date: Thu, 5 May 2016 16:22:06 -0700 Subject: rapidio/mport_cdev: fix uapi type definitions Fix problems in uapi definitions reported by Gabriel Laskar: (see https://lkml.org/lkml/2016/4/5/205 for details) - move public header file rio_mport_cdev.h to include/uapi/linux directory - change types in data structures passed as IOCTL parameters - improve parameter checking in some IOCTL service routines Signed-off-by: Alexandre Bounine Reported-by: Gabriel Laskar Tested-by: Barry Wood Cc: Gabriel Laskar Cc: Matt Porter Cc: Aurelien Jacquiot Cc: Andre van Herk Cc: Barry Wood Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/rio_mport_cdev.h | 271 ----------------------------------- include/uapi/linux/rio_mport_cdev.h | 277 ++++++++++++++++++++++++++++++++++++ 2 files changed, 277 insertions(+), 271 deletions(-) delete mode 100644 include/linux/rio_mport_cdev.h create mode 100644 include/uapi/linux/rio_mport_cdev.h (limited to 'include') diff --git a/include/linux/rio_mport_cdev.h b/include/linux/rio_mport_cdev.h deleted file mode 100644 index b65d19df76d2..000000000000 --- a/include/linux/rio_mport_cdev.h +++ /dev/null @@ -1,271 +0,0 @@ -/* - * Copyright (c) 2015-2016, Integrated Device Technology Inc. - * Copyright (c) 2015, Prodrive Technologies - * Copyright (c) 2015, Texas Instruments Incorporated - * Copyright (c) 2015, RapidIO Trade Association - * All rights reserved. - * - * This software is available to you under a choice of one of two licenses. - * You may choose to be licensed under the terms of the GNU General Public - * License(GPL) Version 2, or the BSD-3 Clause license below: - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions are met: - * - * 1. Redistributions of source code must retain the above copyright notice, - * this list of conditions and the following disclaimer. - * - * 2. Redistributions in binary form must reproduce the above copyright notice, - * this list of conditions and the following disclaimer in the documentation - * and/or other materials provided with the distribution. - * - * 3. Neither the name of the copyright holder nor the names of its contributors - * may be used to endorse or promote products derived from this software without - * specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" - * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, - * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR - * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR - * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, - * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, - * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; - * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, - * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR - * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF - * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - */ - -#ifndef _RIO_MPORT_CDEV_H_ -#define _RIO_MPORT_CDEV_H_ - -#ifndef __user -#define __user -#endif - -struct rio_mport_maint_io { - uint32_t rioid; /* destID of remote device */ - uint32_t hopcount; /* hopcount to remote device */ - uint32_t offset; /* offset in register space */ - size_t length; /* length in bytes */ - void __user *buffer; /* data buffer */ -}; - -/* - * Definitions for RapidIO data transfers: - * - memory mapped (MAPPED) - * - packet generation from memory (TRANSFER) - */ -#define RIO_TRANSFER_MODE_MAPPED (1 << 0) -#define RIO_TRANSFER_MODE_TRANSFER (1 << 1) -#define RIO_CAP_DBL_SEND (1 << 2) -#define RIO_CAP_DBL_RECV (1 << 3) -#define RIO_CAP_PW_SEND (1 << 4) -#define RIO_CAP_PW_RECV (1 << 5) -#define RIO_CAP_MAP_OUTB (1 << 6) -#define RIO_CAP_MAP_INB (1 << 7) - -struct rio_mport_properties { - uint16_t hdid; - uint8_t id; /* Physical port ID */ - uint8_t index; - uint32_t flags; - uint32_t sys_size; /* Default addressing size */ - uint8_t port_ok; - uint8_t link_speed; - uint8_t link_width; - uint32_t dma_max_sge; - uint32_t dma_max_size; - uint32_t dma_align; - uint32_t transfer_mode; /* Default transfer mode */ - uint32_t cap_sys_size; /* Capable system sizes */ - uint32_t cap_addr_size; /* Capable addressing sizes */ - uint32_t cap_transfer_mode; /* Capable transfer modes */ - uint32_t cap_mport; /* Mport capabilities */ -}; - -/* - * Definitions for RapidIO events; - * - incoming port-writes - * - incoming doorbells - */ -#define RIO_DOORBELL (1 << 0) -#define RIO_PORTWRITE (1 << 1) - -struct rio_doorbell { - uint32_t rioid; - uint16_t payload; -}; - -struct rio_doorbell_filter { - uint32_t rioid; /* 0xffffffff to match all ids */ - uint16_t low; - uint16_t high; -}; - - -struct rio_portwrite { - uint32_t payload[16]; -}; - -struct rio_pw_filter { - uint32_t mask; - uint32_t low; - uint32_t high; -}; - -/* RapidIO base address for inbound requests set to value defined below - * indicates that no specific RIO-to-local address translation is requested - * and driver should use direct (one-to-one) address mapping. -*/ -#define RIO_MAP_ANY_ADDR (uint64_t)(~((uint64_t) 0)) - -struct rio_mmap { - uint32_t rioid; - uint64_t rio_addr; - uint64_t length; - uint64_t handle; - void *address; -}; - -struct rio_dma_mem { - uint64_t length; /* length of DMA memory */ - uint64_t dma_handle; /* handle associated with this memory */ - void *buffer; /* pointer to this memory */ -}; - - -struct rio_event { - unsigned int header; /* event type RIO_DOORBELL or RIO_PORTWRITE */ - union { - struct rio_doorbell doorbell; /* header for RIO_DOORBELL */ - struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */ - } u; -}; - -enum rio_transfer_sync { - RIO_TRANSFER_SYNC, /* synchronous transfer */ - RIO_TRANSFER_ASYNC, /* asynchronous transfer */ - RIO_TRANSFER_FAF, /* fire-and-forget transfer */ -}; - -enum rio_transfer_dir { - RIO_TRANSFER_DIR_READ, /* Read operation */ - RIO_TRANSFER_DIR_WRITE, /* Write operation */ -}; - -/* - * RapidIO data exchange transactions are lists of individual transfers. Each - * transfer exchanges data between two RapidIO devices by remote direct memory - * access and has its own completion code. - * - * The RapidIO specification defines four types of data exchange requests: - * NREAD, NWRITE, SWRITE and NWRITE_R. The RapidIO DMA channel interface allows - * to specify the required type of write operation or combination of them when - * only the last data packet requires response. - * - * NREAD: read up to 256 bytes from remote device memory into local memory - * NWRITE: write up to 256 bytes from local memory to remote device memory - * without confirmation - * SWRITE: as NWRITE, but all addresses and payloads must be 64-bit aligned - * NWRITE_R: as NWRITE, but expect acknowledgment from remote device. - * - * The default exchange is chosen from NREAD and any of the WRITE modes as the - * driver sees fit. For write requests the user can explicitly choose between - * any of the write modes for each transaction. - */ -enum rio_exchange { - RIO_EXCHANGE_DEFAULT, /* Default method */ - RIO_EXCHANGE_NWRITE, /* All packets using NWRITE */ - RIO_EXCHANGE_SWRITE, /* All packets using SWRITE */ - RIO_EXCHANGE_NWRITE_R, /* Last packet NWRITE_R, others NWRITE */ - RIO_EXCHANGE_SWRITE_R, /* Last packet NWRITE_R, others SWRITE */ - RIO_EXCHANGE_NWRITE_R_ALL, /* All packets using NWRITE_R */ -}; - -struct rio_transfer_io { - uint32_t rioid; /* Target destID */ - uint64_t rio_addr; /* Address in target's RIO mem space */ - enum rio_exchange method; /* Data exchange method */ - void __user *loc_addr; - uint64_t handle; - uint64_t offset; /* Offset in buffer */ - uint64_t length; /* Length in bytes */ - uint32_t completion_code; /* Completion code for this transfer */ -}; - -struct rio_transaction { - uint32_t transfer_mode; /* Data transfer mode */ - enum rio_transfer_sync sync; /* Synchronization method */ - enum rio_transfer_dir dir; /* Transfer direction */ - size_t count; /* Number of transfers */ - struct rio_transfer_io __user *block; /* Array of transfers */ -}; - -struct rio_async_tx_wait { - uint32_t token; /* DMA transaction ID token */ - uint32_t timeout; /* Wait timeout in msec, if 0 use default TO */ -}; - -#define RIO_MAX_DEVNAME_SZ 20 - -struct rio_rdev_info { - uint32_t destid; - uint8_t hopcount; - uint32_t comptag; - char name[RIO_MAX_DEVNAME_SZ + 1]; -}; - -/* Driver IOCTL codes */ -#define RIO_MPORT_DRV_MAGIC 'm' - -#define RIO_MPORT_MAINT_HDID_SET \ - _IOW(RIO_MPORT_DRV_MAGIC, 1, uint16_t) -#define RIO_MPORT_MAINT_COMPTAG_SET \ - _IOW(RIO_MPORT_DRV_MAGIC, 2, uint32_t) -#define RIO_MPORT_MAINT_PORT_IDX_GET \ - _IOR(RIO_MPORT_DRV_MAGIC, 3, uint32_t) -#define RIO_MPORT_GET_PROPERTIES \ - _IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties) -#define RIO_MPORT_MAINT_READ_LOCAL \ - _IOR(RIO_MPORT_DRV_MAGIC, 5, struct rio_mport_maint_io) -#define RIO_MPORT_MAINT_WRITE_LOCAL \ - _IOW(RIO_MPORT_DRV_MAGIC, 6, struct rio_mport_maint_io) -#define RIO_MPORT_MAINT_READ_REMOTE \ - _IOR(RIO_MPORT_DRV_MAGIC, 7, struct rio_mport_maint_io) -#define RIO_MPORT_MAINT_WRITE_REMOTE \ - _IOW(RIO_MPORT_DRV_MAGIC, 8, struct rio_mport_maint_io) -#define RIO_ENABLE_DOORBELL_RANGE \ - _IOW(RIO_MPORT_DRV_MAGIC, 9, struct rio_doorbell_filter) -#define RIO_DISABLE_DOORBELL_RANGE \ - _IOW(RIO_MPORT_DRV_MAGIC, 10, struct rio_doorbell_filter) -#define RIO_ENABLE_PORTWRITE_RANGE \ - _IOW(RIO_MPORT_DRV_MAGIC, 11, struct rio_pw_filter) -#define RIO_DISABLE_PORTWRITE_RANGE \ - _IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter) -#define RIO_SET_EVENT_MASK \ - _IOW(RIO_MPORT_DRV_MAGIC, 13, unsigned int) -#define RIO_GET_EVENT_MASK \ - _IOR(RIO_MPORT_DRV_MAGIC, 14, unsigned int) -#define RIO_MAP_OUTBOUND \ - _IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap) -#define RIO_UNMAP_OUTBOUND \ - _IOW(RIO_MPORT_DRV_MAGIC, 16, struct rio_mmap) -#define RIO_MAP_INBOUND \ - _IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap) -#define RIO_UNMAP_INBOUND \ - _IOW(RIO_MPORT_DRV_MAGIC, 18, uint64_t) -#define RIO_ALLOC_DMA \ - _IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem) -#define RIO_FREE_DMA \ - _IOW(RIO_MPORT_DRV_MAGIC, 20, uint64_t) -#define RIO_TRANSFER \ - _IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction) -#define RIO_WAIT_FOR_ASYNC \ - _IOW(RIO_MPORT_DRV_MAGIC, 22, struct rio_async_tx_wait) -#define RIO_DEV_ADD \ - _IOW(RIO_MPORT_DRV_MAGIC, 23, struct rio_rdev_info) -#define RIO_DEV_DEL \ - _IOW(RIO_MPORT_DRV_MAGIC, 24, struct rio_rdev_info) - -#endif /* _RIO_MPORT_CDEV_H_ */ diff --git a/include/uapi/linux/rio_mport_cdev.h b/include/uapi/linux/rio_mport_cdev.h new file mode 100644 index 000000000000..5796bf1d06ad --- /dev/null +++ b/include/uapi/linux/rio_mport_cdev.h @@ -0,0 +1,277 @@ +/* + * Copyright (c) 2015-2016, Integrated Device Technology Inc. + * Copyright (c) 2015, Prodrive Technologies + * Copyright (c) 2015, Texas Instruments Incorporated + * Copyright (c) 2015, RapidIO Trade Association + * All rights reserved. + * + * This software is available to you under a choice of one of two licenses. + * You may choose to be licensed under the terms of the GNU General Public + * License(GPL) Version 2, or the BSD-3 Clause license below: + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are met: + * + * 1. Redistributions of source code must retain the above copyright notice, + * this list of conditions and the following disclaimer. + * + * 2. Redistributions in binary form must reproduce the above copyright notice, + * this list of conditions and the following disclaimer in the documentation + * and/or other materials provided with the distribution. + * + * 3. Neither the name of the copyright holder nor the names of its contributors + * may be used to endorse or promote products derived from this software without + * specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" + * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, + * THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR + * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR + * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, + * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, + * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; + * OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, + * WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR + * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF + * ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + */ + +#ifndef _RIO_MPORT_CDEV_H_ +#define _RIO_MPORT_CDEV_H_ + +#include +#include + +struct rio_mport_maint_io { + __u16 rioid; /* destID of remote device */ + __u8 hopcount; /* hopcount to remote device */ + __u8 pad0[5]; + __u32 offset; /* offset in register space */ + __u32 length; /* length in bytes */ + __u64 buffer; /* pointer to data buffer */ +}; + +/* + * Definitions for RapidIO data transfers: + * - memory mapped (MAPPED) + * - packet generation from memory (TRANSFER) + */ +#define RIO_TRANSFER_MODE_MAPPED (1 << 0) +#define RIO_TRANSFER_MODE_TRANSFER (1 << 1) +#define RIO_CAP_DBL_SEND (1 << 2) +#define RIO_CAP_DBL_RECV (1 << 3) +#define RIO_CAP_PW_SEND (1 << 4) +#define RIO_CAP_PW_RECV (1 << 5) +#define RIO_CAP_MAP_OUTB (1 << 6) +#define RIO_CAP_MAP_INB (1 << 7) + +struct rio_mport_properties { + __u16 hdid; + __u8 id; /* Physical port ID */ + __u8 index; + __u32 flags; + __u32 sys_size; /* Default addressing size */ + __u8 port_ok; + __u8 link_speed; + __u8 link_width; + __u8 pad0; + __u32 dma_max_sge; + __u32 dma_max_size; + __u32 dma_align; + __u32 transfer_mode; /* Default transfer mode */ + __u32 cap_sys_size; /* Capable system sizes */ + __u32 cap_addr_size; /* Capable addressing sizes */ + __u32 cap_transfer_mode; /* Capable transfer modes */ + __u32 cap_mport; /* Mport capabilities */ +}; + +/* + * Definitions for RapidIO events; + * - incoming port-writes + * - incoming doorbells + */ +#define RIO_DOORBELL (1 << 0) +#define RIO_PORTWRITE (1 << 1) + +struct rio_doorbell { + __u16 rioid; + __u16 payload; +}; + +struct rio_doorbell_filter { + __u16 rioid; /* Use RIO_INVALID_DESTID to match all ids */ + __u16 low; + __u16 high; + __u16 pad0; +}; + + +struct rio_portwrite { + __u32 payload[16]; +}; + +struct rio_pw_filter { + __u32 mask; + __u32 low; + __u32 high; + __u32 pad0; +}; + +/* RapidIO base address for inbound requests set to value defined below + * indicates that no specific RIO-to-local address translation is requested + * and driver should use direct (one-to-one) address mapping. +*/ +#define RIO_MAP_ANY_ADDR (__u64)(~((__u64) 0)) + +struct rio_mmap { + __u16 rioid; + __u16 pad0[3]; + __u64 rio_addr; + __u64 length; + __u64 handle; + __u64 address; +}; + +struct rio_dma_mem { + __u64 length; /* length of DMA memory */ + __u64 dma_handle; /* handle associated with this memory */ + __u64 address; +}; + +struct rio_event { + __u32 header; /* event type RIO_DOORBELL or RIO_PORTWRITE */ + union { + struct rio_doorbell doorbell; /* header for RIO_DOORBELL */ + struct rio_portwrite portwrite; /* header for RIO_PORTWRITE */ + } u; + __u32 pad0; +}; + +enum rio_transfer_sync { + RIO_TRANSFER_SYNC, /* synchronous transfer */ + RIO_TRANSFER_ASYNC, /* asynchronous transfer */ + RIO_TRANSFER_FAF, /* fire-and-forget transfer */ +}; + +enum rio_transfer_dir { + RIO_TRANSFER_DIR_READ, /* Read operation */ + RIO_TRANSFER_DIR_WRITE, /* Write operation */ +}; + +/* + * RapidIO data exchange transactions are lists of individual transfers. Each + * transfer exchanges data between two RapidIO devices by remote direct memory + * access and has its own completion code. + * + * The RapidIO specification defines four types of data exchange requests: + * NREAD, NWRITE, SWRITE and NWRITE_R. The RapidIO DMA channel interface allows + * to specify the required type of write operation or combination of them when + * only the last data packet requires response. + * + * NREAD: read up to 256 bytes from remote device memory into local memory + * NWRITE: write up to 256 bytes from local memory to remote device memory + * without confirmation + * SWRITE: as NWRITE, but all addresses and payloads must be 64-bit aligned + * NWRITE_R: as NWRITE, but expect acknowledgment from remote device. + * + * The default exchange is chosen from NREAD and any of the WRITE modes as the + * driver sees fit. For write requests the user can explicitly choose between + * any of the write modes for each transaction. + */ +enum rio_exchange { + RIO_EXCHANGE_DEFAULT, /* Default method */ + RIO_EXCHANGE_NWRITE, /* All packets using NWRITE */ + RIO_EXCHANGE_SWRITE, /* All packets using SWRITE */ + RIO_EXCHANGE_NWRITE_R, /* Last packet NWRITE_R, others NWRITE */ + RIO_EXCHANGE_SWRITE_R, /* Last packet NWRITE_R, others SWRITE */ + RIO_EXCHANGE_NWRITE_R_ALL, /* All packets using NWRITE_R */ +}; + +struct rio_transfer_io { + __u64 rio_addr; /* Address in target's RIO mem space */ + __u64 loc_addr; + __u64 handle; + __u64 offset; /* Offset in buffer */ + __u64 length; /* Length in bytes */ + __u16 rioid; /* Target destID */ + __u16 method; /* Data exchange method, one of rio_exchange enum */ + __u32 completion_code; /* Completion code for this transfer */ +}; + +struct rio_transaction { + __u64 block; /* Pointer to array of transfers */ + __u32 count; /* Number of transfers */ + __u32 transfer_mode; /* Data transfer mode */ + __u16 sync; /* Synch method, one of rio_transfer_sync enum */ + __u16 dir; /* Transfer direction, one of rio_transfer_dir enum */ + __u32 pad0; +}; + +struct rio_async_tx_wait { + __u32 token; /* DMA transaction ID token */ + __u32 timeout; /* Wait timeout in msec, if 0 use default TO */ +}; + +#define RIO_MAX_DEVNAME_SZ 20 + +struct rio_rdev_info { + __u16 destid; + __u8 hopcount; + __u8 pad0; + __u32 comptag; + char name[RIO_MAX_DEVNAME_SZ + 1]; +}; + +/* Driver IOCTL codes */ +#define RIO_MPORT_DRV_MAGIC 'm' + +#define RIO_MPORT_MAINT_HDID_SET \ + _IOW(RIO_MPORT_DRV_MAGIC, 1, __u16) +#define RIO_MPORT_MAINT_COMPTAG_SET \ + _IOW(RIO_MPORT_DRV_MAGIC, 2, __u32) +#define RIO_MPORT_MAINT_PORT_IDX_GET \ + _IOR(RIO_MPORT_DRV_MAGIC, 3, __u32) +#define RIO_MPORT_GET_PROPERTIES \ + _IOR(RIO_MPORT_DRV_MAGIC, 4, struct rio_mport_properties) +#define RIO_MPORT_MAINT_READ_LOCAL \ + _IOR(RIO_MPORT_DRV_MAGIC, 5, struct rio_mport_maint_io) +#define RIO_MPORT_MAINT_WRITE_LOCAL \ + _IOW(RIO_MPORT_DRV_MAGIC, 6, struct rio_mport_maint_io) +#define RIO_MPORT_MAINT_READ_REMOTE \ + _IOR(RIO_MPORT_DRV_MAGIC, 7, struct rio_mport_maint_io) +#define RIO_MPORT_MAINT_WRITE_REMOTE \ + _IOW(RIO_MPORT_DRV_MAGIC, 8, struct rio_mport_maint_io) +#define RIO_ENABLE_DOORBELL_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 9, struct rio_doorbell_filter) +#define RIO_DISABLE_DOORBELL_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 10, struct rio_doorbell_filter) +#define RIO_ENABLE_PORTWRITE_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 11, struct rio_pw_filter) +#define RIO_DISABLE_PORTWRITE_RANGE \ + _IOW(RIO_MPORT_DRV_MAGIC, 12, struct rio_pw_filter) +#define RIO_SET_EVENT_MASK \ + _IOW(RIO_MPORT_DRV_MAGIC, 13, __u32) +#define RIO_GET_EVENT_MASK \ + _IOR(RIO_MPORT_DRV_MAGIC, 14, __u32) +#define RIO_MAP_OUTBOUND \ + _IOWR(RIO_MPORT_DRV_MAGIC, 15, struct rio_mmap) +#define RIO_UNMAP_OUTBOUND \ + _IOW(RIO_MPORT_DRV_MAGIC, 16, struct rio_mmap) +#define RIO_MAP_INBOUND \ + _IOWR(RIO_MPORT_DRV_MAGIC, 17, struct rio_mmap) +#define RIO_UNMAP_INBOUND \ + _IOW(RIO_MPORT_DRV_MAGIC, 18, __u64) +#define RIO_ALLOC_DMA \ + _IOWR(RIO_MPORT_DRV_MAGIC, 19, struct rio_dma_mem) +#define RIO_FREE_DMA \ + _IOW(RIO_MPORT_DRV_MAGIC, 20, __u64) +#define RIO_TRANSFER \ + _IOWR(RIO_MPORT_DRV_MAGIC, 21, struct rio_transaction) +#define RIO_WAIT_FOR_ASYNC \ + _IOW(RIO_MPORT_DRV_MAGIC, 22, struct rio_async_tx_wait) +#define RIO_DEV_ADD \ + _IOW(RIO_MPORT_DRV_MAGIC, 23, struct rio_rdev_info) +#define RIO_DEV_DEL \ + _IOW(RIO_MPORT_DRV_MAGIC, 24, struct rio_rdev_info) + +#endif /* _RIO_MPORT_CDEV_H_ */ -- cgit v1.2.3 From 127393fbe597dd85863a9bdccaa11007e7d4948f Mon Sep 17 00:00:00 2001 From: Andrea Arcangeli Date: Thu, 5 May 2016 16:22:20 -0700 Subject: mm: thp: kvm: fix memory corruption in KVM with THP enabled After the THP refcounting change, obtaining a compound pages from get_user_pages() no longer allows us to assume the entire compound page is immediately mappable from a secondary MMU. A secondary MMU doesn't want to call get_user_pages() more than once for each compound page, in order to know if it can map the whole compound page. So a secondary MMU needs to know from a single get_user_pages() invocation when it can map immediately the entire compound page to avoid a flood of unnecessary secondary MMU faults and spurious atomic_inc()/atomic_dec() (pages don't have to be pinned by MMU notifier users). Ideally instead of the page->_mapcount < 1 check, get_user_pages() should return the granularity of the "page" mapping in the "mm" passed to get_user_pages(). However it's non trivial change to pass the "pmd" status belonging to the "mm" walked by get_user_pages up the stack (up to the caller of get_user_pages). So the fix just checks if there is not a single pte mapping on the page returned by get_user_pages, and in turn if the caller can assume that the whole compound page is mapped in the current "mm" (in a pmd_trans_huge()). In such case the entire compound page is safe to map into the secondary MMU without additional get_user_pages() calls on the surrounding tail/head pages. In addition of being faster, not having to run other get_user_pages() calls also reduces the memory footprint of the secondary MMU fault in case the pmd split happened as result of memory pressure. Without this fix after a MADV_DONTNEED (like invoked by QEMU during postcopy live migration or balloning) or after generic swapping (with a failure in split_huge_page() that would only result in pmd splitting and not a physical page split), KVM would map the whole compound page into the shadow pagetables, despite regular faults or userfaults (like UFFDIO_COPY) may map regular pages into the primary MMU as result of the pte faults, leading to the guest mode and userland mode going out of sync and not working on the same memory at all times. Any other secondary MMU notifier manager (KVM is just one of the many MMU notifier users) will need the same information if it doesn't want to run a flood of get_user_pages_fast and it can support multiple granularity in the secondary MMU mappings, so I think it is justified to be exposed not just to KVM. The other option would be to move transparent_hugepage_adjust to mm/huge_memory.c but that currently has all kind of KVM data structures in it, so it's definitely not a cut-and-paste work, so I couldn't do a fix as cleaner as this one for 4.6. Signed-off-by: Andrea Arcangeli Cc: "Dr. David Alan Gilbert" Cc: "Kirill A. Shutemov" Cc: "Li, Liang Z" Cc: Amit Shah Cc: Paolo Bonzini Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/page-flags.h | 22 ++++++++++++++++++++++ 1 file changed, 22 insertions(+) (limited to 'include') diff --git a/include/linux/page-flags.h b/include/linux/page-flags.h index f4ed4f1b0c77..6b052aa7b5b7 100644 --- a/include/linux/page-flags.h +++ b/include/linux/page-flags.h @@ -516,6 +516,27 @@ static inline int PageTransCompound(struct page *page) return PageCompound(page); } +/* + * PageTransCompoundMap is the same as PageTransCompound, but it also + * guarantees the primary MMU has the entire compound page mapped + * through pmd_trans_huge, which in turn guarantees the secondary MMUs + * can also map the entire compound page. This allows the secondary + * MMUs to call get_user_pages() only once for each compound page and + * to immediately map the entire compound page with a single secondary + * MMU fault. If there will be a pmd split later, the secondary MMUs + * will get an update through the MMU notifier invalidation through + * split_huge_pmd(). + * + * Unlike PageTransCompound, this is safe to be called only while + * split_huge_pmd() cannot run from under us, like if protected by the + * MMU notifier, otherwise it may result in page->_mapcount < 0 false + * positives. + */ +static inline int PageTransCompoundMap(struct page *page) +{ + return PageTransCompound(page) && atomic_read(&page->_mapcount) < 0; +} + /* * PageTransTail returns true for both transparent huge pages * and hugetlbfs pages, so it should only be called when it's known @@ -559,6 +580,7 @@ static inline int TestClearPageDoubleMap(struct page *page) #else TESTPAGEFLAG_FALSE(TransHuge) TESTPAGEFLAG_FALSE(TransCompound) +TESTPAGEFLAG_FALSE(TransCompoundMap) TESTPAGEFLAG_FALSE(TransTail) TESTPAGEFLAG_FALSE(DoubleMap) TESTSETFLAG_FALSE(DoubleMap) -- cgit v1.2.3 From 7322dd755e7dd34bc5359aa27abeed1687e0f628 Mon Sep 17 00:00:00 2001 From: Arnd Bergmann Date: Thu, 5 May 2016 16:22:39 -0700 Subject: byteswap: try to avoid __builtin_constant_p gcc bug This is another attempt to avoid a regression in wwn_to_u64() after that started using get_unaligned_be64(), which in turn ran into a bug on gcc-4.9 through 6.1. The regression got introduced due to the combination of two separate workarounds (commits e3bde9568d99: "include/linux/unaligned: force inlining of byteswap operations" and ef3fb2422ffe: "scsi: fc: use get/put_unaligned64 for wwn access") that each try to sidestep distinct problems with gcc behavior (code growth and increased stack usage). Unfortunately after both have been applied, a more serious gcc bug has been uncovered, leading to incorrect object code that discards part of a function and causes undefined behavior. As part of this problem is how __builtin_constant_p gets evaluated on an argument passed by reference into an inline function, this avoids the use of __builtin_constant_p() for all architectures that set CONFIG_ARCH_USE_BUILTIN_BSWAP. Most architectures do not set ARCH_SUPPORTS_OPTIMIZED_INLINING, which means they probably do not suffer from the problem in the qla2xxx driver, but they might still run into it elsewhere. Both of the original workarounds were only merged in the 4.6 kernel, and the bug that is fixed by this patch should only appear if both are there, so we probably don't need to backport the fix. On the other hand, it works by simplifying the code path and should not have any negative effects. [arnd@arndb.de: fix older gcc warnings] (http://lkml.kernel.org/r/12243652.bxSxEgjgfk@wuerfel) Link: https://lkml.org/lkml/headers/2016/4/12/1103 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=66122 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70232 Link: https://gcc.gnu.org/bugzilla/show_bug.cgi?id=70646 Fixes: e3bde9568d99 ("include/linux/unaligned: force inlining of byteswap operations") Fixes: ef3fb2422ffe ("scsi: fc: use get/put_unaligned64 for wwn access") Link: http://lkml.kernel.org/r/1780465.XdtPJpi8Tt@wuerfel Signed-off-by: Arnd Bergmann Reviewed-by: Josh Poimboeuf Tested-by: Josh Poimboeuf # on gcc-5.3 Tested-by: Quinn Tran Cc: Martin Jambor Cc: "Martin K. Petersen" Cc: James Bottomley Cc: Denys Vlasenko Cc: Thomas Graf Cc: Peter Zijlstra Cc: David Rientjes Cc: Ingo Molnar Cc: Himanshu Madhani Cc: Jan Hubicka Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/uapi/linux/swab.h | 24 +++++++++++++++--------- 1 file changed, 15 insertions(+), 9 deletions(-) (limited to 'include') diff --git a/include/uapi/linux/swab.h b/include/uapi/linux/swab.h index 3f10e5317b46..8f3a8f606fd9 100644 --- a/include/uapi/linux/swab.h +++ b/include/uapi/linux/swab.h @@ -45,9 +45,7 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) { -#ifdef __HAVE_BUILTIN_BSWAP16__ - return __builtin_bswap16(val); -#elif defined (__arch_swab16) +#if defined (__arch_swab16) return __arch_swab16(val); #else return ___constant_swab16(val); @@ -56,9 +54,7 @@ static inline __attribute_const__ __u16 __fswab16(__u16 val) static inline __attribute_const__ __u32 __fswab32(__u32 val) { -#ifdef __HAVE_BUILTIN_BSWAP32__ - return __builtin_bswap32(val); -#elif defined(__arch_swab32) +#if defined(__arch_swab32) return __arch_swab32(val); #else return ___constant_swab32(val); @@ -67,9 +63,7 @@ static inline __attribute_const__ __u32 __fswab32(__u32 val) static inline __attribute_const__ __u64 __fswab64(__u64 val) { -#ifdef __HAVE_BUILTIN_BSWAP64__ - return __builtin_bswap64(val); -#elif defined (__arch_swab64) +#if defined (__arch_swab64) return __arch_swab64(val); #elif defined(__SWAB_64_THRU_32__) __u32 h = val >> 32; @@ -102,28 +96,40 @@ static inline __attribute_const__ __u32 __fswahb32(__u32 val) * __swab16 - return a byteswapped 16-bit value * @x: value to byteswap */ +#ifdef __HAVE_BUILTIN_BSWAP16__ +#define __swab16(x) (__u16)__builtin_bswap16((__u16)(x)) +#else #define __swab16(x) \ (__builtin_constant_p((__u16)(x)) ? \ ___constant_swab16(x) : \ __fswab16(x)) +#endif /** * __swab32 - return a byteswapped 32-bit value * @x: value to byteswap */ +#ifdef __HAVE_BUILTIN_BSWAP32__ +#define __swab32(x) (__u32)__builtin_bswap32((__u32)(x)) +#else #define __swab32(x) \ (__builtin_constant_p((__u32)(x)) ? \ ___constant_swab32(x) : \ __fswab32(x)) +#endif /** * __swab64 - return a byteswapped 64-bit value * @x: value to byteswap */ +#ifdef __HAVE_BUILTIN_BSWAP64__ +#define __swab64(x) (__u64)__builtin_bswap64((__u64)(x)) +#else #define __swab64(x) \ (__builtin_constant_p((__u64)(x)) ? \ ___constant_swab64(x) : \ __fswab64(x)) +#endif /** * __swahw32 - return a word-swapped 32-bit value -- cgit v1.2.3 From 54d5ca871e72f2bb172ec9323497f01cd5091ec7 Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 May 2016 01:16:37 +0200 Subject: vfs: add vfs_select_inode() helper Signed-off-by: Miklos Szeredi Cc: # v4.2+ --- include/linux/dcache.h | 12 ++++++++++++ 1 file changed, 12 insertions(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 4bb4de8d95ea..7e9422cb5989 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -565,4 +565,16 @@ static inline struct dentry *d_real(struct dentry *dentry) return dentry; } +static inline struct inode *vfs_select_inode(struct dentry *dentry, + unsigned open_flags) +{ + struct inode *inode = d_inode(dentry); + + if (inode && unlikely(dentry->d_flags & DCACHE_OP_SELECT_INODE)) + inode = dentry->d_op->d_select_inode(dentry, open_flags); + + return inode; +} + + #endif /* __LINUX_DCACHE_H */ -- cgit v1.2.3 From 3c9fe8cdff1b889a059a30d22f130372f2b3885f Mon Sep 17 00:00:00 2001 From: Miklos Szeredi Date: Wed, 11 May 2016 01:16:37 +0200 Subject: vfs: add lookup_hash() helper Overlayfs needs lookup without inode_permission() and already has the name hash (in form of dentry->d_name on overlayfs dentry). It also doesn't support filesystems with d_op->d_hash() so basically it only needs the actual hashed lookup from lookup_one_len_unlocked() So add a new helper that does unlocked lookup of a hashed name. Signed-off-by: Miklos Szeredi --- include/linux/namei.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/namei.h b/include/linux/namei.h index 77d01700daf7..ec5ec2818a28 100644 --- a/include/linux/namei.h +++ b/include/linux/namei.h @@ -79,6 +79,8 @@ extern int kern_path_mountpoint(int, const char *, struct path *, unsigned int); extern struct dentry *lookup_one_len(const char *, struct dentry *, int); extern struct dentry *lookup_one_len_unlocked(const char *, struct dentry *, int); +struct qstr; +extern struct dentry *lookup_hash(const struct qstr *, struct dentry *); extern int follow_down_one(struct path *); extern int follow_down(struct path *); -- cgit v1.2.3