From c043ec1ca5baae63726aae32abbe003192bc6eec Mon Sep 17 00:00:00 2001 From: Martin Kelly Date: Mon, 26 Mar 2018 14:27:51 -0700 Subject: iio:buffer: make length types match kfifo types Currently, we use int for buffer length and bytes_per_datum. However, kfifo uses unsigned int for length and size_t for element size. We need to make sure these matches or we will have bugs related to overflow (in the range between INT_MAX and UINT_MAX for length, for example). In addition, set_bytes_per_datum uses size_t while bytes_per_datum is an int, which would cause bugs for large values of bytes_per_datum. Change buffer length to use unsigned int and bytes_per_datum to use size_t. Signed-off-by: Martin Kelly Cc: Signed-off-by: Jonathan Cameron --- include/linux/iio/buffer_impl.h | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/iio/buffer_impl.h b/include/linux/iio/buffer_impl.h index b9e22b7e2f28..d1171db23742 100644 --- a/include/linux/iio/buffer_impl.h +++ b/include/linux/iio/buffer_impl.h @@ -53,7 +53,7 @@ struct iio_buffer_access_funcs { int (*request_update)(struct iio_buffer *buffer); int (*set_bytes_per_datum)(struct iio_buffer *buffer, size_t bpd); - int (*set_length)(struct iio_buffer *buffer, int length); + int (*set_length)(struct iio_buffer *buffer, unsigned int length); int (*enable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); int (*disable)(struct iio_buffer *buffer, struct iio_dev *indio_dev); @@ -72,10 +72,10 @@ struct iio_buffer_access_funcs { */ struct iio_buffer { /** @length: Number of datums in buffer. */ - int length; + unsigned int length; /** @bytes_per_datum: Size of individual datum including timestamp. */ - int bytes_per_datum; + size_t bytes_per_datum; /** * @access: Buffer access functions associated with the -- cgit v1.2.3 From 72d4d3e3980702809509586d36015b7c3c51fad4 Mon Sep 17 00:00:00 2001 From: Jozsef Kadlecsik Date: Sat, 21 Apr 2018 13:43:48 +0200 Subject: netfilter: Fix handling simultaneous open in TCP conntrack Dominique Martinet reported a TCP hang problem when simultaneous open was used. The problem is that the tcp_conntracks state table is not smart enough to handle the case. The state table could be fixed by introducing a new state, but that would require more lines of code compared to this patch, due to the required backward compatibility with ctnetlink. Signed-off-by: Jozsef Kadlecsik Reported-by: Dominique Martinet Tested-by: Dominique Martinet Signed-off-by: Pablo Neira Ayuso --- include/uapi/linux/netfilter/nf_conntrack_tcp.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/netfilter/nf_conntrack_tcp.h b/include/uapi/linux/netfilter/nf_conntrack_tcp.h index 74b91151d494..bcba72def817 100644 --- a/include/uapi/linux/netfilter/nf_conntrack_tcp.h +++ b/include/uapi/linux/netfilter/nf_conntrack_tcp.h @@ -46,6 +46,9 @@ enum tcp_conntrack { /* Marks possibility for expected RFC5961 challenge ACK */ #define IP_CT_EXP_CHALLENGE_ACK 0x40 +/* Simultaneous open initialized */ +#define IP_CT_TCP_SIMULTANEOUS_OPEN 0x80 + struct nf_ct_tcp_flags { __u8 flags; __u8 mask; -- cgit v1.2.3 From c456442cd3a59eeb1d60293c26cbe2ff2c4e42cf Mon Sep 17 00:00:00 2001 From: Konrad Rzeszutek Wilk Date: Wed, 25 Apr 2018 22:04:20 -0400 Subject: x86/bugs: Expose /sys/../spec_store_bypass Add the sysfs file for the new vulerability. It does not do much except show the words 'Vulnerable' for recent x86 cores. Intel cores prior to family 6 are known not to be vulnerable, and so are some Atoms and some Xeon Phi. It assumes that older Cyrix, Centaur, etc. cores are immune. Signed-off-by: Konrad Rzeszutek Wilk Signed-off-by: Thomas Gleixner Reviewed-by: Borislav Petkov Reviewed-by: Ingo Molnar --- include/linux/cpu.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/cpu.h b/include/linux/cpu.h index 7b01bc11c692..a97a63eef59f 100644 --- a/include/linux/cpu.h +++ b/include/linux/cpu.h @@ -53,6 +53,8 @@ extern ssize_t cpu_show_spectre_v1(struct device *dev, struct device_attribute *attr, char *buf); extern ssize_t cpu_show_spectre_v2(struct device *dev, struct device_attribute *attr, char *buf); +extern ssize_t cpu_show_spec_store_bypass(struct device *dev, + struct device_attribute *attr, char *buf); extern __printf(4, 5) struct device *cpu_device_create(struct device *parent, void *drvdata, -- cgit v1.2.3 From b617cfc858161140d69cc0b5cc211996b557a1c7 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Sun, 29 Apr 2018 15:20:11 +0200 Subject: prctl: Add speculation control prctls Add two new prctls to control aspects of speculation related vulnerabilites and their mitigations to provide finer grained control over performance impacting mitigations. PR_GET_SPECULATION_CTRL returns the state of the speculation misfeature which is selected with arg2 of prctl(2). The return value uses bit 0-2 with the following meaning: Bit Define Description 0 PR_SPEC_PRCTL Mitigation can be controlled per task by PR_SET_SPECULATION_CTRL 1 PR_SPEC_ENABLE The speculation feature is enabled, mitigation is disabled 2 PR_SPEC_DISABLE The speculation feature is disabled, mitigation is enabled If all bits are 0 the CPU is not affected by the speculation misfeature. If PR_SPEC_PRCTL is set, then the per task control of the mitigation is available. If not set, prctl(PR_SET_SPECULATION_CTRL) for the speculation misfeature will fail. PR_SET_SPECULATION_CTRL allows to control the speculation misfeature, which is selected by arg2 of prctl(2) per task. arg3 is used to hand in the control value, i.e. either PR_SPEC_ENABLE or PR_SPEC_DISABLE. The common return values are: EINVAL prctl is not implemented by the architecture or the unused prctl() arguments are not 0 ENODEV arg2 is selecting a not supported speculation misfeature PR_SET_SPECULATION_CTRL has these additional return values: ERANGE arg3 is incorrect, i.e. it's not either PR_SPEC_ENABLE or PR_SPEC_DISABLE ENXIO prctl control of the selected speculation misfeature is disabled The first supported controlable speculation misfeature is PR_SPEC_STORE_BYPASS. Add the define so this can be shared between architectures. Based on an initial patch from Tim Chen and mostly rewritten. Signed-off-by: Thomas Gleixner Reviewed-by: Ingo Molnar Reviewed-by: Konrad Rzeszutek Wilk --- include/linux/nospec.h | 5 +++++ include/uapi/linux/prctl.h | 11 +++++++++++ 2 files changed, 16 insertions(+) (limited to 'include') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index e791ebc65c9c..700bb8a4e4ea 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -55,4 +55,9 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, \ (typeof(_i)) (_i & _mask); \ }) + +/* Speculation control prctl */ +int arch_prctl_spec_ctrl_get(unsigned long which); +int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); + #endif /* _LINUX_NOSPEC_H */ diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index af5f8c2df87a..ebf057ac1346 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -207,4 +207,15 @@ struct prctl_mm_map { # define PR_SVE_VL_LEN_MASK 0xffff # define PR_SVE_VL_INHERIT (1 << 17) /* inherit across exec */ +/* Per task speculation control */ +#define PR_GET_SPECULATION_CTRL 52 +#define PR_SET_SPECULATION_CTRL 53 +/* Speculation control variants */ +# define PR_SPEC_STORE_BYPASS 0 +/* Return and control values for PR_SET/GET_SPECULATION_CTRL */ +# define PR_SPEC_NOT_AFFECTED 0 +# define PR_SPEC_PRCTL (1UL << 0) +# define PR_SPEC_ENABLE (1UL << 1) +# define PR_SPEC_DISABLE (1UL << 2) + #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 7bbf1373e228840bb0295a2ca26d548ef37f448e Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Tue, 1 May 2018 15:19:04 -0700 Subject: nospec: Allow getting/setting on non-current task Adjust arch_prctl_get/set_spec_ctrl() to operate on tasks other than current. This is needed both for /proc/$pid/status queries and for seccomp (since thread-syncing can trigger seccomp in non-current threads). Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner --- include/linux/nospec.h | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) (limited to 'include') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index 700bb8a4e4ea..a908c954484d 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -7,6 +7,8 @@ #define _LINUX_NOSPEC_H #include +struct task_struct; + /** * array_index_mask_nospec() - generate a ~0 mask when index < size, 0 otherwise * @index: array element index @@ -57,7 +59,8 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, }) /* Speculation control prctl */ -int arch_prctl_spec_ctrl_get(unsigned long which); -int arch_prctl_spec_ctrl_set(unsigned long which, unsigned long ctrl); +int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); +int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, + unsigned long ctrl); #endif /* _LINUX_NOSPEC_H */ -- cgit v1.2.3 From 356e4bfff2c5489e016fdb925adbf12a1e3950ee Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Thu, 3 May 2018 22:09:15 +0200 Subject: prctl: Add force disable speculation For certain use cases it is desired to enforce mitigations so they cannot be undone afterwards. That's important for loader stubs which want to prevent a child from disabling the mitigation again. Will also be used for seccomp(). The extra state preserving of the prctl state for SSB is a preparatory step for EBPF dymanic speculation control. Signed-off-by: Thomas Gleixner --- include/linux/sched.h | 10 +++++++++- include/uapi/linux/prctl.h | 1 + 2 files changed, 10 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/sched.h b/include/linux/sched.h index b3d697f3b573..e4218d4deba0 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1393,7 +1393,8 @@ static inline bool is_percpu_thread(void) #define PFA_NO_NEW_PRIVS 0 /* May not gain new privileges. */ #define PFA_SPREAD_PAGE 1 /* Spread page cache over cpuset */ #define PFA_SPREAD_SLAB 2 /* Spread some slab caches over cpuset */ - +#define PFA_SPEC_SSB_DISABLE 3 /* Speculative Store Bypass disabled */ +#define PFA_SPEC_SSB_FORCE_DISABLE 4 /* Speculative Store Bypass force disabled*/ #define TASK_PFA_TEST(name, func) \ static inline bool task_##func(struct task_struct *p) \ @@ -1418,6 +1419,13 @@ TASK_PFA_TEST(SPREAD_SLAB, spread_slab) TASK_PFA_SET(SPREAD_SLAB, spread_slab) TASK_PFA_CLEAR(SPREAD_SLAB, spread_slab) +TASK_PFA_TEST(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_SET(SPEC_SSB_DISABLE, spec_ssb_disable) +TASK_PFA_CLEAR(SPEC_SSB_DISABLE, spec_ssb_disable) + +TASK_PFA_TEST(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) +TASK_PFA_SET(SPEC_SSB_FORCE_DISABLE, spec_ssb_force_disable) + static inline void current_restore_flags(unsigned long orig_flags, unsigned long flags) { diff --git a/include/uapi/linux/prctl.h b/include/uapi/linux/prctl.h index ebf057ac1346..db9f15f5db04 100644 --- a/include/uapi/linux/prctl.h +++ b/include/uapi/linux/prctl.h @@ -217,5 +217,6 @@ struct prctl_mm_map { # define PR_SPEC_PRCTL (1UL << 0) # define PR_SPEC_ENABLE (1UL << 1) # define PR_SPEC_DISABLE (1UL << 2) +# define PR_SPEC_FORCE_DISABLE (1UL << 3) #endif /* _LINUX_PRCTL_H */ -- cgit v1.2.3 From 00a02d0c502a06d15e07b857f8ff921e3e402675 Mon Sep 17 00:00:00 2001 From: Kees Cook Date: Thu, 3 May 2018 14:56:12 -0700 Subject: seccomp: Add filter flag to opt-out of SSB mitigation If a seccomp user is not interested in Speculative Store Bypass mitigation by default, it can set the new SECCOMP_FILTER_FLAG_SPEC_ALLOW flag when adding filters. Signed-off-by: Kees Cook Signed-off-by: Thomas Gleixner --- include/linux/seccomp.h | 5 +++-- include/uapi/linux/seccomp.h | 5 +++-- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'include') diff --git a/include/linux/seccomp.h b/include/linux/seccomp.h index c723a5c4e3ff..e5320f6c8654 100644 --- a/include/linux/seccomp.h +++ b/include/linux/seccomp.h @@ -4,8 +4,9 @@ #include -#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ - SECCOMP_FILTER_FLAG_LOG) +#define SECCOMP_FILTER_FLAG_MASK (SECCOMP_FILTER_FLAG_TSYNC | \ + SECCOMP_FILTER_FLAG_LOG | \ + SECCOMP_FILTER_FLAG_SPEC_ALLOW) #ifdef CONFIG_SECCOMP diff --git a/include/uapi/linux/seccomp.h b/include/uapi/linux/seccomp.h index 2a0bd9dd104d..9efc0e73d50b 100644 --- a/include/uapi/linux/seccomp.h +++ b/include/uapi/linux/seccomp.h @@ -17,8 +17,9 @@ #define SECCOMP_GET_ACTION_AVAIL 2 /* Valid flags for SECCOMP_SET_MODE_FILTER */ -#define SECCOMP_FILTER_FLAG_TSYNC 1 -#define SECCOMP_FILTER_FLAG_LOG 2 +#define SECCOMP_FILTER_FLAG_TSYNC (1UL << 0) +#define SECCOMP_FILTER_FLAG_LOG (1UL << 1) +#define SECCOMP_FILTER_FLAG_SPEC_ALLOW (1UL << 2) /* * All BPF programs must return a 32-bit value. -- cgit v1.2.3 From 8bf37d8c067bb7eb8e7c381bdadf9bd89182b6bc Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Fri, 4 May 2018 15:12:06 +0200 Subject: seccomp: Move speculation migitation control to arch code The migitation control is simpler to implement in architecture code as it avoids the extra function call to check the mode. Aside of that having an explicit seccomp enabled mode in the architecture mitigations would require even more workarounds. Move it into architecture code and provide a weak function in the seccomp code. Remove the 'which' argument as this allows the architecture to decide which mitigations are relevant for seccomp. Signed-off-by: Thomas Gleixner --- include/linux/nospec.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/linux/nospec.h b/include/linux/nospec.h index a908c954484d..0c5ef54fd416 100644 --- a/include/linux/nospec.h +++ b/include/linux/nospec.h @@ -62,5 +62,7 @@ static inline unsigned long array_index_mask_nospec(unsigned long index, int arch_prctl_spec_ctrl_get(struct task_struct *task, unsigned long which); int arch_prctl_spec_ctrl_set(struct task_struct *task, unsigned long which, unsigned long ctrl); +/* Speculation control for seccomp enforced mitigation */ +void arch_seccomp_spec_mitigate(struct task_struct *task); #endif /* _LINUX_NOSPEC_H */ -- cgit v1.2.3 From bb7b40aecbf778c0c83a5bd62b0f03ca9f49a618 Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Tue, 8 May 2018 02:43:57 +0200 Subject: netfilter: nf_tables: bogus EBUSY in chain deletions When removing a rule that jumps to chain and such chain in the same batch, this bogusly hits EBUSY. Add activate and deactivate operations to expression that can be called from the preparation and the commit/abort phases. Signed-off-by: Pablo Neira Ayuso --- include/net/netfilter/nf_tables.h | 5 +++++ 1 file changed, 5 insertions(+) (limited to 'include') diff --git a/include/net/netfilter/nf_tables.h b/include/net/netfilter/nf_tables.h index cd368d1b8cb8..a1e28dd5d0bf 100644 --- a/include/net/netfilter/nf_tables.h +++ b/include/net/netfilter/nf_tables.h @@ -170,6 +170,7 @@ struct nft_data_desc { int nft_data_init(const struct nft_ctx *ctx, struct nft_data *data, unsigned int size, struct nft_data_desc *desc, const struct nlattr *nla); +void nft_data_hold(const struct nft_data *data, enum nft_data_types type); void nft_data_release(const struct nft_data *data, enum nft_data_types type); int nft_data_dump(struct sk_buff *skb, int attr, const struct nft_data *data, enum nft_data_types type, unsigned int len); @@ -736,6 +737,10 @@ struct nft_expr_ops { int (*init)(const struct nft_ctx *ctx, const struct nft_expr *expr, const struct nlattr * const tb[]); + void (*activate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); + void (*deactivate)(const struct nft_ctx *ctx, + const struct nft_expr *expr); void (*destroy)(const struct nft_ctx *ctx, const struct nft_expr *expr); int (*dump)(struct sk_buff *skb, -- cgit v1.2.3 From 1e2e547a93a00ebc21582c06ca3c6cfea2a309ee Mon Sep 17 00:00:00 2001 From: Al Viro Date: Fri, 4 May 2018 08:23:01 -0400 Subject: do d_instantiate/unlock_new_inode combinations safely For anything NFS-exported we do _not_ want to unlock new inode before it has grown an alias; original set of fixes got the ordering right, but missed the nasty complication in case of lockdep being enabled - unlock_new_inode() does lockdep_annotate_inode_mutex_key(inode) which can only be done before anyone gets a chance to touch ->i_mutex. Unfortunately, flipping the order and doing unlock_new_inode() before d_instantiate() opens a window when mkdir can race with open-by-fhandle on a guessed fhandle, leading to multiple aliases for a directory inode and all the breakage that follows from that. Correct solution: a new primitive (d_instantiate_new()) combining these two in the right order - lockdep annotate, then d_instantiate(), then the rest of unlock_new_inode(). All combinations of d_instantiate() with unlock_new_inode() should be converted to that. Cc: stable@kernel.org # 2.6.29 and later Tested-by: Mike Marshall Reviewed-by: Andreas Dilger Signed-off-by: Al Viro --- include/linux/dcache.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/dcache.h b/include/linux/dcache.h index 94acbde17bb1..66c6e17e61e5 100644 --- a/include/linux/dcache.h +++ b/include/linux/dcache.h @@ -224,6 +224,7 @@ extern seqlock_t rename_lock; * These are the low-level FS interfaces to the dcache.. */ extern void d_instantiate(struct dentry *, struct inode *); +extern void d_instantiate_new(struct dentry *, struct inode *); extern struct dentry * d_instantiate_unique(struct dentry *, struct inode *); extern struct dentry * d_instantiate_anon(struct dentry *, struct inode *); extern int d_instantiate_no_diralias(struct dentry *, struct inode *); -- cgit v1.2.3 From 8e907ed4882714fd13cfe670681fc6cb5284c780 Mon Sep 17 00:00:00 2001 From: Lidong Chen Date: Tue, 8 May 2018 16:50:16 +0800 Subject: IB/umem: Use the correct mm during ib_umem_release User-space may invoke ibv_reg_mr and ibv_dereg_mr in different threads. If ibv_dereg_mr is called after the thread which invoked ibv_reg_mr has exited, get_pid_task will return NULL and ib_umem_release will not decrease mm->pinned_vm. Instead of using threads to locate the mm, use the overall tgid from the ib_ucontext struct instead. This matches the behavior of ODP and disassociate in handling the mm of the process that called ibv_reg_mr. Cc: Fixes: 87773dd56d54 ("IB: ib_umem_release() should decrement mm->pinned_vm from ib_umem_get") Signed-off-by: Lidong Chen Signed-off-by: Jason Gunthorpe --- include/rdma/ib_umem.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include') diff --git a/include/rdma/ib_umem.h b/include/rdma/ib_umem.h index 23159dd5be18..a1fd63871d17 100644 --- a/include/rdma/ib_umem.h +++ b/include/rdma/ib_umem.h @@ -48,7 +48,6 @@ struct ib_umem { int writable; int hugetlb; struct work_struct work; - struct pid *pid; struct mm_struct *mm; unsigned long diff; struct ib_umem_odp *odp_data; -- cgit v1.2.3 From e3ca34880652250f524022ad89e516f8ba9a805b Mon Sep 17 00:00:00 2001 From: Saeed Mahameed Date: Mon, 14 May 2018 15:38:10 -0700 Subject: net/mlx5: Fix build break when CONFIG_SMP=n MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Avoid using the kernel's irq_descriptor and return IRQ vector affinity directly from the driver. This fixes the following build break when CONFIG_SMP=n include/linux/mlx5/driver.h: In function ‘mlx5_get_vector_affinity_hint’: include/linux/mlx5/driver.h:1299:13: error: ‘struct irq_desc’ has no member named ‘affinity_hint’ Fixes: 6082d9c9c94a ("net/mlx5: Fix mlx5_get_vector_affinity function") Signed-off-by: Saeed Mahameed CC: Randy Dunlap CC: Guenter Roeck CC: Thomas Gleixner Tested-by: Israel Rukshin Reported-by: kbuild test robot Reported-by: Randy Dunlap Tested-by: Randy Dunlap Acked-by: Thomas Gleixner Tested-by: Guenter Roeck Signed-off-by: David S. Miller --- include/linux/mlx5/driver.h | 12 +----------- 1 file changed, 1 insertion(+), 11 deletions(-) (limited to 'include') diff --git a/include/linux/mlx5/driver.h b/include/linux/mlx5/driver.h index 2a156c5dfadd..d703774982ca 100644 --- a/include/linux/mlx5/driver.h +++ b/include/linux/mlx5/driver.h @@ -1286,17 +1286,7 @@ enum { static inline const struct cpumask * mlx5_get_vector_affinity_hint(struct mlx5_core_dev *dev, int vector) { - struct irq_desc *desc; - unsigned int irq; - int eqn; - int err; - - err = mlx5_vector2eqn(dev, vector, &eqn, &irq); - if (err) - return NULL; - - desc = irq_to_desc(irq); - return desc->affinity_hint; + return dev->priv.irq_info[vector].mask; } #endif /* MLX5_DRIVER_H */ -- cgit v1.2.3 From 8ab6ffba14a466c7298cb3fd5066d774d2977ad1 Mon Sep 17 00:00:00 2001 From: Matt Mullins Date: Wed, 16 May 2018 10:48:40 -0700 Subject: tls: don't use stack memory in a scatterlist scatterlist code expects virt_to_page() to work, which fails with CONFIG_VMAP_STACK=y. Fixes: c46234ebb4d1e ("tls: RX path for ktls") Signed-off-by: Matt Mullins Signed-off-by: David S. Miller --- include/net/tls.h | 3 +++ 1 file changed, 3 insertions(+) (limited to 'include') diff --git a/include/net/tls.h b/include/net/tls.h index b400d0bb7448..f5fb16da3860 100644 --- a/include/net/tls.h +++ b/include/net/tls.h @@ -97,6 +97,9 @@ struct tls_sw_context { u8 control; bool decrypted; + char rx_aad_ciphertext[TLS_AAD_SPACE_SIZE]; + char rx_aad_plaintext[TLS_AAD_SPACE_SIZE]; + /* Sending context */ char aad_space[TLS_AAD_SPACE_SIZE]; -- cgit v1.2.3 From 814596495dd2b9d4aab92d8f89cf19060d25d2ea Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Mon, 14 May 2018 20:09:24 -0700 Subject: cfg80211: further limit wiphy names to 64 bytes wiphy names were recently limited to 128 bytes by commit a7cfebcb7594 ("cfg80211: limit wiphy names to 128 bytes"). As it turns out though, this isn't sufficient because dev_vprintk_emit() needs the syslog header string "SUBSYSTEM=ieee80211\0DEVICE=+ieee80211:$devname" to fit into 128 bytes. This triggered the "device/subsystem name too long" WARN when the device name was >= 90 bytes. As before, this was reproduced by syzbot by sending an HWSIM_CMD_NEW_RADIO command to the MAC80211_HWSIM generic netlink family. Fix it by further limiting wiphy names to 64 bytes. Reported-by: syzbot+e64565577af34b3768dc@syzkaller.appspotmail.com Fixes: a7cfebcb7594 ("cfg80211: limit wiphy names to 128 bytes") Signed-off-by: Eric Biggers Signed-off-by: Johannes Berg --- include/uapi/linux/nl80211.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/nl80211.h b/include/uapi/linux/nl80211.h index 9c3630146cec..271b93783d28 100644 --- a/include/uapi/linux/nl80211.h +++ b/include/uapi/linux/nl80211.h @@ -2698,7 +2698,7 @@ enum nl80211_attrs { #define NL80211_ATTR_KEYS NL80211_ATTR_KEYS #define NL80211_ATTR_FEATURE_FLAGS NL80211_ATTR_FEATURE_FLAGS -#define NL80211_WIPHY_NAME_MAXLEN 128 +#define NL80211_WIPHY_NAME_MAXLEN 64 #define NL80211_MAX_SUPP_RATES 32 #define NL80211_MAX_SUPP_HT_RATES 77 -- cgit v1.2.3 From af86ca4e3088fe5eacf2f7e58c01fa68ca067672 Mon Sep 17 00:00:00 2001 From: Alexei Starovoitov Date: Tue, 15 May 2018 09:27:05 -0700 Subject: bpf: Prevent memory disambiguation attack Detect code patterns where malicious 'speculative store bypass' can be used and sanitize such patterns. 39: (bf) r3 = r10 40: (07) r3 += -216 41: (79) r8 = *(u64 *)(r7 +0) // slow read 42: (7a) *(u64 *)(r10 -72) = 0 // verifier inserts this instruction 43: (7b) *(u64 *)(r8 +0) = r3 // this store becomes slow due to r8 44: (79) r1 = *(u64 *)(r6 +0) // cpu speculatively executes this load 45: (71) r2 = *(u8 *)(r1 +0) // speculatively arbitrary 'load byte' // is now sanitized Above code after x86 JIT becomes: e5: mov %rbp,%rdx e8: add $0xffffffffffffff28,%rdx ef: mov 0x0(%r13),%r14 f3: movq $0x0,-0x48(%rbp) fb: mov %rdx,0x0(%r14) ff: mov 0x0(%rbx),%rdi 103: movzbq 0x0(%rdi),%rsi Signed-off-by: Alexei Starovoitov Signed-off-by: Thomas Gleixner --- include/linux/bpf_verifier.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e61c395fddf..65cfc2f59db9 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -146,6 +146,7 @@ struct bpf_insn_aux_data { s32 call_imm; /* saved imm field of call insn */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ + int sanitize_stack_off; /* stack slot to be cleared */ bool seen; /* this insn was processed by the verifier */ }; -- cgit v1.2.3 From 644fbdeacf1d3edd366e44b8ba214de9d1dd66a9 Mon Sep 17 00:00:00 2001 From: Xin Long Date: Sun, 20 May 2018 16:39:10 +0800 Subject: sctp: fix the issue that flags are ignored when using kernel_connect Now sctp uses inet_dgram_connect as its proto_ops .connect, and the flags param can't be passed into its proto .connect where this flags is really needed. sctp works around it by getting flags from socket file in __sctp_connect. It works for connecting from userspace, as inherently the user sock has socket file and it passes f_flags as the flags param into the proto_ops .connect. However, the sock created by sock_create_kern doesn't have a socket file, and it passes the flags (like O_NONBLOCK) by using the flags param in kernel_connect, which calls proto_ops .connect later. So to fix it, this patch defines a new proto_ops .connect for sctp, sctp_inet_connect, which calls __sctp_connect() directly with this flags param. After this, the sctp's proto .connect can be removed. Note that sctp_inet_connect doesn't need to do some checks that are not needed for sctp, which makes thing better than with inet_dgram_connect. Suggested-by: Marcelo Ricardo Leitner Signed-off-by: Xin Long Acked-by: Neil Horman Acked-by: Marcelo Ricardo Leitner Reviewed-by: Michal Kubecek Signed-off-by: David S. Miller --- include/net/sctp/sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/net/sctp/sctp.h b/include/net/sctp/sctp.h index 28b996d63490..35498e613ff5 100644 --- a/include/net/sctp/sctp.h +++ b/include/net/sctp/sctp.h @@ -103,6 +103,8 @@ void sctp_addr_wq_mgmt(struct net *, struct sctp_sockaddr_entry *, int); /* * sctp/socket.c */ +int sctp_inet_connect(struct socket *sock, struct sockaddr *uaddr, + int addr_len, int flags); int sctp_backlog_rcv(struct sock *sk, struct sk_buff *skb); int sctp_inet_listen(struct socket *sock, int backlog); void sctp_write_space(struct sock *sk); -- cgit v1.2.3 From f4602cbb0a2478dda8238a4f382867da425daa8e Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Tue, 22 May 2018 15:56:51 -0600 Subject: IB/uverbs: Fix uverbs_attr_get_obj The err pointer comes from uverbs_attr_get, not from the uobject member, which does not store an ERR_PTR. Fixes: be934cca9e98 ("IB/uverbs: Add device memory registration ioctl support") Signed-off-by: Jason Gunthorpe Reviewed-by: Leon Romanovsky --- include/rdma/uverbs_ioctl.h | 10 +++++----- 1 file changed, 5 insertions(+), 5 deletions(-) (limited to 'include') diff --git a/include/rdma/uverbs_ioctl.h b/include/rdma/uverbs_ioctl.h index 4a4201d997a7..095383a4bd1a 100644 --- a/include/rdma/uverbs_ioctl.h +++ b/include/rdma/uverbs_ioctl.h @@ -411,13 +411,13 @@ static inline int uverbs_attr_get_enum_id(const struct uverbs_attr_bundle *attrs static inline void *uverbs_attr_get_obj(const struct uverbs_attr_bundle *attrs_bundle, u16 idx) { - struct ib_uobject *uobj = - uverbs_attr_get(attrs_bundle, idx)->obj_attr.uobject; + const struct uverbs_attr *attr; - if (IS_ERR(uobj)) - return uobj; + attr = uverbs_attr_get(attrs_bundle, idx); + if (IS_ERR(attr)) + return ERR_CAST(attr); - return uobj->object; + return attr->obj_attr.uobject->object; } static inline int uverbs_copy_to(const struct uverbs_attr_bundle *attrs_bundle, -- cgit v1.2.3 From c93552c443ebc63b14e26e46d2e76941c88e0d71 Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Thu, 24 May 2018 02:32:53 +0200 Subject: bpf: properly enforce index mask to prevent out-of-bounds speculation While reviewing the verifier code, I recently noticed that the following two program variants in relation to tail calls can be loaded. Variant 1: # bpftool p d x i 15 0: (15) if r1 == 0x0 goto pc+3 1: (18) r2 = map[id:5] 3: (05) goto pc+2 4: (18) r2 = map[id:6] 6: (b7) r3 = 7 7: (35) if r3 >= 0xa0 goto pc+2 8: (54) (u32) r3 &= (u32) 255 9: (85) call bpf_tail_call#12 10: (b7) r0 = 1 11: (95) exit # bpftool m s i 5 5: prog_array flags 0x0 key 4B value 4B max_entries 4 memlock 4096B # bpftool m s i 6 6: prog_array flags 0x0 key 4B value 4B max_entries 160 memlock 4096B Variant 2: # bpftool p d x i 20 0: (15) if r1 == 0x0 goto pc+3 1: (18) r2 = map[id:8] 3: (05) goto pc+2 4: (18) r2 = map[id:7] 6: (b7) r3 = 7 7: (35) if r3 >= 0x4 goto pc+2 8: (54) (u32) r3 &= (u32) 3 9: (85) call bpf_tail_call#12 10: (b7) r0 = 1 11: (95) exit # bpftool m s i 8 8: prog_array flags 0x0 key 4B value 4B max_entries 160 memlock 4096B # bpftool m s i 7 7: prog_array flags 0x0 key 4B value 4B max_entries 4 memlock 4096B In both cases the index masking inserted by the verifier in order to control out of bounds speculation from a CPU via b2157399cc98 ("bpf: prevent out-of-bounds speculation") seems to be incorrect in what it is enforcing. In the 1st variant, the mask is applied from the map with the significantly larger number of entries where we would allow to a certain degree out of bounds speculation for the smaller map, and in the 2nd variant where the mask is applied from the map with the smaller number of entries, we get buggy behavior since we truncate the index of the larger map. The original intent from commit b2157399cc98 is to reject such occasions where two or more different tail call maps are used in the same tail call helper invocation. However, the check on the BPF_MAP_PTR_POISON is never hit since we never poisoned the saved pointer in the first place! We do this explicitly for map lookups but in case of tail calls we basically used the tail call map in insn_aux_data that was processed in the most recent path which the verifier walked. Thus any prior path that stored a pointer in insn_aux_data at the helper location was always overridden. Fix it by moving the map pointer poison logic into a small helper that covers both BPF helpers with the same logic. After that in fixup_bpf_calls() the poison check is then hit for tail calls and the program rejected. Latter only happens in unprivileged case since this is the *only* occasion where a rewrite needs to happen, and where such rewrite is specific to the map (max_entries, index_mask). In the privileged case the rewrite is generic for the insn->imm / insn->code update so multiple maps from different paths can be handled just fine since all the remaining logic happens in the instruction processing itself. This is similar to the case of map lookups: in case there is a collision of maps in fixup_bpf_calls() we must skip the inlined rewrite since this will turn the generic instruction sequence into a non- generic one. Thus the patch_call_imm will simply update the insn->imm location where the bpf_map_lookup_elem() will later take care of the dispatch. Given we need this 'poison' state as a check, the information of whether a map is an unpriv_array gets lost, so enforcing it prior to that needs an additional state. In general this check is needed since there are some complex and tail call intensive BPF programs out there where LLVM tends to generate such code occasionally. We therefore convert the map_ptr rather into map_state to store all this w/o extra memory overhead, and the bit whether one of the maps involved in the collision was from an unpriv_array thus needs to be retained as well there. Fixes: b2157399cc98 ("bpf: prevent out-of-bounds speculation") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/linux/bpf_verifier.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/bpf_verifier.h b/include/linux/bpf_verifier.h index 7e61c395fddf..52fb077d3c45 100644 --- a/include/linux/bpf_verifier.h +++ b/include/linux/bpf_verifier.h @@ -142,7 +142,7 @@ struct bpf_verifier_state_list { struct bpf_insn_aux_data { union { enum bpf_reg_type ptr_type; /* pointer type for load/store insns */ - struct bpf_map *map_ptr; /* pointer for call insn into lookup_elem */ + unsigned long map_state; /* pointer/poison value for maps */ s32 call_imm; /* saved imm field of call insn */ }; int ctx_field_size; /* the ctx field size for load insn, maybe 0 */ -- cgit v1.2.3 From d883c6cf3b39f1f42506e82ad2779fb88004acf3 Mon Sep 17 00:00:00 2001 From: Joonsoo Kim Date: Wed, 23 May 2018 10:18:21 +0900 Subject: Revert "mm/cma: manage the memory of the CMA area by using the ZONE_MOVABLE" MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This reverts the following commits that change CMA design in MM. 3d2054ad8c2d ("ARM: CMA: avoid double mapping to the CMA area if CONFIG_HIGHMEM=y") 1d47a3ec09b5 ("mm/cma: remove ALLOC_CMA") bad8c6c0b114 ("mm/cma: manage the memory of the CMA area by using the ZONE_MOVABLE") Ville reported a following error on i386. Inode-cache hash table entries: 65536 (order: 6, 262144 bytes) microcode: microcode updated early to revision 0x4, date = 2013-06-28 Initializing CPU#0 Initializing HighMem for node 0 (000377fe:00118000) Initializing Movable for node 0 (00000001:00118000) BUG: Bad page state in process swapper pfn:377fe page:f53effc0 count:0 mapcount:-127 mapping:00000000 index:0x0 flags: 0x80000000() raw: 80000000 00000000 00000000 ffffff80 00000000 00000100 00000200 00000001 page dumped because: nonzero mapcount Modules linked in: CPU: 0 PID: 0 Comm: swapper Not tainted 4.17.0-rc5-elk+ #145 Hardware name: Dell Inc. Latitude E5410/03VXMC, BIOS A15 07/11/2013 Call Trace: dump_stack+0x60/0x96 bad_page+0x9a/0x100 free_pages_check_bad+0x3f/0x60 free_pcppages_bulk+0x29d/0x5b0 free_unref_page_commit+0x84/0xb0 free_unref_page+0x3e/0x70 __free_pages+0x1d/0x20 free_highmem_page+0x19/0x40 add_highpages_with_active_regions+0xab/0xeb set_highmem_pages_init+0x66/0x73 mem_init+0x1b/0x1d7 start_kernel+0x17a/0x363 i386_start_kernel+0x95/0x99 startup_32_smp+0x164/0x168 The reason for this error is that the span of MOVABLE_ZONE is extended to whole node span for future CMA initialization, and, normal memory is wrongly freed here. I submitted the fix and it seems to work, but, another problem happened. It's so late time to fix the later problem so I decide to reverting the series. Reported-by: Ville Syrjälä Acked-by: Laura Abbott Acked-by: Michal Hocko Cc: Andrew Morton Signed-off-by: Joonsoo Kim Signed-off-by: Linus Torvalds --- include/linux/memory_hotplug.h | 3 +++ include/linux/mm.h | 1 - 2 files changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/memory_hotplug.h b/include/linux/memory_hotplug.h index e0e49b5b1ee1..2b0265265c28 100644 --- a/include/linux/memory_hotplug.h +++ b/include/linux/memory_hotplug.h @@ -216,6 +216,9 @@ void put_online_mems(void); void mem_hotplug_begin(void); void mem_hotplug_done(void); +extern void set_zone_contiguous(struct zone *zone); +extern void clear_zone_contiguous(struct zone *zone); + #else /* ! CONFIG_MEMORY_HOTPLUG */ #define pfn_to_online_page(pfn) \ ({ \ diff --git a/include/linux/mm.h b/include/linux/mm.h index c6fa9a255dbf..02a616e2f17d 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -2109,7 +2109,6 @@ extern void setup_per_cpu_pageset(void); extern void zone_pcp_update(struct zone *zone); extern void zone_pcp_reset(struct zone *zone); -extern void setup_zone_pageset(struct zone *zone); /* page_alloc.c */ extern int min_free_kbytes; -- cgit v1.2.3 From af8d3c7c001ae7df1ed2b2715f058113efc86187 Mon Sep 17 00:00:00 2001 From: Eric Biggers Date: Wed, 23 May 2018 14:37:38 -0700 Subject: ppp: remove the PPPIOCDETACH ioctl The PPPIOCDETACH ioctl effectively tries to "close" the given ppp file before f_count has reached 0, which is fundamentally a bad idea. It does check 'f_count < 2', which excludes concurrent operations on the file since they would only be possible with a shared fd table, in which case each fdget() would take a file reference. However, it fails to account for the fact that even with 'f_count == 1' the file can still be linked into epoll instances. As reported by syzbot, this can trivially be used to cause a use-after-free. Yet, the only known user of PPPIOCDETACH is pppd versions older than ppp-2.4.2, which was released almost 15 years ago (November 2003). Also, PPPIOCDETACH apparently stopped working reliably at around the same time, when the f_count check was added to the kernel, e.g. see https://lkml.org/lkml/2002/12/31/83. Also, the current 'f_count < 2' check makes PPPIOCDETACH only work in single-threaded applications; it always fails if called from a multithreaded application. All pppd versions released in the last 15 years just close() the file descriptor instead. Therefore, instead of hacking around this bug by exporting epoll internals to modules, and probably missing other related bugs, just remove the PPPIOCDETACH ioctl and see if anyone actually notices. Leave a stub in place that prints a one-time warning and returns EINVAL. Reported-by: syzbot+16363c99d4134717c05b@syzkaller.appspotmail.com Fixes: 1da177e4c3f4 ("Linux-2.6.12-rc2") Signed-off-by: Eric Biggers Acked-by: Paul Mackerras Reviewed-by: Guillaume Nault Tested-by: Guillaume Nault Signed-off-by: David S. Miller --- include/uapi/linux/ppp-ioctl.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/uapi/linux/ppp-ioctl.h b/include/uapi/linux/ppp-ioctl.h index b19a9c249b15..784c2e3e572e 100644 --- a/include/uapi/linux/ppp-ioctl.h +++ b/include/uapi/linux/ppp-ioctl.h @@ -106,7 +106,7 @@ struct pppol2tp_ioc_stats { #define PPPIOCGIDLE _IOR('t', 63, struct ppp_idle) /* get idle time */ #define PPPIOCNEWUNIT _IOWR('t', 62, int) /* create new ppp unit */ #define PPPIOCATTACH _IOW('t', 61, int) /* attach to ppp unit */ -#define PPPIOCDETACH _IOW('t', 60, int) /* detach from ppp unit/chan */ +#define PPPIOCDETACH _IOW('t', 60, int) /* obsolete, do not use */ #define PPPIOCSMRRU _IOW('t', 59, int) /* set multilink MRU */ #define PPPIOCCONNECT _IOW('t', 58, int) /* connect channel to unit */ #define PPPIOCDISCONN _IO('t', 57) /* disconnect channel */ -- cgit v1.2.3 From 4ff648decf4712d39f184fc2df3163f43975575a Mon Sep 17 00:00:00 2001 From: Sebastian Andrzej Siewior Date: Thu, 24 May 2018 15:26:48 +0200 Subject: sched, tracing: Fix trace_sched_pi_setprio() for deboosting MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Since the following commit: b91473ff6e97 ("sched,tracing: Update trace_sched_pi_setprio()") the sched_pi_setprio trace point shows the "newprio" during a deboost: |futex sched_pi_setprio: comm=futex_requeue_p pid"34 oldprio˜ newprio=3D98 |futex sched_switch: prev_comm=futex_requeue_p prev_pid"34 prev_prio=120 This patch open codes __rt_effective_prio() in the tracepoint as the 'newprio' to get the old behaviour back / the correct priority: |futex sched_pi_setprio: comm=futex_requeue_p pid"20 oldprio˜ newprio=3D120 |futex sched_switch: prev_comm=futex_requeue_p prev_pid"20 prev_prio=120 Peter suggested to open code the new priority so people using tracehook could get the deadline data out. Reported-by: Mansky Christian Signed-off-by: Sebastian Andrzej Siewior Signed-off-by: Peter Zijlstra (Intel) Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Steven Rostedt Cc: Thomas Gleixner Fixes: b91473ff6e97 ("sched,tracing: Update trace_sched_pi_setprio()") Link: http://lkml.kernel.org/r/20180524132647.gg6ziuogczdmjjzu@linutronix.de Signed-off-by: Ingo Molnar --- include/trace/events/sched.h | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'include') diff --git a/include/trace/events/sched.h b/include/trace/events/sched.h index bc01e06bc716..0be866c91f62 100644 --- a/include/trace/events/sched.h +++ b/include/trace/events/sched.h @@ -435,7 +435,9 @@ TRACE_EVENT(sched_pi_setprio, memcpy(__entry->comm, tsk->comm, TASK_COMM_LEN); __entry->pid = tsk->pid; __entry->oldprio = tsk->prio; - __entry->newprio = pi_task ? pi_task->prio : tsk->prio; + __entry->newprio = pi_task ? + min(tsk->normal_prio, pi_task->prio) : + tsk->normal_prio; /* XXX SCHED_DEADLINE bits missing */ ), -- cgit v1.2.3 From 8addc2d00fe171c89b5fcbafe583c2b90574d301 Mon Sep 17 00:00:00 2001 From: Michal Hocko Date: Fri, 25 May 2018 14:47:46 -0700 Subject: mm: do not warn on offline nodes unless the specific node is explicitly requested Oscar has noticed that we splat WARNING: CPU: 0 PID: 64 at ./include/linux/gfp.h:467 vmemmap_alloc_block+0x4e/0xc9 [...] CPU: 0 PID: 64 Comm: kworker/u4:1 Tainted: G W E 4.17.0-rc5-next-20180517-1-default+ #66 Hardware name: QEMU Standard PC (i440FX + PIIX, 1996), BIOS 1.0.0-prebuilt.qemu-project.org 04/01/2014 Workqueue: kacpi_hotplug acpi_hotplug_work_fn Call Trace: vmemmap_populate+0xf2/0x2ae sparse_mem_map_populate+0x28/0x35 sparse_add_one_section+0x4c/0x187 __add_pages+0xe7/0x1a0 add_pages+0x16/0x70 add_memory_resource+0xa3/0x1d0 add_memory+0xe4/0x110 acpi_memory_device_add+0x134/0x2e0 acpi_bus_attach+0xd9/0x190 acpi_bus_scan+0x37/0x70 acpi_device_hotplug+0x389/0x4e0 acpi_hotplug_work_fn+0x1a/0x30 process_one_work+0x146/0x340 worker_thread+0x47/0x3e0 kthread+0xf5/0x130 ret_from_fork+0x35/0x40 when adding memory to a node that is currently offline. The VM_WARN_ON is just too loud without a good reason. In this particular case we are doing alloc_pages_node(node, GFP_KERNEL|__GFP_RETRY_MAYFAIL|__GFP_NOWARN, order) so we do not insist on allocating from the given node (it is more a hint) so we can fall back to any other populated node and moreover we explicitly ask to not warn for the allocation failure. Soften the warning only to cases when somebody asks for the given node explicitly by __GFP_THISNODE. Link: http://lkml.kernel.org/r/20180523125555.30039-3-mhocko@kernel.org Signed-off-by: Michal Hocko Reported-by: Oscar Salvador Tested-by: Oscar Salvador Reviewed-by: Pavel Tatashin Cc: Vlastimil Babka Cc: Reza Arbab Cc: Igor Mammedov Cc: Vitaly Kuznetsov Cc: Anshuman Khandual Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/gfp.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/linux/gfp.h b/include/linux/gfp.h index 1a4582b44d32..fc5ab85278d5 100644 --- a/include/linux/gfp.h +++ b/include/linux/gfp.h @@ -464,7 +464,7 @@ static inline struct page * __alloc_pages_node(int nid, gfp_t gfp_mask, unsigned int order) { VM_BUG_ON(nid < 0 || nid >= MAX_NUMNODES); - VM_WARN_ON(!node_online(nid)); + VM_WARN_ON((gfp_mask & __GFP_THISNODE) && !node_online(nid)); return __alloc_pages(gfp_mask, order, nid); } -- cgit v1.2.3 From a21558618c5dfc55b6086743a88ce5a9c1588f0a Mon Sep 17 00:00:00 2001 From: Jonathan Cameron Date: Fri, 25 May 2018 14:47:53 -0700 Subject: mm/memory_hotplug: fix leftover use of struct page during hotplug The case of a new numa node got missed in avoiding using the node info from page_struct during hotplug. In this path we have a call to register_mem_sect_under_node (which allows us to specify it is hotplug so don't change the node), via link_mem_sections which unfortunately does not. Fix is to pass check_nid through link_mem_sections as well and disable it in the new numa node path. Note the bug only 'sometimes' manifests depending on what happens to be in the struct page structures - there are lots of them and it only needs to match one of them. The result of the bug is that (with a new memory only node) we never successfully call register_mem_sect_under_node so don't get the memory associated with the node in sysfs and meminfo for the node doesn't report it. It came up whilst testing some arm64 hotplug patches, but appears to be universal. Whilst I'm triggering it by removing then reinserting memory to a node with no other elements (thus making the node disappear then appear again), it appears it would happen on hotplugging memory where there was none before and it doesn't seem to be related the arm64 patches. These patches call __add_pages (where most of the issue was fixed by Pavel's patch). If there is a node at the time of the __add_pages call then all is well as it calls register_mem_sect_under_node from there with check_nid set to false. Without a node that function returns having not done the sysfs related stuff as there is no node to use. This is expected but it is the resulting path that fails... Exact path to the problem is as follows: mm/memory_hotplug.c: add_memory_resource() The node is not online so we enter the 'if (new_node)' twice, on the second such block there is a call to link_mem_sections which calls into drivers/node.c: link_mem_sections() which calls drivers/node.c: register_mem_sect_under_node() which calls get_nid_for_pfn and keeps trying until the output of that matches the expected node (passed all the way down from add_memory_resource) It is effectively the same fix as the one referred to in the fixes tag just in the code path for a new node where the comments point out we have to rerun the link creation because it will have failed in register_new_memory (as there was no node at the time). (actually that comment is wrong now as we don't have register_new_memory any more it got renamed to hotplug_memory_register in Pavel's patch). Link: http://lkml.kernel.org/r/20180504085311.1240-1-Jonathan.Cameron@huawei.com Fixes: fc44f7f9231a ("mm/memory_hotplug: don't read nid from struct page during hotplug") Signed-off-by: Jonathan Cameron Reviewed-by: Pavel Tatashin Acked-by: Michal Hocko Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- include/linux/node.h | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) (limited to 'include') diff --git a/include/linux/node.h b/include/linux/node.h index 41f171861dcc..6d336e38d155 100644 --- a/include/linux/node.h +++ b/include/linux/node.h @@ -32,9 +32,11 @@ extern struct node *node_devices[]; typedef void (*node_registration_func_t)(struct node *); #if defined(CONFIG_MEMORY_HOTPLUG_SPARSE) && defined(CONFIG_NUMA) -extern int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages); +extern int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool check_nid); #else -static inline int link_mem_sections(int nid, unsigned long start_pfn, unsigned long nr_pages) +static inline int link_mem_sections(int nid, unsigned long start_pfn, + unsigned long nr_pages, bool check_nid) { return 0; } @@ -57,7 +59,7 @@ static inline int register_one_node(int nid) if (error) return error; /* link memory sections under this node */ - error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages); + error = link_mem_sections(nid, pgdat->node_start_pfn, pgdat->node_spanned_pages, true); } return error; -- cgit v1.2.3 From c32048d9e93a5ab925d745396c63e7b912147f0a Mon Sep 17 00:00:00 2001 From: Neil Armstrong Date: Wed, 30 May 2018 11:43:58 +0200 Subject: drm/bridge/synopsys: dw-hdmi: fix dw_hdmi_setup_rx_sense The dw_hdmi_setup_rx_sense exported function should not use struct device to recover the dw-hdmi context using drvdata, but take struct dw_hdmi directly like other exported functions. This caused a regression using Meson DRM on S905X since v4.17-rc1 : Internal error: Oops: 96000007 [#1] PREEMPT SMP [...] CPU: 0 PID: 124 Comm: irq/32-dw_hdmi_ Not tainted 4.17.0-rc7 #2 Hardware name: Libre Technology CC (DT) [...] pc : osq_lock+0x54/0x188 lr : __mutex_lock.isra.0+0x74/0x530 [...] Process irq/32-dw_hdmi_ (pid: 124, stack limit = 0x00000000adf418cb) Call trace: osq_lock+0x54/0x188 __mutex_lock_slowpath+0x10/0x18 mutex_lock+0x30/0x38 __dw_hdmi_setup_rx_sense+0x28/0x98 dw_hdmi_setup_rx_sense+0x10/0x18 dw_hdmi_top_thread_irq+0x2c/0x50 irq_thread_fn+0x28/0x68 irq_thread+0x10c/0x1a0 kthread+0x128/0x130 ret_from_fork+0x10/0x18 Code: 34000964 d00050a2 51000484 9135c042 (f864d844) ---[ end trace 945641e1fbbc07da ]--- note: irq/32-dw_hdmi_[124] exited with preempt_count 1 genirq: exiting task "irq/32-dw_hdmi_" (124) is an active IRQ thread (irq 32) Fixes: eea034af90c6 ("drm/bridge/synopsys: dw-hdmi: don't clobber drvdata") Signed-off-by: Neil Armstrong Tested-by: Koen Kooi Signed-off-by: Sean Paul Link: https://patchwork.freedesktop.org/patch/msgid/1527673438-20643-1-git-send-email-narmstrong@baylibre.com --- include/drm/bridge/dw_hdmi.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include') diff --git a/include/drm/bridge/dw_hdmi.h b/include/drm/bridge/dw_hdmi.h index dd2a8cf7d20b..ccb5aa8468e0 100644 --- a/include/drm/bridge/dw_hdmi.h +++ b/include/drm/bridge/dw_hdmi.h @@ -151,7 +151,7 @@ struct dw_hdmi *dw_hdmi_bind(struct platform_device *pdev, struct drm_encoder *encoder, const struct dw_hdmi_plat_data *plat_data); -void dw_hdmi_setup_rx_sense(struct device *dev, bool hpd, bool rx_sense); +void dw_hdmi_setup_rx_sense(struct dw_hdmi *hdmi, bool hpd, bool rx_sense); void dw_hdmi_set_sample_rate(struct dw_hdmi *hdmi, unsigned int rate); void dw_hdmi_audio_enable(struct dw_hdmi *hdmi); -- cgit v1.2.3 From 36f9814a494a874d5a0f44843544b4b2539022db Mon Sep 17 00:00:00 2001 From: Daniel Borkmann Date: Sat, 2 Jun 2018 05:21:59 +0200 Subject: bpf: fix uapi hole for 32 bit compat applications In 64 bit, we have a 4 byte hole between ifindex and netns_dev in the case of struct bpf_map_info but also struct bpf_prog_info. In net-next commit b85fab0e67b ("bpf: Add gpl_compatible flag to struct bpf_prog_info") added a bitfield into it to expose some flags related to programs. Thus, add an unnamed __u32 bitfield for both so that alignment keeps the same in both 32 and 64 bit cases, and can be naturally extended from there as in b85fab0e67b. Before: # file test.o test.o: ELF 32-bit LSB relocatable, Intel 80386, version 1 (SYSV), not stripped # pahole test.o struct bpf_map_info { __u32 type; /* 0 4 */ __u32 id; /* 4 4 */ __u32 key_size; /* 8 4 */ __u32 value_size; /* 12 4 */ __u32 max_entries; /* 16 4 */ __u32 map_flags; /* 20 4 */ char name[16]; /* 24 16 */ __u32 ifindex; /* 40 4 */ __u64 netns_dev; /* 44 8 */ __u64 netns_ino; /* 52 8 */ /* size: 64, cachelines: 1, members: 10 */ /* padding: 4 */ }; After (same as on 64 bit): # file test.o test.o: ELF 32-bit LSB relocatable, Intel 80386, version 1 (SYSV), not stripped # pahole test.o struct bpf_map_info { __u32 type; /* 0 4 */ __u32 id; /* 4 4 */ __u32 key_size; /* 8 4 */ __u32 value_size; /* 12 4 */ __u32 max_entries; /* 16 4 */ __u32 map_flags; /* 20 4 */ char name[16]; /* 24 16 */ __u32 ifindex; /* 40 4 */ /* XXX 4 bytes hole, try to pack */ __u64 netns_dev; /* 48 8 */ __u64 netns_ino; /* 56 8 */ /* --- cacheline 1 boundary (64 bytes) --- */ /* size: 64, cachelines: 1, members: 10 */ /* sum members: 60, holes: 1, sum holes: 4 */ }; Reported-by: Dmitry V. Levin Reported-by: Eugene Syromiatnikov Fixes: 52775b33bb507 ("bpf: offload: report device information about offloaded maps") Fixes: 675fc275a3a2d ("bpf: offload: report device information for offloaded programs") Signed-off-by: Daniel Borkmann Acked-by: Alexei Starovoitov Signed-off-by: Alexei Starovoitov --- include/uapi/linux/bpf.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include') diff --git a/include/uapi/linux/bpf.h b/include/uapi/linux/bpf.h index c5ec89732a8d..8c317737ba3f 100644 --- a/include/uapi/linux/bpf.h +++ b/include/uapi/linux/bpf.h @@ -1017,6 +1017,7 @@ struct bpf_prog_info { __aligned_u64 map_ids; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); @@ -1030,6 +1031,7 @@ struct bpf_map_info { __u32 map_flags; char name[BPF_OBJ_NAME_LEN]; __u32 ifindex; + __u32 :32; __u64 netns_dev; __u64 netns_ino; } __attribute__((aligned(8))); -- cgit v1.2.3 From ce63b2c89cc02d8acf7472272016ecd979fb08d5 Mon Sep 17 00:00:00 2001 From: Oleksandr Andrushchenko Date: Tue, 12 Jun 2018 15:02:57 -0700 Subject: xen: Sync up with the canonical protocol definitions in Xen This is the sync up with the canonical definitions of the input, sound and display protocols in Xen. Changes to kbdif: 1. Add missing string constants for {feature|request}-raw-pointer to align with the rest of the interface file. 2. Add new XenStore feature fields, so it is possible to individually control set of exposed virtual devices for each guest OS: - set feature-disable-keyboard to 1 if no keyboard device needs to be created - set feature-disable-pointer to 1 if no pointer device needs to be created 3. Move multi-touch device parameters to backend nodes: these are described as a part of frontend's XenBus configuration nodes while they belong to backend's configuration. Fix this by moving the parameters to the proper section. Unique-id field: 1. Add unique-id XenBus entry for virtual input and display. 2. Change type of unique-id field to string for sndif to align with display and input protocols. Signed-off-by: Oleksandr Andrushchenko Reviewed-by: Juergen Gross Signed-off-by: Dmitry Torokhov --- include/xen/interface/io/displif.h | 8 ++++ include/xen/interface/io/kbdif.h | 78 ++++++++++++++++++++++++++++---------- include/xen/interface/io/sndif.h | 10 ++--- 3 files changed, 71 insertions(+), 25 deletions(-) (limited to 'include') diff --git a/include/xen/interface/io/displif.h b/include/xen/interface/io/displif.h index 596578d9be3e..fdc279dc4a88 100644 --- a/include/xen/interface/io/displif.h +++ b/include/xen/interface/io/displif.h @@ -189,6 +189,13 @@ * *----------------------------- Connector settings ---------------------------- * + * unique-id + * Values: + * + * After device instance initialization each connector is assigned a + * unique ID, so it can be identified by the backend by this ID. + * This can be UUID or such. + * * resolution * Values: x * @@ -368,6 +375,7 @@ #define XENDISPL_FIELD_EVT_CHANNEL "evt-event-channel" #define XENDISPL_FIELD_RESOLUTION "resolution" #define XENDISPL_FIELD_BE_ALLOC "be-alloc" +#define XENDISPL_FIELD_UNIQUE_ID "unique-id" /* ****************************************************************************** diff --git a/include/xen/interface/io/kbdif.h b/include/xen/interface/io/kbdif.h index 2a9510ade701..b4439cf48220 100644 --- a/include/xen/interface/io/kbdif.h +++ b/include/xen/interface/io/kbdif.h @@ -51,6 +51,18 @@ * corresponding entries in XenStore and puts 1 as the value of the entry. * If a feature is not supported then 0 must be set or feature entry omitted. * + * feature-disable-keyboard + * Values: + * + * If there is no need to expose a virtual keyboard device by the + * frontend then this must be set to 1. + * + * feature-disable-pointer + * Values: + * + * If there is no need to expose a virtual pointer device by the + * frontend then this must be set to 1. + * * feature-abs-pointer * Values: * @@ -63,6 +75,22 @@ * Backends, which support reporting of multi-touch events * should set this to 1. * + * feature-raw-pointer + * Values: + * + * Backends, which support reporting raw (unscaled) absolute coordinates + * for pointer devices should set this to 1. Raw (unscaled) values have + * a range of [0, 0x7fff]. + * + *----------------------- Device Instance Parameters ------------------------ + * + * unique-id + * Values: + * + * After device instance initialization it is assigned a unique ID, + * so every instance of the frontend can be identified by the backend + * by this ID. This can be UUID or such. + * *------------------------- Pointer Device Parameters ------------------------ * * width @@ -77,6 +105,25 @@ * Maximum Y coordinate (height) to be used by the frontend * while reporting input events, pixels, [0; UINT32_MAX]. * + *----------------------- Multi-touch Device Parameters ---------------------- + * + * multi-touch-num-contacts + * Values: + * + * Number of simultaneous touches reported. + * + * multi-touch-width + * Values: + * + * Width of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * + * multi-touch-height + * Values: + * + * Height of the touch area to be used by the frontend + * while reporting input events, pixels, [0; UINT32_MAX]. + * ***************************************************************************** * Frontend XenBus Nodes ***************************************************************************** @@ -98,6 +145,13 @@ * * Request backend to report multi-touch events. * + * request-raw-pointer + * Values: + * + * Request backend to report raw unscaled absolute pointer coordinates. + * This option is only valid if request-abs-pointer is also set. + * Raw unscaled coordinates have the range [0, 0x7fff] + * *----------------------- Request Transport Parameters ----------------------- * * event-channel @@ -117,25 +171,6 @@ * * OBSOLETE, not recommended for use. * PFN of the shared page. - * - *----------------------- Multi-touch Device Parameters ----------------------- - * - * multi-touch-num-contacts - * Values: - * - * Number of simultaneous touches reported. - * - * multi-touch-width - * Values: - * - * Width of the touch area to be used by the frontend - * while reporting input events, pixels, [0; UINT32_MAX]. - * - * multi-touch-height - * Values: - * - * Height of the touch area to be used by the frontend - * while reporting input events, pixels, [0; UINT32_MAX]. */ /* @@ -163,9 +198,13 @@ #define XENKBD_DRIVER_NAME "vkbd" +#define XENKBD_FIELD_FEAT_DSBL_KEYBRD "feature-disable-keyboard" +#define XENKBD_FIELD_FEAT_DSBL_POINTER "feature-disable-pointer" #define XENKBD_FIELD_FEAT_ABS_POINTER "feature-abs-pointer" +#define XENKBD_FIELD_FEAT_RAW_POINTER "feature-raw-pointer" #define XENKBD_FIELD_FEAT_MTOUCH "feature-multi-touch" #define XENKBD_FIELD_REQ_ABS_POINTER "request-abs-pointer" +#define XENKBD_FIELD_REQ_RAW_POINTER "request-raw-pointer" #define XENKBD_FIELD_REQ_MTOUCH "request-multi-touch" #define XENKBD_FIELD_RING_GREF "page-gref" #define XENKBD_FIELD_EVT_CHANNEL "event-channel" @@ -174,6 +213,7 @@ #define XENKBD_FIELD_MT_WIDTH "multi-touch-width" #define XENKBD_FIELD_MT_HEIGHT "multi-touch-height" #define XENKBD_FIELD_MT_NUM_CONTACTS "multi-touch-num-contacts" +#define XENKBD_FIELD_UNIQUE_ID "unique-id" /* OBSOLETE, not recommended for use */ #define XENKBD_FIELD_RING_REF "page-ref" diff --git a/include/xen/interface/io/sndif.h b/include/xen/interface/io/sndif.h index 78bb5d9f8d83..2aac8f73614c 100644 --- a/include/xen/interface/io/sndif.h +++ b/include/xen/interface/io/sndif.h @@ -278,13 +278,11 @@ * defined under the same device. * * unique-id - * Values: + * Values: * - * After stream initialization it is assigned a unique ID (within the front - * driver), so every stream of the frontend can be identified by the - * backend by this ID. This is not equal to stream-idx as the later is - * zero based within the device, but this index is contigous within the - * driver. + * After stream initialization it is assigned a unique ID, so every + * stream of the frontend can be identified by the backend by this ID. + * This can be UUID or such. * *-------------------- Stream Request Transport Parameters -------------------- * -- cgit v1.2.3