From 5afced3bf28100d81fb2fe7e98918632a08feaf5 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 29 May 2020 15:05:22 +0200 Subject: writeback: Avoid skipping inode writeback Inode's i_io_list list head is used to attach inode to several different lists - wb->{b_dirty, b_dirty_time, b_io, b_more_io}. When flush worker prepares a list of inodes to writeback e.g. for sync(2), it moves inodes to b_io list. Thus it is critical for sync(2) data integrity guarantees that inode is not requeued to any other writeback list when inode is queued for processing by flush worker. That's the reason why writeback_single_inode() does not touch i_io_list (unless the inode is completely clean) and why __mark_inode_dirty() does not touch i_io_list if I_SYNC flag is set. However there are two flaws in the current logic: 1) When inode has only I_DIRTY_TIME set but it is already queued in b_io list due to sync(2), concurrent __mark_inode_dirty(inode, I_DIRTY_SYNC) can still move inode back to b_dirty list resulting in skipping writeback of inode time stamps during sync(2). 2) When inode is on b_dirty_time list and writeback_single_inode() races with __mark_inode_dirty() like: writeback_single_inode() __mark_inode_dirty(inode, I_DIRTY_PAGES) inode->i_state |= I_SYNC __writeback_single_inode() inode->i_state |= I_DIRTY_PAGES; if (inode->i_state & I_SYNC) bail if (!(inode->i_state & I_DIRTY_ALL)) - not true so nothing done We end up with I_DIRTY_PAGES inode on b_dirty_time list and thus standard background writeback will not writeback this inode leading to possible dirty throttling stalls etc. (thanks to Martijn Coenen for this analysis). Fix these problems by tracking whether inode is queued in b_io or b_more_io lists in a new I_SYNC_QUEUED flag. When this flag is set, we know flush worker has queued inode and we should not touch i_io_list. On the other hand we also know that once flush worker is done with the inode it will requeue the inode to appropriate dirty list. When I_SYNC_QUEUED is not set, __mark_inode_dirty() can (and must) move inode to appropriate dirty list. Reported-by: Martijn Coenen Reviewed-by: Martijn Coenen Tested-by: Martijn Coenen Reviewed-by: Christoph Hellwig Fixes: 0ae45f63d4ef ("vfs: add support for a lazytime mount option") CC: stable@vger.kernel.org Signed-off-by: Jan Kara --- include/linux/fs.h | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 19ef6c88c152..48556efcdcf0 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2157,6 +2157,10 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, * * I_DONTCACHE Evict inode as soon as it is not used anymore. * + * I_SYNC_QUEUED Inode is queued in b_io or b_more_io writeback lists. + * Used to detect that mark_inode_dirty() should not move + * inode between dirty lists. + * * Q: What is the difference between I_WILL_FREE and I_FREEING? */ #define I_DIRTY_SYNC (1 << 0) @@ -2174,12 +2178,12 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define __I_DIRTY_TIME_EXPIRED 12 -#define I_DIRTY_TIME_EXPIRED (1 << __I_DIRTY_TIME_EXPIRED) +#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) #define I_DONTCACHE (1 << 16) +#define I_SYNC_QUEUED (1 << 17) #define I_DIRTY_INODE (I_DIRTY_SYNC | I_DIRTY_DATASYNC) #define I_DIRTY (I_DIRTY_INODE | I_DIRTY_PAGES) -- cgit v1.2.3 From 5fcd57505c002efc5823a7355e21f48dd02d5a51 Mon Sep 17 00:00:00 2001 From: Jan Kara Date: Fri, 29 May 2020 16:24:43 +0200 Subject: writeback: Drop I_DIRTY_TIME_EXPIRE The only use of I_DIRTY_TIME_EXPIRE is to detect in __writeback_single_inode() that inode got there because flush worker decided it's time to writeback the dirty inode time stamps (either because we are syncing or because of age). However we can detect this directly in __writeback_single_inode() and there's no need for the strange propagation with I_DIRTY_TIME_EXPIRE flag. Reviewed-by: Christoph Hellwig Signed-off-by: Jan Kara --- include/linux/fs.h | 1 - 1 file changed, 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/fs.h b/include/linux/fs.h index 48556efcdcf0..45eadf5bea5d 100644 --- a/include/linux/fs.h +++ b/include/linux/fs.h @@ -2178,7 +2178,6 @@ static inline void kiocb_clone(struct kiocb *kiocb, struct kiocb *kiocb_src, #define I_DIO_WAKEUP (1 << __I_DIO_WAKEUP) #define I_LINKABLE (1 << 10) #define I_DIRTY_TIME (1 << 11) -#define I_DIRTY_TIME_EXPIRED (1 << 12) #define I_WB_SWITCH (1 << 13) #define I_OVL_INUSE (1 << 14) #define I_CREATING (1 << 15) -- cgit v1.2.3 From 53bf2b0e4e4c1ff0a957474237f9dcd20036ca54 Mon Sep 17 00:00:00 2001 From: Lokesh Vutla Date: Thu, 6 Aug 2020 13:18:16 +0530 Subject: firmware: ti_sci: Add support for getting resource with subtype With SYSFW ABI 3.0 changes, interrupts coming out of an interrupt controller is identified by a type and it is consistent across SoCs. Similarly global events for Interrupt aggregator. So add an API to get resource range using a resource type. Signed-off-by: Lokesh Vutla Signed-off-by: Marc Zyngier Acked-by: Nishanth Menon Link: https://lore.kernel.org/r/20200806074826.24607-4-lokeshvutla@ti.com --- include/linux/soc/ti/ti_sci_protocol.h | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'include/linux') diff --git a/include/linux/soc/ti/ti_sci_protocol.h b/include/linux/soc/ti/ti_sci_protocol.h index 49c5d29cd33c..cf27b080e148 100644 --- a/include/linux/soc/ti/ti_sci_protocol.h +++ b/include/linux/soc/ti/ti_sci_protocol.h @@ -220,6 +220,9 @@ struct ti_sci_rm_core_ops { u16 *range_start, u16 *range_num); }; +#define TI_SCI_RESASG_SUBTYPE_IR_OUTPUT 0 +#define TI_SCI_RESASG_SUBTYPE_IA_VINT 0xa +#define TI_SCI_RESASG_SUBTYPE_GLOBAL_EVENT_SEVT 0xd /** * struct ti_sci_rm_irq_ops: IRQ management operations * @set_irq: Set an IRQ route between the requested source @@ -556,6 +559,9 @@ u32 ti_sci_get_num_resources(struct ti_sci_resource *res); struct ti_sci_resource * devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, struct device *dev, u32 dev_id, char *of_prop); +struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); #else /* CONFIG_TI_SCI_PROTOCOL */ @@ -609,6 +615,13 @@ devm_ti_sci_get_of_resource(const struct ti_sci_handle *handle, { return ERR_PTR(-EINVAL); } + +static inline struct ti_sci_resource * +devm_ti_sci_get_resource(const struct ti_sci_handle *handle, struct device *dev, + u32 dev_id, u32 sub_type); +{ + return ERR_PTR(-EINVAL); +} #endif /* CONFIG_TI_SCI_PROTOCOL */ #endif /* __TISCI_PROTOCOL_H */ -- cgit v1.2.3 From cc5453a5b7e90c39f713091a7ebc53c1f87d1700 Mon Sep 17 00:00:00 2001 From: Florian Westphal Date: Tue, 18 Aug 2020 16:15:58 +0200 Subject: netfilter: conntrack: allow sctp hearbeat after connection re-use If an sctp connection gets re-used, heartbeats are flagged as invalid because their vtag doesn't match. Handle this in a similar way as TCP conntrack when it suspects that the endpoints and conntrack are out-of-sync. When a HEARTBEAT request fails its vtag validation, flag this in the conntrack state and accept the packet. When a HEARTBEAT_ACK is received with an invalid vtag in the reverse direction after we allowed such a HEARTBEAT through, assume we are out-of-sync and re-set the vtag info. v2: remove left-over snippet from an older incarnation that moved new_state/old_state assignments, thats not needed so keep that as-is. Signed-off-by: Florian Westphal Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nf_conntrack_sctp.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/netfilter/nf_conntrack_sctp.h b/include/linux/netfilter/nf_conntrack_sctp.h index 9a33f171aa82..625f491b95de 100644 --- a/include/linux/netfilter/nf_conntrack_sctp.h +++ b/include/linux/netfilter/nf_conntrack_sctp.h @@ -9,6 +9,8 @@ struct ip_ct_sctp { enum sctp_conntrack state; __be32 vtag[IP_CT_DIR_MAX]; + u8 last_dir; + u8 flags; }; #endif /* _NF_CONNTRACK_SCTP_H */ -- cgit v1.2.3 From df561f6688fef775baa341a0f5d960becd248b11 Mon Sep 17 00:00:00 2001 From: "Gustavo A. R. Silva" Date: Sun, 23 Aug 2020 17:36:59 -0500 Subject: treewide: Use fallthrough pseudo-keyword Replace the existing /* fall through */ comments and its variants with the new pseudo-keyword macro fallthrough[1]. Also, remove unnecessary fall-through markings when it is the case. [1] https://www.kernel.org/doc/html/v5.7/process/deprecated.html?highlight=fallthrough#implicit-switch-case-fall-through Signed-off-by: Gustavo A. R. Silva --- include/linux/compat.h | 6 +++--- include/linux/filter.h | 2 +- include/linux/jhash.h | 26 +++++++++++++------------- include/linux/mm.h | 9 ++++++--- include/linux/signal.h | 12 ++++++------ include/linux/skbuff.h | 12 ++++++------ 6 files changed, 35 insertions(+), 32 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compat.h b/include/linux/compat.h index d38c4d7e83bd..b354ce58966e 100644 --- a/include/linux/compat.h +++ b/include/linux/compat.h @@ -429,11 +429,11 @@ put_compat_sigset(compat_sigset_t __user *compat, const sigset_t *set, compat_sigset_t v; switch (_NSIG_WORDS) { case 4: v.sig[7] = (set->sig[3] >> 32); v.sig[6] = set->sig[3]; - /* fall through */ + fallthrough; case 3: v.sig[5] = (set->sig[2] >> 32); v.sig[4] = set->sig[2]; - /* fall through */ + fallthrough; case 2: v.sig[3] = (set->sig[1] >> 32); v.sig[2] = set->sig[1]; - /* fall through */ + fallthrough; case 1: v.sig[1] = (set->sig[0] >> 32); v.sig[0] = set->sig[0]; } return copy_to_user(compat, &v, size) ? -EFAULT : 0; diff --git a/include/linux/filter.h b/include/linux/filter.h index 0a355b005bf4..ebfb7cfb65f1 100644 --- a/include/linux/filter.h +++ b/include/linux/filter.h @@ -1200,7 +1200,7 @@ static inline u16 bpf_anc_helper(const struct sock_filter *ftest) BPF_ANCILLARY(RANDOM); BPF_ANCILLARY(VLAN_TPID); } - /* Fallthrough. */ + fallthrough; default: return ftest->code; } diff --git a/include/linux/jhash.h b/include/linux/jhash.h index 19ddd43aee68..cfb62e9f37be 100644 --- a/include/linux/jhash.h +++ b/include/linux/jhash.h @@ -86,17 +86,17 @@ static inline u32 jhash(const void *key, u32 length, u32 initval) } /* Last block: affect all 32 bits of (c) */ switch (length) { - case 12: c += (u32)k[11]<<24; /* fall through */ - case 11: c += (u32)k[10]<<16; /* fall through */ - case 10: c += (u32)k[9]<<8; /* fall through */ - case 9: c += k[8]; /* fall through */ - case 8: b += (u32)k[7]<<24; /* fall through */ - case 7: b += (u32)k[6]<<16; /* fall through */ - case 6: b += (u32)k[5]<<8; /* fall through */ - case 5: b += k[4]; /* fall through */ - case 4: a += (u32)k[3]<<24; /* fall through */ - case 3: a += (u32)k[2]<<16; /* fall through */ - case 2: a += (u32)k[1]<<8; /* fall through */ + case 12: c += (u32)k[11]<<24; fallthrough; + case 11: c += (u32)k[10]<<16; fallthrough; + case 10: c += (u32)k[9]<<8; fallthrough; + case 9: c += k[8]; fallthrough; + case 8: b += (u32)k[7]<<24; fallthrough; + case 7: b += (u32)k[6]<<16; fallthrough; + case 6: b += (u32)k[5]<<8; fallthrough; + case 5: b += k[4]; fallthrough; + case 4: a += (u32)k[3]<<24; fallthrough; + case 3: a += (u32)k[2]<<16; fallthrough; + case 2: a += (u32)k[1]<<8; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ @@ -132,8 +132,8 @@ static inline u32 jhash2(const u32 *k, u32 length, u32 initval) /* Handle the last 3 u32's */ switch (length) { - case 3: c += k[2]; /* fall through */ - case 2: b += k[1]; /* fall through */ + case 3: c += k[2]; fallthrough; + case 2: b += k[1]; fallthrough; case 1: a += k[0]; __jhash_final(a, b, c); case 0: /* Nothing left to add */ diff --git a/include/linux/mm.h b/include/linux/mm.h index 1983e08f5906..97c83773b6f0 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -157,11 +157,14 @@ static inline void __mm_zero_struct_page(struct page *page) switch (sizeof(struct page)) { case 80: - _pp[9] = 0; /* fallthrough */ + _pp[9] = 0; + fallthrough; case 72: - _pp[8] = 0; /* fallthrough */ + _pp[8] = 0; + fallthrough; case 64: - _pp[7] = 0; /* fallthrough */ + _pp[7] = 0; + fallthrough; case 56: _pp[6] = 0; _pp[5] = 0; diff --git a/include/linux/signal.h b/include/linux/signal.h index 6bb1a3f0258c..7bbc0e9cf084 100644 --- a/include/linux/signal.h +++ b/include/linux/signal.h @@ -137,11 +137,11 @@ static inline void name(sigset_t *r, const sigset_t *a, const sigset_t *b) \ b3 = b->sig[3]; b2 = b->sig[2]; \ r->sig[3] = op(a3, b3); \ r->sig[2] = op(a2, b2); \ - /* fall through */ \ + fallthrough; \ case 2: \ a1 = a->sig[1]; b1 = b->sig[1]; \ r->sig[1] = op(a1, b1); \ - /* fall through */ \ + fallthrough; \ case 1: \ a0 = a->sig[0]; b0 = b->sig[0]; \ r->sig[0] = op(a0, b0); \ @@ -171,9 +171,9 @@ static inline void name(sigset_t *set) \ switch (_NSIG_WORDS) { \ case 4: set->sig[3] = op(set->sig[3]); \ set->sig[2] = op(set->sig[2]); \ - /* fall through */ \ + fallthrough; \ case 2: set->sig[1] = op(set->sig[1]); \ - /* fall through */ \ + fallthrough; \ case 1: set->sig[0] = op(set->sig[0]); \ break; \ default: \ @@ -194,7 +194,7 @@ static inline void sigemptyset(sigset_t *set) memset(set, 0, sizeof(sigset_t)); break; case 2: set->sig[1] = 0; - /* fall through */ + fallthrough; case 1: set->sig[0] = 0; break; } @@ -207,7 +207,7 @@ static inline void sigfillset(sigset_t *set) memset(set, -1, sizeof(sigset_t)); break; case 2: set->sig[1] = -1; - /* fall through */ + fallthrough; case 1: set->sig[0] = -1; break; } diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46881d902124..ab57cf787c1f 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -3745,19 +3745,19 @@ static inline bool __skb_metadata_differs(const struct sk_buff *skb_a, #define __it(x, op) (x -= sizeof(u##op)) #define __it_diff(a, b, op) (*(u##op *)__it(a, op)) ^ (*(u##op *)__it(b, op)) case 32: diffs |= __it_diff(a, b, 64); - /* fall through */ + fallthrough; case 24: diffs |= __it_diff(a, b, 64); - /* fall through */ + fallthrough; case 16: diffs |= __it_diff(a, b, 64); - /* fall through */ + fallthrough; case 8: diffs |= __it_diff(a, b, 64); break; case 28: diffs |= __it_diff(a, b, 64); - /* fall through */ + fallthrough; case 20: diffs |= __it_diff(a, b, 64); - /* fall through */ + fallthrough; case 12: diffs |= __it_diff(a, b, 64); - /* fall through */ + fallthrough; case 4: diffs |= __it_diff(a, b, 32); break; } -- cgit v1.2.3 From 12564485ed8caac3c18572793ec01330792c7191 Mon Sep 17 00:00:00 2001 From: Shawn Anastasio Date: Fri, 21 Aug 2020 13:55:56 -0500 Subject: Revert "powerpc/64s: Remove PROT_SAO support" This reverts commit 5c9fa16e8abd342ce04dc830c1ebb2a03abf6c05. Since PROT_SAO can still be useful for certain classes of software, reintroduce it. Concerns about guest migration for LPARs using SAO will be addressed next. Signed-off-by: Shawn Anastasio Signed-off-by: Michael Ellerman Link: https://lore.kernel.org/r/20200821185558.35561-2-shawn@anastas.io --- include/linux/mm.h | 2 ++ 1 file changed, 2 insertions(+) (limited to 'include/linux') diff --git a/include/linux/mm.h b/include/linux/mm.h index 1983e08f5906..5abe6df4247e 100644 --- a/include/linux/mm.h +++ b/include/linux/mm.h @@ -321,6 +321,8 @@ extern unsigned int kobjsize(const void *objp); #if defined(CONFIG_X86) # define VM_PAT VM_ARCH_1 /* PAT reserves whole VMA at once (x86) */ +#elif defined(CONFIG_PPC) +# define VM_SAO VM_ARCH_1 /* Strong Access Ordering (powerpc) */ #elif defined(CONFIG_PARISC) # define VM_GROWSUP VM_ARCH_1 #elif defined(CONFIG_IA64) -- cgit v1.2.3 From f062f025fc3a4fae3e6a50d13fb1fafb11900fa7 Mon Sep 17 00:00:00 2001 From: Ilya Dryomov Date: Wed, 19 Aug 2020 10:50:08 +0200 Subject: libceph: add __maybe_unused to DEFINE_CEPH_FEATURE Avoid -Wunused-const-variable warnings for "make W=1". Reported-by: Leon Romanovsky Signed-off-by: Ilya Dryomov Reviewed-by: Leon Romanovsky --- include/linux/ceph/ceph_features.h | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ceph/ceph_features.h b/include/linux/ceph/ceph_features.h index fcd84e8d88f4..999636d53cf2 100644 --- a/include/linux/ceph/ceph_features.h +++ b/include/linux/ceph/ceph_features.h @@ -11,14 +11,14 @@ #define CEPH_FEATURE_INCARNATION_2 (1ull<<57) // CEPH_FEATURE_SERVER_JEWEL #define DEFINE_CEPH_FEATURE(bit, incarnation, name) \ - static const uint64_t CEPH_FEATURE_##name = (1ULL< Date: Sat, 22 Aug 2020 08:23:29 +1000 Subject: net: Get rid of consume_skb when tracing is off The function consume_skb is only meaningful when tracing is enabled. This patch makes it conditional on CONFIG_TRACEPOINTS. Signed-off-by: Herbert Xu Signed-off-by: David S. Miller --- include/linux/skbuff.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index 46881d902124..e8bca74857a3 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -1056,7 +1056,16 @@ void kfree_skb(struct sk_buff *skb); void kfree_skb_list(struct sk_buff *segs); void skb_dump(const char *level, const struct sk_buff *skb, bool full_pkt); void skb_tx_error(struct sk_buff *skb); + +#ifdef CONFIG_TRACEPOINTS void consume_skb(struct sk_buff *skb); +#else +static inline void consume_skb(struct sk_buff *skb) +{ + return kfree_skb(skb); +} +#endif + void __consume_stateless_skb(struct sk_buff *skb); void __kfree_skb(struct sk_buff *skb); extern struct kmem_cache *skbuff_head_cache; -- cgit v1.2.3 From c94a88f341c9b8f05d8639f62bb5d95936f881cd Mon Sep 17 00:00:00 2001 From: Marco Elver Date: Thu, 20 Aug 2020 19:20:46 +0200 Subject: sched: Use __always_inline on is_idle_task() is_idle_task() may be used from noinstr functions such as irqentry_enter(). Since the compiler is free to not inline regular inline functions, switch to using __always_inline. Signed-off-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200820172046.GA177701@elver.google.com --- include/linux/sched.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/sched.h b/include/linux/sched.h index 93ecd930efd3..afe01e232935 100644 --- a/include/linux/sched.h +++ b/include/linux/sched.h @@ -1666,7 +1666,7 @@ extern struct task_struct *idle_task(int cpu); * * Return: 1 if @p is an idle task. 0 otherwise. */ -static inline bool is_idle_task(const struct task_struct *p) +static __always_inline bool is_idle_task(const struct task_struct *p) { return !!(p->flags & PF_IDLE); } -- cgit v1.2.3 From fddf9055a60dfcc97bda5ef03c8fa4108ed555c5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 20 Aug 2020 09:13:30 +0200 Subject: lockdep: Use raw_cpu_*() for per-cpu variables Sven reported that commit a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") caused trouble on s390 because their this_cpu_*() primitives disable preemption which then lands back tracing. On the one hand, per-cpu ops should use preempt_*able_notrace() and raw_local_irq_*(), on the other hand, we can trivialy use raw_cpu_*() ops for this. Fixes: a21ee6055c30 ("lockdep: Change hardirq{s_enabled,_context} to per-cpu variables") Reported-by: Sven Schnelle Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Signed-off-by: Peter Zijlstra (Intel) Link: https://lkml.kernel.org/r/20200821085348.192346882@infradead.org --- include/linux/irqflags.h | 6 +++--- include/linux/lockdep.h | 18 +++++++++++++----- 2 files changed, 16 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index bd5c55755447..d7e50a215ea9 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -53,13 +53,13 @@ DECLARE_PER_CPU(int, hardirq_context); extern void trace_hardirqs_off_finish(void); extern void trace_hardirqs_on(void); extern void trace_hardirqs_off(void); -# define lockdep_hardirq_context() (this_cpu_read(hardirq_context)) +# define lockdep_hardirq_context() (raw_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) # define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) # define lockdep_softirqs_enabled(p) ((p)->softirqs_enabled) # define lockdep_hardirq_enter() \ do { \ - if (this_cpu_inc_return(hardirq_context) == 1) \ + if (__this_cpu_inc_return(hardirq_context) == 1)\ current->hardirq_threaded = 0; \ } while (0) # define lockdep_hardirq_threaded() \ @@ -68,7 +68,7 @@ do { \ } while (0) # define lockdep_hardirq_exit() \ do { \ - this_cpu_dec(hardirq_context); \ + __this_cpu_dec(hardirq_context); \ } while (0) # define lockdep_softirq_enter() \ do { \ diff --git a/include/linux/lockdep.h b/include/linux/lockdep.h index 62a382d1845b..6a584b3e5c74 100644 --- a/include/linux/lockdep.h +++ b/include/linux/lockdep.h @@ -535,19 +535,27 @@ do { \ DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); +/* + * The below lockdep_assert_*() macros use raw_cpu_read() to access the above + * per-cpu variables. This is required because this_cpu_read() will potentially + * call into preempt/irq-disable and that obviously isn't right. This is also + * correct because when IRQs are enabled, it doesn't matter if we accidentally + * read the value from our previous CPU. + */ + #define lockdep_assert_irqs_enabled() \ do { \ - WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_irqs_disabled() \ do { \ - WARN_ON_ONCE(debug_locks && this_cpu_read(hardirqs_enabled)); \ + WARN_ON_ONCE(debug_locks && raw_cpu_read(hardirqs_enabled)); \ } while (0) #define lockdep_assert_in_irq() \ do { \ - WARN_ON_ONCE(debug_locks && !this_cpu_read(hardirq_context)); \ + WARN_ON_ONCE(debug_locks && !raw_cpu_read(hardirq_context)); \ } while (0) #define lockdep_assert_preemption_enabled() \ @@ -555,7 +563,7 @@ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ debug_locks && \ (preempt_count() != 0 || \ - !this_cpu_read(hardirqs_enabled))); \ + !raw_cpu_read(hardirqs_enabled))); \ } while (0) #define lockdep_assert_preemption_disabled() \ @@ -563,7 +571,7 @@ do { \ WARN_ON_ONCE(IS_ENABLED(CONFIG_PREEMPT_COUNT) && \ debug_locks && \ (preempt_count() == 0 && \ - this_cpu_read(hardirqs_enabled))); \ + raw_cpu_read(hardirqs_enabled))); \ } while (0) #else -- cgit v1.2.3 From bf9282dc26e7fe2a0736edc568762f0f05d12416 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 12:22:17 +0200 Subject: cpuidle: Make CPUIDLE_FLAG_TLB_FLUSHED generic This allows moving the leave_mm() call into generic code before rcu_idle_enter(). Gets rid of more trace_*_rcuidle() users. Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.369441600@infradead.org --- include/linux/cpuidle.h | 13 +++++++------ include/linux/mmu_context.h | 5 +++++ 2 files changed, 12 insertions(+), 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h index b65909ae4e20..75895e6363b8 100644 --- a/include/linux/cpuidle.h +++ b/include/linux/cpuidle.h @@ -75,12 +75,13 @@ struct cpuidle_state { }; /* Idle State Flags */ -#define CPUIDLE_FLAG_NONE (0x00) -#define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ -#define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ -#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ -#define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ -#define CPUIDLE_FLAG_OFF BIT(4) /* disable this state by default */ +#define CPUIDLE_FLAG_NONE (0x00) +#define CPUIDLE_FLAG_POLLING BIT(0) /* polling state */ +#define CPUIDLE_FLAG_COUPLED BIT(1) /* state applies to multiple cpus */ +#define CPUIDLE_FLAG_TIMER_STOP BIT(2) /* timer is stopped on this state */ +#define CPUIDLE_FLAG_UNUSABLE BIT(3) /* avoid using this state */ +#define CPUIDLE_FLAG_OFF BIT(4) /* disable this state by default */ +#define CPUIDLE_FLAG_TLB_FLUSHED BIT(5) /* idle-state flushes TLBs */ struct cpuidle_device_kobj; struct cpuidle_state_kobj; diff --git a/include/linux/mmu_context.h b/include/linux/mmu_context.h index c51a84132d7c..03dee12d2b61 100644 --- a/include/linux/mmu_context.h +++ b/include/linux/mmu_context.h @@ -3,10 +3,15 @@ #define _LINUX_MMU_CONTEXT_H #include +#include /* Architectures that care about IRQ state in switch_mm can override this. */ #ifndef switch_mm_irqs_off # define switch_mm_irqs_off switch_mm #endif +#ifndef leave_mm +static inline void leave_mm(int cpu) { } +#endif + #endif -- cgit v1.2.3 From 00b0ed2d4997af6d0a93edef820386951fd66d94 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 12 Aug 2020 19:28:06 +0200 Subject: locking/lockdep: Cleanup Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200821085348.546087214@infradead.org --- include/linux/irqflags.h | 54 +++++++++++++++++++++++++++--------------------- 1 file changed, 30 insertions(+), 24 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index d7e50a215ea9..00d553d77911 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -49,10 +49,11 @@ struct irqtrace_events { DECLARE_PER_CPU(int, hardirqs_enabled); DECLARE_PER_CPU(int, hardirq_context); - extern void trace_hardirqs_on_prepare(void); - extern void trace_hardirqs_off_finish(void); - extern void trace_hardirqs_on(void); - extern void trace_hardirqs_off(void); +extern void trace_hardirqs_on_prepare(void); +extern void trace_hardirqs_off_finish(void); +extern void trace_hardirqs_on(void); +extern void trace_hardirqs_off(void); + # define lockdep_hardirq_context() (raw_cpu_read(hardirq_context)) # define lockdep_softirq_context(p) ((p)->softirq_context) # define lockdep_hardirqs_enabled() (this_cpu_read(hardirqs_enabled)) @@ -120,17 +121,17 @@ do { \ #else # define trace_hardirqs_on_prepare() do { } while (0) # define trace_hardirqs_off_finish() do { } while (0) -# define trace_hardirqs_on() do { } while (0) -# define trace_hardirqs_off() do { } while (0) -# define lockdep_hardirq_context() 0 -# define lockdep_softirq_context(p) 0 -# define lockdep_hardirqs_enabled() 0 -# define lockdep_softirqs_enabled(p) 0 -# define lockdep_hardirq_enter() do { } while (0) -# define lockdep_hardirq_threaded() do { } while (0) -# define lockdep_hardirq_exit() do { } while (0) -# define lockdep_softirq_enter() do { } while (0) -# define lockdep_softirq_exit() do { } while (0) +# define trace_hardirqs_on() do { } while (0) +# define trace_hardirqs_off() do { } while (0) +# define lockdep_hardirq_context() 0 +# define lockdep_softirq_context(p) 0 +# define lockdep_hardirqs_enabled() 0 +# define lockdep_softirqs_enabled(p) 0 +# define lockdep_hardirq_enter() do { } while (0) +# define lockdep_hardirq_threaded() do { } while (0) +# define lockdep_hardirq_exit() do { } while (0) +# define lockdep_softirq_enter() do { } while (0) +# define lockdep_softirq_exit() do { } while (0) # define lockdep_hrtimer_enter(__hrtimer) false # define lockdep_hrtimer_exit(__context) do { } while (0) # define lockdep_posixtimer_enter() do { } while (0) @@ -181,17 +182,25 @@ do { \ * if !TRACE_IRQFLAGS. */ #ifdef CONFIG_TRACE_IRQFLAGS -#define local_irq_enable() \ - do { trace_hardirqs_on(); raw_local_irq_enable(); } while (0) -#define local_irq_disable() \ - do { raw_local_irq_disable(); trace_hardirqs_off(); } while (0) + +#define local_irq_enable() \ + do { \ + trace_hardirqs_on(); \ + raw_local_irq_enable(); \ + } while (0) + +#define local_irq_disable() \ + do { \ + raw_local_irq_disable(); \ + trace_hardirqs_off(); \ + } while (0) + #define local_irq_save(flags) \ do { \ raw_local_irq_save(flags); \ trace_hardirqs_off(); \ } while (0) - #define local_irq_restore(flags) \ do { \ if (raw_irqs_disabled_flags(flags)) { \ @@ -214,10 +223,7 @@ do { \ #define local_irq_enable() do { raw_local_irq_enable(); } while (0) #define local_irq_disable() do { raw_local_irq_disable(); } while (0) -#define local_irq_save(flags) \ - do { \ - raw_local_irq_save(flags); \ - } while (0) +#define local_irq_save(flags) do { raw_local_irq_save(flags); } while (0) #define local_irq_restore(flags) do { raw_local_irq_restore(flags); } while (0) #define safe_halt() do { raw_safe_halt(); } while (0) -- cgit v1.2.3 From 044d0d6de9f50192f9697583504a382347ee95ca Mon Sep 17 00:00:00 2001 From: Nicholas Piggin Date: Thu, 23 Jul 2020 20:56:14 +1000 Subject: lockdep: Only trace IRQ edges Problem: raw_local_irq_save(); // software state on local_irq_save(); // software state off ... local_irq_restore(); // software state still off, because we don't enable IRQs raw_local_irq_restore(); // software state still off, *whoopsie* existing instances: - lock_acquire() raw_local_irq_save() __lock_acquire() arch_spin_lock(&graph_lock) pv_wait() := kvm_wait() (same or worse for Xen/HyperV) local_irq_save() - trace_clock_global() raw_local_irq_save() arch_spin_lock() pv_wait() := kvm_wait() local_irq_save() - apic_retrigger_irq() raw_local_irq_save() apic->send_IPI() := default_send_IPI_single_phys() local_irq_save() Possible solutions: A) make it work by enabling the tracing inside raw_*() B) make it work by keeping tracing disabled inside raw_*() C) call it broken and clean it up now Now, given that the only reason to use the raw_* variant is because you don't want tracing. Therefore A) seems like a weird option (although it can be done). C) is tempting, but OTOH it ends up converting a _lot_ of code to raw just because there is one raw user, this strips the validation/tracing off for all the other users. So we pick B) and declare any code that ends up doing: raw_local_irq_save() local_irq_save() lockdep_assert_irqs_disabled(); broken. AFAICT this problem has existed forever, the only reason it came up is because commit: 859d069ee1dd ("lockdep: Prepare for NMI IRQ state tracking") changed IRQ tracing vs lockdep recursion and the first instance is fairly common, the other cases hardly ever happen. Signed-off-by: Nicholas Piggin [rewrote changelog] Signed-off-by: Peter Zijlstra (Intel) Reviewed-by: Steven Rostedt (VMware) Reviewed-by: Thomas Gleixner Acked-by: Rafael J. Wysocki Tested-by: Marco Elver Link: https://lkml.kernel.org/r/20200723105615.1268126-1-npiggin@gmail.com --- include/linux/irqflags.h | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) (limited to 'include/linux') diff --git a/include/linux/irqflags.h b/include/linux/irqflags.h index 00d553d77911..3ed4e8771b64 100644 --- a/include/linux/irqflags.h +++ b/include/linux/irqflags.h @@ -191,25 +191,24 @@ do { \ #define local_irq_disable() \ do { \ + bool was_disabled = raw_irqs_disabled();\ raw_local_irq_disable(); \ - trace_hardirqs_off(); \ + if (!was_disabled) \ + trace_hardirqs_off(); \ } while (0) #define local_irq_save(flags) \ do { \ raw_local_irq_save(flags); \ - trace_hardirqs_off(); \ + if (!raw_irqs_disabled_flags(flags)) \ + trace_hardirqs_off(); \ } while (0) #define local_irq_restore(flags) \ do { \ - if (raw_irqs_disabled_flags(flags)) { \ - raw_local_irq_restore(flags); \ - trace_hardirqs_off(); \ - } else { \ + if (!raw_irqs_disabled_flags(flags)) \ trace_hardirqs_on(); \ - raw_local_irq_restore(flags); \ - } \ + raw_local_irq_restore(flags); \ } while (0) #define safe_halt() \ -- cgit v1.2.3 From aac544c3553d98ebe150dda19a25aa253f7ad3fe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 25 Aug 2020 01:25:11 +0200 Subject: Compiler Attributes: remove comment about sparse not supporting __has_attribute Sparse supports __has_attribute() since 2018-08-31, so the comment is not true anymore but more importantly is rather confusing. So remove it. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 6 ------ 1 file changed, 6 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index 6122efdad6ad..af7a58c19e20 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -24,12 +24,6 @@ * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute * by hand. - * - * sparse does not support __has_attribute (yet) and defines __GNUC_MINOR__ - * depending on the compiler used to build it; however, these attributes have - * no semantic effects for sparse, so it does not matter. Also note that, - * in order to avoid sparse's warnings, even the unsupported ones must be - * defined to 0. */ #ifndef __has_attribute # define __has_attribute(x) __GCC4_has_attribute_##x -- cgit v1.2.3 From 5861af92ff2a2e002449191413c35f3ec5f721fe Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Tue, 25 Aug 2020 01:25:26 +0200 Subject: Compiler Attributes: fix comment concerning GCC 4.6 GCC 4.6 is not supported anymore, so remove a reference to it, leaving just the part about version prior GCC 5. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_attributes.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/compiler_attributes.h b/include/linux/compiler_attributes.h index af7a58c19e20..ea7b756b1c8f 100644 --- a/include/linux/compiler_attributes.h +++ b/include/linux/compiler_attributes.h @@ -22,7 +22,7 @@ /* * __has_attribute is supported on gcc >= 5, clang >= 2.9 and icc >= 17. - * In the meantime, to support 4.6 <= gcc < 5, we implement __has_attribute + * In the meantime, to support gcc < 5, we implement __has_attribute * by hand. */ #ifndef __has_attribute -- cgit v1.2.3 From 30b8e6b22fd0f7a56911e69c681e92532e72e3b6 Mon Sep 17 00:00:00 2001 From: Viresh Kumar Date: Thu, 27 Aug 2020 10:54:16 +0530 Subject: cpufreq: Use WARN_ON_ONCE() for invalid relation The relation can't be invalid here, so if it turns out to be invalid, just WARN_ON_ONCE() and return 0. Signed-off-by: Viresh Kumar [ rjw: Subject and changelog edits ] Signed-off-by: Rafael J. Wysocki --- include/linux/cpufreq.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/cpufreq.h b/include/linux/cpufreq.h index 8f141d4c859c..a911e5d06845 100644 --- a/include/linux/cpufreq.h +++ b/include/linux/cpufreq.h @@ -956,8 +956,8 @@ static inline int cpufreq_frequency_table_target(struct cpufreq_policy *policy, case CPUFREQ_RELATION_C: return cpufreq_table_find_index_c(policy, target_freq); default: - pr_err("%s: Invalid relation: %d\n", __func__, relation); - return -EINVAL; + WARN_ON_ONCE(1); + return 0; } } -- cgit v1.2.3 From 645f08975f49441b3e753d8dc5b740cbcb226594 Mon Sep 17 00:00:00 2001 From: Miaohe Lin Date: Thu, 27 Aug 2020 07:27:49 -0400 Subject: net: Fix some comments Fix some comments, including wrong function name, duplicated word and so on. Signed-off-by: Miaohe Lin Signed-off-by: David S. Miller --- include/linux/skbuff.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/skbuff.h b/include/linux/skbuff.h index e8bca74857a3..8d9ab50b08c9 100644 --- a/include/linux/skbuff.h +++ b/include/linux/skbuff.h @@ -71,7 +71,7 @@ * NETIF_F_IPV6_CSUM - Driver (device) is only able to checksum plain * TCP or UDP packets over IPv6. These are specifically * unencapsulated packets of the form IPv6|TCP or - * IPv4|UDP where the Next Header field in the IPv6 + * IPv6|UDP where the Next Header field in the IPv6 * header is either TCP or UDP. IPv6 extension headers * are not supported with this feature. This feature * cannot be set in features for a device with @@ -2667,7 +2667,7 @@ static inline int pskb_network_may_pull(struct sk_buff *skb, unsigned int len) * * Using max(32, L1_CACHE_BYTES) makes sense (especially with RPS) * to reduce average number of cache lines per packet. - * get_rps_cpus() for example only access one 64 bytes aligned block : + * get_rps_cpu() for example only access one 64 bytes aligned block : * NET_IP_ALIGN(2) + ethernet_header(14) + IP_header(20/40) + ports(8) */ #ifndef NET_SKB_PAD -- cgit v1.2.3 From ee921183557af39c1a0475f982d43b0fcac25e2e Mon Sep 17 00:00:00 2001 From: Pablo Neira Ayuso Date: Sun, 23 Aug 2020 13:55:36 +0200 Subject: netfilter: nfnetlink: nfnetlink_unicast() reports EAGAIN instead of ENOBUFS Frontend callback reports EAGAIN to nfnetlink to retry a command, this is used to signal that module autoloading is required. Unfortunately, nlmsg_unicast() reports EAGAIN in case the receiver socket buffer gets full, so it enters a busy-loop. This patch updates nfnetlink_unicast() to turn EAGAIN into ENOBUFS and to use nlmsg_unicast(). Remove the flags field in nfnetlink_unicast() since this is always MSG_DONTWAIT in the existing code which is exactly what nlmsg_unicast() passes to netlink_unicast() as parameter. Fixes: 96518518cc41 ("netfilter: add nftables") Reported-by: Phil Sutter Signed-off-by: Pablo Neira Ayuso --- include/linux/netfilter/nfnetlink.h | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/netfilter/nfnetlink.h b/include/linux/netfilter/nfnetlink.h index 851425c3178f..89016d08f6a2 100644 --- a/include/linux/netfilter/nfnetlink.h +++ b/include/linux/netfilter/nfnetlink.h @@ -43,8 +43,7 @@ int nfnetlink_has_listeners(struct net *net, unsigned int group); int nfnetlink_send(struct sk_buff *skb, struct net *net, u32 portid, unsigned int group, int echo, gfp_t flags); int nfnetlink_set_err(struct net *net, u32 portid, u32 group, int error); -int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid, - int flags); +int nfnetlink_unicast(struct sk_buff *skb, struct net *net, u32 portid); static inline u16 nfnl_msg_type(u8 subsys, u8 msg_type) { -- cgit v1.2.3 From ef91bb196b0db1013ef8705367bc2d7944ef696b Mon Sep 17 00:00:00 2001 From: Herbert Xu Date: Fri, 28 Aug 2020 17:11:25 +1000 Subject: kernel.h: Silence sparse warning in lower_32_bits I keep getting sparse warnings in crypto such as: CHECK drivers/crypto/ccree/cc_hash.c drivers/crypto/ccree/cc_hash.c:49:9: warning: cast truncates bits from constant value (47b5481dbefa4fa4 becomes befa4fa4) drivers/crypto/ccree/cc_hash.c:49:26: warning: cast truncates bits from constant value (db0c2e0d64f98fa7 becomes 64f98fa7) [.. many more ..] This patch removes the warning by adding a mask to keep sparse happy. Signed-off-by: Herbert Xu Signed-off-by: Linus Torvalds --- include/linux/kernel.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/kernel.h b/include/linux/kernel.h index 500def620d8f..c25b8e41c0ea 100644 --- a/include/linux/kernel.h +++ b/include/linux/kernel.h @@ -186,7 +186,7 @@ * lower_32_bits - return bits 0-31 of a number * @n: the number we're accessing */ -#define lower_32_bits(n) ((u32)(n)) +#define lower_32_bits(n) ((u32)((n) & 0xffffffff)) struct completion; struct pt_regs; -- cgit v1.2.3 From e5fc436f06eef54ef512ea55a9db8eb9f2e76959 Mon Sep 17 00:00:00 2001 From: Luc Van Oostenryck Date: Fri, 28 Aug 2020 10:53:01 +0200 Subject: sparse: use static inline for __chk_{user,io}_ptr() __chk_user_ptr() & __chk_io_ptr() are dummy extern functions which only exist to enforce the typechecking of __user or __iomem pointers in macros when using sparse. This typechecking is done by inserting a call to these functions. But the presence of these calls can inhibit some simplifications and so influence the result of sparse's analysis of context/locking. Fix this by changing these calls into static inline calls with an empty body. Signed-off-by: Luc Van Oostenryck Signed-off-by: Miguel Ojeda --- include/linux/compiler_types.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'include/linux') diff --git a/include/linux/compiler_types.h b/include/linux/compiler_types.h index 4b33cb385f96..6e390d58a9f8 100644 --- a/include/linux/compiler_types.h +++ b/include/linux/compiler_types.h @@ -11,8 +11,8 @@ # define __iomem __attribute__((noderef, address_space(__iomem))) # define __percpu __attribute__((noderef, address_space(__percpu))) # define __rcu __attribute__((noderef, address_space(__rcu))) -extern void __chk_user_ptr(const volatile void __user *); -extern void __chk_io_ptr(const volatile void __iomem *); +static inline void __chk_user_ptr(const volatile void __user *ptr) { } +static inline void __chk_io_ptr(const volatile void __iomem *ptr) { } /* context/locking */ # define __must_hold(x) __attribute__((context(x,1,1))) # define __acquires(x) __attribute__((context(x,0,1))) -- cgit v1.2.3 From 35556bed836f8dc07ac55f69c8d17dce3e7f0e25 Mon Sep 17 00:00:00 2001 From: Marc Zyngier Date: Tue, 1 Sep 2020 10:52:33 +0100 Subject: HID: core: Sanitize event code and type when mapping input When calling into hid_map_usage(), the passed event code is blindly stored as is, even if it doesn't fit in the associated bitmap. This event code can come from a variety of sources, including devices masquerading as input devices, only a bit more "programmable". Instead of taking the event code at face value, check that it actually fits the corresponding bitmap, and if it doesn't: - spit out a warning so that we know which device is acting up - NULLify the bitmap pointer so that we catch unexpected uses Code paths that can make use of untrusted inputs can now check that the mapping was indeed correct and bail out if not. Cc: stable@vger.kernel.org Signed-off-by: Marc Zyngier Signed-off-by: Benjamin Tissoires --- include/linux/hid.h | 42 +++++++++++++++++++++++++++++------------- 1 file changed, 29 insertions(+), 13 deletions(-) (limited to 'include/linux') diff --git a/include/linux/hid.h b/include/linux/hid.h index 875f71132b14..c7044a14200e 100644 --- a/include/linux/hid.h +++ b/include/linux/hid.h @@ -959,34 +959,49 @@ static inline void hid_device_io_stop(struct hid_device *hid) { * @max: maximal valid usage->code to consider later (out parameter) * @type: input event type (EV_KEY, EV_REL, ...) * @c: code which corresponds to this usage and type + * + * The value pointed to by @bit will be set to NULL if either @type is + * an unhandled event type, or if @c is out of range for @type. This + * can be used as an error condition. */ static inline void hid_map_usage(struct hid_input *hidinput, struct hid_usage *usage, unsigned long **bit, int *max, - __u8 type, __u16 c) + __u8 type, unsigned int c) { struct input_dev *input = hidinput->input; - - usage->type = type; - usage->code = c; + unsigned long *bmap = NULL; + unsigned int limit = 0; switch (type) { case EV_ABS: - *bit = input->absbit; - *max = ABS_MAX; + bmap = input->absbit; + limit = ABS_MAX; break; case EV_REL: - *bit = input->relbit; - *max = REL_MAX; + bmap = input->relbit; + limit = REL_MAX; break; case EV_KEY: - *bit = input->keybit; - *max = KEY_MAX; + bmap = input->keybit; + limit = KEY_MAX; break; case EV_LED: - *bit = input->ledbit; - *max = LED_MAX; + bmap = input->ledbit; + limit = LED_MAX; break; } + + if (unlikely(c > limit || !bmap)) { + pr_warn_ratelimited("%s: Invalid code %d type %d\n", + input->name, c, type); + *bit = NULL; + return; + } + + usage->type = type; + usage->code = c; + *max = limit; + *bit = bmap; } /** @@ -1000,7 +1015,8 @@ static inline void hid_map_usage_clear(struct hid_input *hidinput, __u8 type, __u16 c) { hid_map_usage(hidinput, usage, bit, max, type, c); - clear_bit(c, *bit); + if (*bit) + clear_bit(usage->code, *bit); } /** -- cgit v1.2.3 From 3b5455636fe26ea21b4189d135a424a6da016418 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Wed, 2 Sep 2020 12:32:45 -0400 Subject: libata: implement ATA_HORKAGE_MAX_TRIM_128M and apply to Sandisks All three generations of Sandisk SSDs lock up hard intermittently. Experiments showed that disabling NCQ lowered the failure rate significantly and the kernel has been disabling NCQ for some models of SD7's and 8's, which is obviously undesirable. Karthik worked with Sandisk to root cause the hard lockups to trim commands larger than 128M. This patch implements ATA_HORKAGE_MAX_TRIM_128M which limits max trim size to 128M and applies it to all three generations of Sandisk SSDs. Signed-off-by: Tejun Heo Cc: Karthik Shivaram Cc: stable@vger.kernel.org Signed-off-by: Jens Axboe --- include/linux/libata.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/libata.h b/include/linux/libata.h index 77ccf040a128..5f550eb27f81 100644 --- a/include/linux/libata.h +++ b/include/linux/libata.h @@ -421,6 +421,7 @@ enum { ATA_HORKAGE_NO_DMA_LOG = (1 << 23), /* don't use DMA for log read */ ATA_HORKAGE_NOTRIM = (1 << 24), /* don't use TRIM */ ATA_HORKAGE_MAX_SEC_1024 = (1 << 25), /* Limit max sects to 1024 */ + ATA_HORKAGE_MAX_TRIM_128M = (1 << 26), /* Limit max trim size to 128M */ /* DMA mask for user DMA control: User visible values; DO NOT renumber */ -- cgit v1.2.3 From 7e24969022cbd61ddc586f14824fc205661bb124 Mon Sep 17 00:00:00 2001 From: Ming Lei Date: Mon, 17 Aug 2020 18:00:55 +0800 Subject: block: allow for_each_bvec to support zero len bvec Block layer usually doesn't support or allow zero-length bvec. Since commit 1bdc76aea115 ("iov_iter: use bvec iterator to implement iterate_bvec()"), iterate_bvec() switches to bvec iterator. However, Al mentioned that 'Zero-length segments are not disallowed' in iov_iter. Fixes for_each_bvec() so that it can move on after seeing one zero length bvec. Fixes: 1bdc76aea115 ("iov_iter: use bvec iterator to implement iterate_bvec()") Reported-by: syzbot Signed-off-by: Ming Lei Tested-by: Tetsuo Handa Cc: Al Viro Cc: Matthew Wilcox Cc: Link: https://www.mail-archive.com/linux-kernel@vger.kernel.org/msg2262077.html Signed-off-by: Jens Axboe --- include/linux/bvec.h | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/bvec.h b/include/linux/bvec.h index ac0c7299d5b8..dd74503f7e5e 100644 --- a/include/linux/bvec.h +++ b/include/linux/bvec.h @@ -117,11 +117,18 @@ static inline bool bvec_iter_advance(const struct bio_vec *bv, return true; } +static inline void bvec_iter_skip_zero_bvec(struct bvec_iter *iter) +{ + iter->bi_bvec_done = 0; + iter->bi_idx++; +} + #define for_each_bvec(bvl, bio_vec, iter, start) \ for (iter = (start); \ (iter).bi_size && \ ((bvl = bvec_iter_bvec((bio_vec), (iter))), 1); \ - bvec_iter_advance((bio_vec), &(iter), (bvl).bv_len)) + (bvl).bv_len ? (void)bvec_iter_advance((bio_vec), &(iter), \ + (bvl).bv_len) : bvec_iter_skip_zero_bvec(&(iter))) /* for iterating one bio from start to end */ #define BVEC_ITER_ALL_INIT (struct bvec_iter) \ -- cgit v1.2.3 From 4533d3aed857c558d6aabd00d0cb04100c5a2258 Mon Sep 17 00:00:00 2001 From: Roger Pau Monne Date: Tue, 1 Sep 2020 10:33:25 +0200 Subject: memremap: rename MEMORY_DEVICE_DEVDAX to MEMORY_DEVICE_GENERIC MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit This is in preparation for the logic behind MEMORY_DEVICE_DEVDAX also being used by non DAX devices. No functional change intended. Signed-off-by: Roger Pau Monné Reviewed-by: Ira Weiny Acked-by: Andrew Morton Reviewed-by: Pankaj Gupta Link: https://lore.kernel.org/r/20200901083326.21264-3-roger.pau@citrix.com Signed-off-by: Juergen Gross --- include/linux/memremap.h | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'include/linux') diff --git a/include/linux/memremap.h b/include/linux/memremap.h index 5f5b2df06e61..e5862746751b 100644 --- a/include/linux/memremap.h +++ b/include/linux/memremap.h @@ -46,11 +46,10 @@ struct vmem_altmap { * wakeup is used to coordinate physical address space management (ex: * fs truncate/hole punch) vs pinned pages (ex: device dma). * - * MEMORY_DEVICE_DEVDAX: + * MEMORY_DEVICE_GENERIC: * Host memory that has similar access semantics as System RAM i.e. DMA - * coherent and supports page pinning. In contrast to - * MEMORY_DEVICE_FS_DAX, this memory is access via a device-dax - * character device. + * coherent and supports page pinning. This is for example used by DAX devices + * that expose memory using a character device. * * MEMORY_DEVICE_PCI_P2PDMA: * Device memory residing in a PCI BAR intended for use with Peer-to-Peer @@ -60,7 +59,7 @@ enum memory_type { /* 0 is reserved to catch uninitialized type fields */ MEMORY_DEVICE_PRIVATE = 1, MEMORY_DEVICE_FS_DAX, - MEMORY_DEVICE_DEVDAX, + MEMORY_DEVICE_GENERIC, MEMORY_DEVICE_PCI_P2PDMA, }; -- cgit v1.2.3 From 4facb95b7adaf77e2da73aafb9ba60996fe42a12 Mon Sep 17 00:00:00 2001 From: Thomas Gleixner Date: Wed, 2 Sep 2020 01:50:54 +0200 Subject: x86/entry: Unbreak 32bit fast syscall Andy reported that the syscall treacing for 32bit fast syscall fails: # ./tools/testing/selftests/x86/ptrace_syscall_32 ... [RUN] SYSEMU [FAIL] Initial args are wrong (nr=224, args=10 11 12 13 14 4289172732) ... [RUN] SYSCALL [FAIL] Initial args are wrong (nr=29, args=0 0 0 0 0 4289172732) The eason is that the conversion to generic entry code moved the retrieval of the sixth argument (EBP) after the point where the syscall entry work runs, i.e. ptrace, seccomp, audit... Unbreak it by providing a split up version of syscall_enter_from_user_mode(). - syscall_enter_from_user_mode_prepare() establishes state and enables interrupts - syscall_enter_from_user_mode_work() runs the entry work Replace the call to syscall_enter_from_user_mode() in the 32bit fast syscall C-entry with the split functions and stick the EBP retrieval between them. Fixes: 27d6b4d14f5c ("x86/entry: Use generic syscall entry function") Reported-by: Andy Lutomirski Signed-off-by: Thomas Gleixner Link: https://lore.kernel.org/r/87k0xdjbtt.fsf@nanos.tec.linutronix.de --- include/linux/entry-common.h | 51 ++++++++++++++++++++++++++++++++++++-------- 1 file changed, 42 insertions(+), 9 deletions(-) (limited to 'include/linux') diff --git a/include/linux/entry-common.h b/include/linux/entry-common.h index efebbffcd5cc..159c7476b11b 100644 --- a/include/linux/entry-common.h +++ b/include/linux/entry-common.h @@ -110,15 +110,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs #endif /** - * syscall_enter_from_user_mode - Check and handle work before invoking - * a syscall + * syscall_enter_from_user_mode_prepare - Establish state and enable interrupts * @regs: Pointer to currents pt_regs - * @syscall: The syscall number * * Invoked from architecture specific syscall entry code with interrupts * disabled. The calling code has to be non-instrumentable. When the - * function returns all state is correct and the subsequent functions can be - * instrumented. + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This handles lockdep, RCU (context tracking) and tracing state. + * + * This is invoked when there is extra architecture specific functionality + * to be done between establishing state and handling user mode entry work. + */ +void syscall_enter_from_user_mode_prepare(struct pt_regs *regs); + +/** + * syscall_enter_from_user_mode_work - Check and handle work before invoking + * a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * enabled after invoking syscall_enter_from_user_mode_prepare() and extra + * architecture specific work. * * Returns: The original or a modified syscall number * @@ -127,12 +142,30 @@ static inline __must_check int arch_syscall_enter_tracehook(struct pt_regs *regs * syscall_set_return_value() first. If neither of those are called and -1 * is returned, then the syscall will fail with ENOSYS. * - * The following functionality is handled here: + * It handles the following work items: * - * 1) Establish state (lockdep, RCU (context tracking), tracing) - * 2) TIF flag dependent invocations of arch_syscall_enter_tracehook(), + * 1) TIF flag dependent invocations of arch_syscall_enter_tracehook(), * __secure_computing(), trace_sys_enter() - * 3) Invocation of audit_syscall_entry() + * 2) Invocation of audit_syscall_entry() + */ +long syscall_enter_from_user_mode_work(struct pt_regs *regs, long syscall); + +/** + * syscall_enter_from_user_mode - Establish state and check and handle work + * before invoking a syscall + * @regs: Pointer to currents pt_regs + * @syscall: The syscall number + * + * Invoked from architecture specific syscall entry code with interrupts + * disabled. The calling code has to be non-instrumentable. When the + * function returns all state is correct, interrupts are enabled and the + * subsequent functions can be instrumented. + * + * This is combination of syscall_enter_from_user_mode_prepare() and + * syscall_enter_from_user_mode_work(). + * + * Returns: The original or a modified syscall number. See + * syscall_enter_from_user_mode_work() for further explanation. */ long syscall_enter_from_user_mode(struct pt_regs *regs, long syscall); -- cgit v1.2.3 From 1a0cf26323c80e2f1c58fc04f15686de61bfab0c Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 21 Aug 2020 19:49:56 -0400 Subject: mm/ksm: Remove reuse_ksm_page() Remove the function as the last reference has gone away with the do_wp_page() changes. Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- include/linux/ksm.h | 7 ------- 1 file changed, 7 deletions(-) (limited to 'include/linux') diff --git a/include/linux/ksm.h b/include/linux/ksm.h index e48b1e453ff5..161e8164abcf 100644 --- a/include/linux/ksm.h +++ b/include/linux/ksm.h @@ -53,8 +53,6 @@ struct page *ksm_might_need_to_copy(struct page *page, void rmap_walk_ksm(struct page *page, struct rmap_walk_control *rwc); void ksm_migrate_page(struct page *newpage, struct page *oldpage); -bool reuse_ksm_page(struct page *page, - struct vm_area_struct *vma, unsigned long address); #else /* !CONFIG_KSM */ @@ -88,11 +86,6 @@ static inline void rmap_walk_ksm(struct page *page, static inline void ksm_migrate_page(struct page *newpage, struct page *oldpage) { } -static inline bool reuse_ksm_page(struct page *page, - struct vm_area_struct *vma, unsigned long address) -{ - return false; -} #endif /* CONFIG_MMU */ #endif /* !CONFIG_KSM */ -- cgit v1.2.3 From 798a6b87ecd72828a6c6b5469aaa2032a57e92b7 Mon Sep 17 00:00:00 2001 From: Peter Xu Date: Fri, 21 Aug 2020 19:49:58 -0400 Subject: mm: Add PGREUSE counter This accounts for wp_page_reuse() case, where we reused a page for COW. Signed-off-by: Peter Xu Signed-off-by: Linus Torvalds --- include/linux/vm_event_item.h | 1 + 1 file changed, 1 insertion(+) (limited to 'include/linux') diff --git a/include/linux/vm_event_item.h b/include/linux/vm_event_item.h index 24fc7c3ae7d6..58d3f91baad1 100644 --- a/include/linux/vm_event_item.h +++ b/include/linux/vm_event_item.h @@ -30,6 +30,7 @@ enum vm_event_item { PGPGIN, PGPGOUT, PSWPIN, PSWPOUT, PGFAULT, PGMAJFAULT, PGLAZYFREED, PGREFILL, + PGREUSE, PGSTEAL_KSWAPD, PGSTEAL_DIRECT, PGSCAN_KSWAPD, -- cgit v1.2.3 From 428fc0aff4e59399ec719ffcc1f7a5d29a4ee476 Mon Sep 17 00:00:00 2001 From: Jason Gunthorpe Date: Fri, 4 Sep 2020 16:36:19 -0700 Subject: include/linux/log2.h: add missing () around n in roundup_pow_of_two() Otherwise gcc generates warnings if the expression is complicated. Fixes: 312a0c170945 ("[PATCH] LOG2: Alter roundup_pow_of_two() so that it can use a ilog2() on a constant") Signed-off-by: Jason Gunthorpe Signed-off-by: Andrew Morton Link: https://lkml.kernel.org/r/0-v1-8a2697e3c003+41165-log_brackets_jgg@nvidia.com Signed-off-by: Linus Torvalds --- include/linux/log2.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'include/linux') diff --git a/include/linux/log2.h b/include/linux/log2.h index 83a4a3ca3e8a..c619ec6eff4a 100644 --- a/include/linux/log2.h +++ b/include/linux/log2.h @@ -173,7 +173,7 @@ unsigned long __rounddown_pow_of_two(unsigned long n) #define roundup_pow_of_two(n) \ ( \ __builtin_constant_p(n) ? ( \ - (n == 1) ? 1 : \ + ((n) == 1) ? 1 : \ (1UL << (ilog2((n) - 1) + 1)) \ ) : \ __roundup_pow_of_two(n) \ -- cgit v1.2.3